From 6e4ec88db8467ef5b853d233bc3e551451729687 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 23 Oct 2023 12:47:15 +0400 Subject: [PATCH 001/275] [GHA][CONFORMANCE] Enable and conformance for Template in GHA (#20604) * [GHA][CONFORMANCE] Enable and conformance for Template in GHA * apply review * Update linux.yml --- .github/workflows/linux.yml | 12 ++++++++++++ .github/workflows/mac.yml | 12 ++++++++++++ .github/workflows/windows.yml | 8 ++++++++ 3 files changed, 32 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 733dfed4c09d14..639eca9957928d 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -760,6 +760,18 @@ jobs: ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml + - name: SubgraphsDumper tests + run: | + source ${INSTALL_DIR}/setupvars.sh + ${INSTALL_TEST_DIR}/subgraphsDumperTests --gtest_print_time=1 \ + --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml + + - name: Template OpImpl tests + run: | + source ${INSTALL_DIR}/setupvars.sh + ${INSTALL_TEST_DIR}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl*\ + --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml + - name: AUTO unit tests run: | source ${INSTALL_DIR}/setupvars.sh diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index d5084d7a5d19c6..b8e48226a1ca53 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -384,6 +384,18 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml + - name: SubgraphsDumper tests + run: | + source ${{ env.INSTALL_DIR }}/setupvars.sh + {{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \ + --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml + + - name: Template OpImpl tests + run: | + source ${{ env.INSTALL_DIR 
}}/setupvars.sh + {{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \ + --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateOpImplTests.xml + - name: AUTO unit tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6ce891e6767698..e6763d2a696377 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -564,6 +564,14 @@ jobs: run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml + - name: SubgraphsDumper tests + run: | + call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml + + - name: Template OpImpl tests + run: | + call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml + - name: GNA plugin unit tests shell: cmd run: | From 865b21ecd407189e3891149d14fdc831085ca3b3 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Mon, 23 Oct 2023 13:44:58 +0400 Subject: [PATCH 002/275] Introduce WA to improve performance of find_port() method (#20573) * Introduce WA to improve performance of find_port() method * Add mutex * Remove redindant lock * Reduce the number of get_tensor_ptr calls * Fixed typo * Removed WAs from Hetero plugin --- .../openvino/runtime/isync_infer_request.hpp | 15 +++--- src/inference/src/dev/isync_infer_request.cpp | 50 +++++++++++++------ src/plugins/hetero/src/sync_infer_request.cpp | 21 +++----- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp 
b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp index ed15438de2eb83..938fa8924fbb05 100644 --- a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp @@ -124,6 +124,12 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { } }; + /** + * @brief Finds input or output port + * @return structure which contains index of Input/Output or report that port wasn't found + */ + FoundPort find_port(const ov::Output& port) const; + /** * @brief Converts batched tensors to tensor */ @@ -157,12 +163,9 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { std::shared_ptr m_compiled_model; // Mutable to return reference to ov::Tensor mutable std::unordered_map, ov::SoPtr> m_tensors; - - /** - * @brief Finds input or output port - * @return structure which contains index of Input/Output or report that port wasn't found - */ - FoundPort find_port(const ov::Output& port) const; + // Cache ports + mutable std::unordered_map m_cached_ports; + mutable std::mutex m_cache_mutex; }; }; // namespace ov diff --git a/src/inference/src/dev/isync_infer_request.cpp b/src/inference/src/dev/isync_infer_request.cpp index 8e0f554fedd900..94d714d9f134a5 100644 --- a/src/inference/src/dev/isync_infer_request.cpp +++ b/src/inference/src/dev/isync_infer_request.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/isync_infer_request.hpp" +#include #include #include @@ -17,6 +18,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/plugin_itt.hpp" #include "openvino/runtime/tensor.hpp" +#include "openvino/util/common_util.hpp" namespace { void check_batched_tensors(const ov::Output& input, @@ -93,14 +95,18 @@ ov::IInferRequest::~IInferRequest() = default; ov::ISyncInferRequest::ISyncInferRequest(const std::shared_ptr& compiled_model) : m_compiled_model(compiled_model) { OPENVINO_ASSERT(m_compiled_model); - // Create map of empty tensors - for (const auto& 
input : get_inputs()) { - if (m_tensors.find(input.get_tensor_ptr()) == m_tensors.end()) - m_tensors[input.get_tensor_ptr()] = ov::SoPtr(); - } - for (const auto& output : get_outputs()) { - if (m_tensors.find(output.get_tensor_ptr()) == m_tensors.end()) - m_tensors[output.get_tensor_ptr()] = ov::SoPtr(); + // Create map of empty tensors and cache ports from the compiled model + auto port_type = ov::ISyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + const auto& port = ports[i]; + if (m_tensors.find(port.get_tensor_ptr()) == m_tensors.end()) + m_tensors[port.get_tensor_ptr()] = ov::SoPtr(); + size_t port_hash = ov::util::hash_combine(std::vector{std::hash()(port.get_node()), + std::hash()(port.get_index())}); + m_cached_ports[port_hash] = {i, port_type}; + } + port_type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; } } @@ -118,18 +124,30 @@ ov::ISyncInferRequest::FoundPort ov::ISyncInferRequest::find_port(const ov::Outp // This function is hotspot, need optimization. auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { return node1 == node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); + (node1->outputs().size() == node2->outputs().size() && + node1->inputs().size() == node2->inputs().size() && node1->get_type_info() == node2->get_type_info() && + node1->get_friendly_name() == node2->get_friendly_name()); }; + // Find port without caching work slow because we need each time iterate over all ports and compare different + // strings So use WA with caching in order to make 2+ calls for the same ports faster. 
+ // Calculate hash for the port + size_t port_hash = ov::util::hash_combine( + std::vector{std::hash()(port.get_node()), std::hash()(port.get_index())}); + { + std::lock_guard lock(m_cache_mutex); + if (m_cached_ports.find(port_hash) != m_cached_ports.end()) { + // Cached port for the hash was found + return m_cached_ports[port_hash]; + } + } ov::ISyncInferRequest::FoundPort::Type type = ov::ISyncInferRequest::FoundPort::Type::INPUT; for (const auto& ports : {get_inputs(), get_outputs()}) { for (size_t i = 0; i < ports.size(); i++) { - // TODO: Fix port comparison - // if (ports[i] == port) { if (ports[i].get_index() == port.get_index() && ports[i].get_names() == port.get_names() && check_nodes(ports[i].get_node(), port.get_node())) { - return {i, type}; + std::lock_guard lock(m_cache_mutex); + m_cached_ports[port_hash] = {i, type}; + return m_cached_ports[port_hash]; } } type = ov::ISyncInferRequest::FoundPort::Type::OUTPUT; @@ -275,10 +293,10 @@ void ov::ISyncInferRequest::allocate_tensor( void ov::ISyncInferRequest::check_tensors() const { const auto& inputs = m_compiled_model->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - check_tensor(inputs[i], get_tensor_ptr(inputs[i])); + check_tensor(inputs[i], m_tensors.at(inputs[i].get_tensor_ptr())); } const auto& outputs = m_compiled_model->outputs(); for (size_t i = 0; i < outputs.size(); i++) { - check_tensor(outputs[i], get_tensor_ptr(outputs[i])); + check_tensor(outputs[i], m_tensors.at(outputs[i].get_tensor_ptr())); } } diff --git a/src/plugins/hetero/src/sync_infer_request.cpp b/src/plugins/hetero/src/sync_infer_request.cpp index 21fbd0b4f2e2a3..0bb4bc4b7e9a4f 100644 --- a/src/plugins/hetero/src/sync_infer_request.cpp +++ b/src/plugins/hetero/src/sync_infer_request.cpp @@ -49,20 +49,15 @@ ov::hetero::InferRequest::InferRequest(const std::shared_ptr ov::hetero::InferRequest::get_request(const ov::Output& port) const { - auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { - return node1 
== node2 || - (node1->get_friendly_name() == node2->get_friendly_name() && - node1->get_type_info() == node2->get_type_info() && - node1->outputs().size() == node2->outputs().size() && node1->inputs().size() == node2->inputs().size()); - }; - - for (const auto& kvp : m_port_to_subrequest_idx) { - if (kvp.first.get_index() == port.get_index() && kvp.first.get_names() == port.get_names() && - check_nodes(kvp.first.get_node(), port.get_node())) { - return m_subrequests[kvp.second]; - } + auto found_port = find_port(port); + ov::Output internal_port; + OPENVINO_ASSERT(found_port.found(), "Cannot find infer request for port ", port); + if (found_port.is_input()) { + internal_port = get_inputs().at(found_port.idx); + } else { + internal_port = get_outputs().at(found_port.idx); } - OPENVINO_THROW("Cannot find infer request for port ", port); + return m_subrequests[m_port_to_subrequest_idx.at(internal_port)]; } ov::SoPtr ov::hetero::InferRequest::get_tensor(const ov::Output& port) const { From 4fe6d5ec54fe941a70839ccd2507c50abc3d82bd Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 23 Oct 2023 15:04:05 +0400 Subject: [PATCH 003/275] Fix OpImplCheck in GHA (#20650) * Update mac.yml * Update mac.yml --- .github/workflows/mac.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index b8e48226a1ca53..1a55ecdeae87d4 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -387,13 +387,13 @@ jobs: - name: SubgraphsDumper tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - {{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \ + ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml - name: Template OpImpl tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - {{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \ + ${{ 
env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateOpImplTests.xml - name: AUTO unit tests From b67cff7cd5bcf44cc0a24e041f1097aa4636a4d3 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Mon, 23 Oct 2023 12:06:22 +0100 Subject: [PATCH 004/275] [CI] [GHA] Introduce macOS ARM64 as a matrix parameter in the macOS pipeline (#20363) * add m1 mac pipelines as a matrix parameter * Update mac.yml disable java_api because of macos arm64 - Java is not available on macOS arm64 runners * Update mac.yml added always condition for all tests * Update mac.yml * Update mac.yml * Update mac.yml * Update setup.py temp commit * Update tools/openvino_dev/setup.py * use matrix for var * add mxnet to extras only for x86_64 * skip failing tests * use xfail for Python tests; add missing filter for transformations tests * skip CPU func tests on x86_64 mac; skip some tests from CPU func tests on arm mac * Update mac.yml * skip tests on mac arm * skip tests on darwin; apply review * add more skips for python and c++ tests * skip tf tests * skip more tf tests; skip more Python UT stages * rm alwayses, rm triggers, add nightly trigger --------- Co-authored-by: Ilya Lavrenov --- .github/workflows/mac.yml | 163 +++++++++++++----- .../test_onnx/test_onnx_external_data.py | 4 + .../test_onnx/test_ops_nonlinear.py | 6 + .../test_onnx/test_ops_unary.py | 6 + src/frontends/onnx/tests/__init__.py | 1 + .../onnx/tests/tests_python/test_backend.py | 18 ++ .../test_mo_convert_pytorch.py | 3 + .../layer_tests/onnx_tests/test_reduce_lp.py | 4 + .../layer_tests/onnx_tests/test_roi_align.py | 2 + .../pytorch_tests/test_adaptive_max_pool.py | 8 + tests/layer_tests/pytorch_tests/test_all.py | 4 + .../pytorch_tests/test_argmax_argmin.py | 4 + .../layer_tests/pytorch_tests/test_cumsum.py | 4 + .../pytorch_tests/test_distance.py | 6 + tests/layer_tests/pytorch_tests/test_div.py | 4 + 
.../pytorch_tests/test_embedding_bag.py | 6 + .../pytorch_tests/test_fake_quantize.py | 6 + .../pytorch_tests/test_floor_divide.py | 4 + tests/layer_tests/pytorch_tests/test_full.py | 7 + .../pytorch_tests/test_grid_sampler.py | 4 + .../pytorch_tests/test_instance_norm.py | 4 + .../pytorch_tests/test_linspace.py | 6 + .../test_native_multi_head_attention.py | 4 + tests/layer_tests/pytorch_tests/test_nms.py | 4 + tests/layer_tests/pytorch_tests/test_norm.py | 4 + .../layer_tests/pytorch_tests/test_pooling.py | 20 +++ .../pytorch_tests/test_quantize.py | 6 + .../pytorch_tests/test_quantized_add.py | 4 + .../pytorch_tests/test_quantized_add_relu.py | 4 + .../pytorch_tests/test_quantized_cat.py | 8 + .../pytorch_tests/test_quantized_convnd.py | 4 + .../pytorch_tests/test_quantized_hardswish.py | 4 + .../pytorch_tests/test_quantized_linear.py | 6 + .../pytorch_tests/test_quantized_mul.py | 4 + .../pytorch_tests/test_var_mean.py | 6 + .../test_tfl_BroadcastTo.py | 4 + .../tensorflow_lite_tests/test_tfl_RFFT2D.py | 4 + .../test_tfl_SegmentSum.py | 4 + .../test_tf_AdjustContrastv2.py | 4 + .../tensorflow_tests/test_tf_BinaryOps.py | 4 + .../tensorflow_tests/test_tf_Bucketize.py | 4 + .../tensorflow_tests/test_tf_CropAndResize.py | 4 + .../tensorflow_tests/test_tf_DivNoNan.py | 4 + .../test_tf_FakeQuantWithMinMaxVars.py | 4 + .../tensorflow_tests/test_tf_If.py | 10 ++ .../tensorflow_tests/test_tf_LeakyRelu.py | 4 + .../tensorflow_tests/test_tf_LinSpace.py | 4 + .../tensorflow_tests/test_tf_LogSoftmax.py | 4 + .../test_tf_MaxPoolWithArgmax.py | 4 + .../tensorflow_tests/test_tf_NormalizeL2.py | 4 + .../tensorflow_tests/test_tf_Pooling.py | 6 + .../tensorflow_tests/test_tf_RandomUniform.py | 4 + .../tensorflow_tests/test_tf_Resize.py | 4 + .../tensorflow_tests/test_tf_ScatterND.py | 4 + .../tensorflow_tests/test_tf_SegmentSum.py | 4 + .../tensorflow_tests/test_tf_Softmax.py | 4 + .../tensorflow_tests/test_tf_SpaceToBatch.py | 4 + .../tensorflow_tests/test_tf_TopKV2.py | 4 + 
.../tensorflow_tests/test_tf_TruncateDiv.py | 4 + .../test_tf_UnsortedSegmentSum.py | 4 + .../tensorflow_tests/test_tf_Xlog1py.py | 4 + .../tensorflow_tests/test_tf_Xlogy.py | 4 + 62 files changed, 425 insertions(+), 43 deletions(-) diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 1a55ecdeae87d4..487536f615a8a6 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -1,6 +1,9 @@ -name: macOS (macOS 12, Python 3.11) +name: macOS (Python 3.11) on: workflow_dispatch: + schedule: + # at 00:00 on workdays + - cron: '0 0 * * 1,2,3,4,5' # pull_request: # paths-ignore: # - '**/docs/**' @@ -9,17 +12,17 @@ on: # - '**.md' # - '**/layer_tests_summary/**' # - '**/conformance/**' - push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - branches: - - master - - 'releases/**' +# push: +# paths-ignore: +# - '**/docs/**' +# - 'docs/**' +# - '**/**.md' +# - '**.md' +# - '**/layer_tests_summary/**' +# - '**/conformance/**' +# branches: +# - master +# - 'releases/**' concurrency: # github.ref is not unique in post-commit @@ -34,11 +37,22 @@ jobs: defaults: run: shell: bash - runs-on: macos-12-large + strategy: + max-parallel: 2 + fail-fast: false + matrix: + include: + - arhitecture: 'x86_64' + machine: 'macos-13-large' + macos_deployment_target: '10.12' + - arhitecture: 'arm64' + machine: 'macos-13-xlarge' + macos_deployment_target: '11.0' + runs-on: ${{ matrix.machine }} env: CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja Multi-Config' - MACOSX_DEPLOYMENT_TARGET: '10.12' + MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macos_deployment_target }} CMAKE_CXX_COMPILER_LAUNCHER: ccache CMAKE_C_COMPILER_LAUNCHER: ccache OPENVINO_REPO: ${{ github.workspace }}/openvino @@ -100,9 +114,9 @@ jobs: # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push save: ${{ github.ref_name == 'master' && 'true' || 'false' }} verbose: 2 - key: ${{ 
runner.os }}-main + key: ${{ runner.os }}-${{ matrix.arhitecture }}-main restore-keys: | - ${{ runner.os }}-main + ${{ runner.os }}-${{ matrix.arhitecture }}-main - name: CMake configure run: | @@ -144,6 +158,7 @@ jobs: run: | cmake \ -DBUILD_nvidia_plugin=OFF \ + -DBUILD_java_api=OFF \ -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \ -DOPENVINO_EXTRA_MODULES=${{ env.OPENVINO_CONTRIB_REPO }}/modules \ -S ${{ env.OPENVINO_REPO }} \ @@ -158,7 +173,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: openvino_package + name: openvino_package_${{ matrix.arhitecture }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' @@ -166,7 +181,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: openvino_tests + name: openvino_tests_${{ matrix.arhitecture }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz if-no-files-found: 'error' @@ -175,7 +190,16 @@ jobs: defaults: run: shell: bash - runs-on: macos-12 + strategy: + max-parallel: 2 + fail-fast: false + matrix: + include: + - arhitecture: 'x86_64' + machine: 'macos-13' + - arhitecture: 'arm64' + machine: 'macos-13-xlarge' + runs-on: ${{ matrix.machine }} env: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests @@ -189,13 +213,13 @@ jobs: - name: Download OpenVINO package uses: actions/download-artifact@v3 with: - name: openvino_package + name: openvino_package_${{ matrix.arhitecture }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package uses: actions/download-artifact@v3 with: - name: openvino_tests + name: openvino_tests_${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }} - name: Extract OpenVINO packages @@ -248,7 +272,7 @@ jobs: uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} with: - name: test-results-samples + name: test-results-samples-${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml 
if-no-files-found: 'error' @@ -258,7 +282,16 @@ jobs: defaults: run: shell: bash - runs-on: macos-12 + strategy: + max-parallel: 2 + fail-fast: false + matrix: + include: + - arhitecture: 'x86_64' + machine: 'macos-13' + - arhitecture: 'arm64' + machine: 'macos-13-xlarge' + runs-on: ${{ matrix.machine }} env: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests @@ -271,13 +304,13 @@ jobs: - name: Download OpenVINO package uses: actions/download-artifact@v3 with: - name: openvino_package + name: openvino_package_${{ matrix.arhitecture }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package uses: actions/download-artifact@v3 with: - name: openvino_tests + name: openvino_tests_${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }} - name: Extract OpenVINO packages @@ -314,7 +347,11 @@ jobs: - name: Low Precision Transformations Tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 \ + + # Skips under Ticket: 122660 + skip_filter=${{ matrix.arhitecture == 'arm64' && '--gtest_filter=-*smoke_LPT/FoldFakeQuantizeInTransformations.CompareFunctions*' || '' }} + + ${{ env.INSTALL_TEST_DIR }}/ov_lp_transformations_tests --gtest_print_time=1 "$skip_filter" \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-LpTransformations.xml - name: OpenVINO Conditional compilation tests @@ -337,8 +374,10 @@ jobs: --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-PaddleTests.xml - name: ONNX frontend tests + if: ${{ matrix.arhitecture == 'x86_64' }} # Ticket for ARM64: 122663 run: | source ${{ env.INSTALL_DIR }}/setupvars.sh + ${{ env.INSTALL_TEST_DIR }}/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ONNXFrontend.xml @@ -351,7 +390,11 @@ jobs: - name: TensorFlow frontend tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - ${{ env.INSTALL_TEST_DIR 
}}/ov_tensorflow_frontend_tests --gtest_print_time=1 \ + + # Skips under Ticket: 122666 + skip_filter=${{ matrix.arhitecture == 'arm64' && '--gtest_filter=-*CompileModelsTests.ModelWithSplitConvConcat*:*NgramCompilation*' || '' }} + + ${{ env.INSTALL_TEST_DIR }}/ov_tensorflow_frontend_tests --gtest_print_time=1 "$skip_filter" \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TensorFlowFrontend.xml - name: TensorFlow Lite frontend tests @@ -363,7 +406,11 @@ jobs: - name: Transformations func tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 \ + + # Skips under Ticket: 122668 + skip_filter=${{ matrix.arhitecture == 'arm64' && '--gtest_filter=-*TransformationTestsF.CompressQuantizeWeights*:*TransformationTests/CompressQuantizeWeightsTests.FusionTest*' || '' }} + + ${{ env.INSTALL_TEST_DIR }}/ov_transformations_tests --gtest_print_time=1 "$skip_filter" \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-Transformations.xml - name: Common test utils tests @@ -456,7 +503,7 @@ jobs: uses: actions/upload-artifact@v3 if: ${{ always() }} with: - name: test-results-cpp + name: test-results-cpp-${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml if-no-files-found: 'error' @@ -466,7 +513,16 @@ jobs: defaults: run: shell: bash - runs-on: macos-12 + strategy: + max-parallel: 2 + fail-fast: false + matrix: + include: + - arhitecture: 'x86_64' + machine: 'macos-13' + - arhitecture: 'arm64' + machine: 'macos-13-xlarge' + runs-on: ${{ matrix.machine }} env: OPENVINO_REPO: ${{ github.workspace }}/openvino OPENVINO_CONTRIB_REPO: ${{ github.workspace }}/openvino_contrib @@ -491,13 +547,13 @@ jobs: - name: Download OpenVINO package uses: actions/download-artifact@v3 with: - name: openvino_package + name: openvino_package_${{ matrix.arhitecture }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package uses: actions/download-artifact@v3 with: - name: openvino_tests + 
name: openvino_tests_${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }} - name: Extract OpenVINO packages @@ -523,10 +579,16 @@ jobs: # Install the core OV wheel python3 -m pip install ${{ env.INSTALL_DIR }}/tools/openvino-*.whl + # mxnet is only available on x86_64 + extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch" + if [[ "${{ matrix.arhitecture }}" == "x86_64" ]]; then + extras_to_install="mxnet,$extras_to_install" + fi + # Find and install OV dev wheel pushd ${{ env.INSTALL_DIR }}/tools ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl') - python3 -m pip install $ov_dev_wheel_name[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch] + python3 -m pip install $ov_dev_wheel_name[$extras_to_install] popd - name: Python API 1.0 Tests @@ -609,6 +671,7 @@ jobs: TEST_DEVICE: CPU - name: TensorFlow 2 Layer Tests - TF FE + if: ${{ 'false' }} # Ticket: 123322 run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH @@ -646,6 +709,7 @@ jobs: TEST_PRECISION: FP16 - name: Python ONNX operators tests + if: ${{ 'false' }} # Ticket: 123325 run: | # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately python3 -m pytest -sv ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k 'not cuda' \ @@ -669,18 +733,27 @@ jobs: uses: actions/upload-artifact@v3 if: ${{ always() }} with: - name: test-results-python + name: test-results-python-${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml if-no-files-found: 'error' CPU_Functional_Tests: name: CPU functional tests - if: ${{ 'false' }} # Ticket: 122001 needs: Build defaults: run: shell: bash - runs-on: macos-12 + strategy: + max-parallel: 2 + fail-fast: false + matrix: + include: + # ticket: 122001 + # - arhitecture: 'x86_64' + # machine: 'macos-13' + - arhitecture: 'arm64' + machine: 'macos-13-xlarge' + runs-on: 
${{ matrix.machine }} env: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests @@ -692,33 +765,37 @@ jobs: - name: Download OpenVINO package uses: actions/download-artifact@v3 with: - name: openvino_package + name: openvino_package_${{ matrix.arhitecture }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package uses: actions/download-artifact@v3 with: - name: openvino_tests + name: openvino_tests_${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }} - name: Extract OpenVINO packages run: | pushd ${{ env.INSTALL_DIR }} - tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1 + tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz popd pushd ${{ env.INSTALL_TEST_DIR }} - tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1 + tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz popd - - name: Intel CPU plugin func tests + - name: CPU plugin func tests run: | source ${{ env.INSTALL_DIR }}/setupvars.sh - ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml" + + # Skips under Ticket: 122769 + skip_filter=${{ matrix.arhitecture == 'arm64' && '--gtest_filter=-*smoke_nonzero/NonZeroLayerTest.Inference/IS*:*smoke_NormalizeL2_*:*Extension.XmlModelWithExtensionFromDSO*:*Extension.OnnxModelWithExtensionFromDSO*:*ONNXQuantizedModels/QuantizedModelsTests.MaxPool*:*ONNXQuantizedModels/QuantizedModelsTests.Convolution*:**' || '' }} + + ${{ env.INSTALL_TEST_DIR }}/ov_cpu_func_tests --gtest_print_time=1 --gtest_filter=*smoke* "$skip_filter" --gtest_output=xml:"${{ env.INSTALL_TEST_DIR }}/TEST-CPUFuncTests.xml" - name: Upload Test Results uses: actions/upload-artifact@v3 if: ${{ always() }} with: - name: test-results-functional-cpu + name: 
test-results-functional-cpu-${{ matrix.arhitecture }} path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml if-no-files-found: 'error' diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py index ec8f6c49e7ffb6..025c438fedf5d2 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_onnx_external_data.py @@ -1,15 +1,19 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import os import numpy as np import ngraph as ng +import pytest from openvino.inference_engine import IECore from tests_compatibility.runtime import get_runtime +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_import_onnx_with_external_data(): model_path = os.path.join(os.path.dirname(__file__), "models/external_data.onnx") ie = IECore() diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py index 60ab593d097250..7b1ebc7295ce96 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_nonlinear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import onnx import pytest @@ -45,6 +47,8 @@ def relu(x): assert_onnx_import_equals_callable("Relu", relu, [[-3, -2, -1], [1, 2, 3]]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_leaky_relu(): def leaky_relu(x, alpha=0.01): return np.maximum(alpha * x, x) @@ -79,6 +83,8 @@ def parametic_relu(x, slope): assert np.allclose(output, expected_output) 
+@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_selu(): # f(x) = gamma * (alpha * exp(x) - alpha) for x <= 0, y = gamma * x for x > 0 def selu(x, alpha=1.67326319217681884765625, gamma=1.05070102214813232421875): diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py index ddbd8dd53e4a4a..ad7b8e8ffbaf85 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_ops_unary.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import onnx import onnx.mapping @@ -210,6 +212,8 @@ def hardmax_2d(data): assert np.allclose(ng_results, [expected]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_hardsigmoid(): def hardsigmoid(data, alpha=0.2, beta=0.5): return np.clip(alpha * data + beta, 0, 1) @@ -447,6 +451,8 @@ def test_cast_errors(): @pytest.mark.parametrize("value_type", [pytest.param(np.float64), pytest.param(np.float32)]) +@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122712') def test_constant(value_type): values = np.random.randn(5, 5).astype(value_type) node = onnx.helper.make_node( diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index 857c3853cf8fd2..87220792d2d349 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -127,6 +127,7 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): "Not equal to tolerance") xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations") xfail_issue_58676 = xfail_test(reason="AssertionError: Not 
equal to tolerance rtol=0.001, atol=1e-07") +skip_issue_58676 = pytest.mark.skip(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140") xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index d1ef686bdd4124..14034898b7c693 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -2,6 +2,7 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform import logging import onnx.backend.test @@ -24,6 +25,7 @@ xfail_issue_38735, skip_issue_39658, skip_issue_39658, + skip_issue_58676, xfail_issue_44858, xfail_issue_44965, xfail_issue_45180, @@ -683,6 +685,22 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ] +if platform.system() == 'Darwin': + tests_expected_to_fail.extend([ + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_mish_expanded_cpu" + ), + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_resize_downsample_scales_linear_cpu" + ), + ( + skip_issue_58676, + "OnnxBackendNodeModelTest.test_div_uint8_cpu" + )] + ) + for test_group in tests_expected_to_fail: for test_case in test_group[1:]: expect_fail(f"{test_case}", test_group[0]) diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index 6eab63bf682bd0..e437209cde9c9b 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import unittest +import platform from typing import Tuple import numpy as np @@ -1236,6 +1237,8 @@ class TestPrecisionSensitive(): 
@pytest.mark.parametrize("create_model", test_data) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122714') def test_precision_sensitive(self, create_model, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): import numpy.testing as npt from pathlib import Path diff --git a/tests/layer_tests/onnx_tests/test_reduce_lp.py b/tests/layer_tests/onnx_tests/test_reduce_lp.py index e64929a680c20d..73cd86a2bbbc6f 100644 --- a/tests/layer_tests/onnx_tests/test_reduce_lp.py +++ b/tests/layer_tests/onnx_tests/test_reduce_lp.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest from common.layer_test_class import check_ir_version @@ -232,6 +234,8 @@ def create_reduce_lp_const(self, shape, axes, keep_dims, reduce_p, ir_version): @pytest.mark.parametrize("keep_dims", [True, False]) @pytest.mark.parametrize("reduce_p", [1, 2]) @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122846') def test_reduce_lp_precommit(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir, use_old_api): self._test(*self.create_reduce_lp(**params, keep_dims=keep_dims, reduce_p=reduce_p, diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py index 13663808a3acd3..a29ddc4c1d1213 100644 --- a/tests/layer_tests/onnx_tests/test_roi_align.py +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -136,6 +136,8 @@ def create_net(self, input_shape, rois_shape, indices_shape, output_shape, @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Windows', reason="Ticket - 122731") + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
reason='Ticket - 122846') def test_roi_alignv10(self, params, ie_device, precision, ir_version, temp_dir, use_old_api): # TODO: ticket for investigating GPU failures: CVS-86300 if ie_device != "GPU": diff --git a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py index c01e58c2107eec..09be641a0fb96e 100644 --- a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py +++ b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool3d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -94,6 +98,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool2d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -139,6 +145,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_adaptive_max_pool1d(self, ie_device, 
precision, ir_version, input_tensor, output_size, return_indices): self.input_tensor = input_tensor self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_all.py b/tests/layer_tests/pytorch_tests/test_all.py index b5255f197cfef0..ca9b734c1ad1dd 100644 --- a/tests/layer_tests/pytorch_tests/test_all.py +++ b/tests/layer_tests/pytorch_tests/test_all.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -68,6 +70,8 @@ def test_all_noparams(self, input_tensor, ie_device, precision, ir_version): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_all(self, input_tensor, keepdim, ie_device, precision, ir_version): self.input_tensor = input_tensor for dim in range(len(input_tensor.shape)): diff --git a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py index 05abf128da400d..80ed6fcb872b5f 100644 --- a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py +++ b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -71,6 +73,8 @@ def forward(self, x): @pytest.mark.parametrize("dtype", ["float32", "int32", "int64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_argmin_argmax(self, axes, keep_dims, op_type, dtype, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axes, keep_dims), ie_device, precision, ir_version, 
trace_model=True, diff --git a/tests/layer_tests/pytorch_tests/test_cumsum.py b/tests/layer_tests/pytorch_tests/test_cumsum.py index 926cfe9e95c30a..771eb02768bdf0 100644 --- a/tests/layer_tests/pytorch_tests/test_cumsum.py +++ b/tests/layer_tests/pytorch_tests/test_cumsum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -69,5 +71,7 @@ def forward_out_prim_dtype(self, x, y): @pytest.mark.parametrize("out,dtype_from_input", [(False, False), (True, False), (True, True)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cumsum(self, axis, dtype, out, dtype_from_input, ie_device, precision, ir_version): self._test(*self.create_model(axis, dtype, out, dtype_from_input), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "out_dtype": dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_distance.py b/tests/layer_tests/pytorch_tests/test_distance.py index 1c76a7243b47e3..f8cec6998ca7b6 100644 --- a/tests/layer_tests/pytorch_tests/test_distance.py +++ b/tests/layer_tests/pytorch_tests/test_distance.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -29,6 +31,8 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, ie_device, precision, ir_version): self._test(*self.create_model(p), ie_device, precision, ir_version) @@ -61,5 +65,7 @@ def forward(self, x, y): @pytest.mark.parametrize("p", [2., 4., 6., 8.,]) @pytest.mark.parametrize("eps", 
[1e-06, 0.00001, 1e-07]) @pytest.mark.parametrize("keepdim", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_cdist(self, p, eps, keepdim, ie_device, precision, ir_version): self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_div.py b/tests/layer_tests/pytorch_tests/test_div.py index d6e696b62882d5..8b7dad351817d4 100644 --- a/tests/layer_tests/pytorch_tests/test_div.py +++ b/tests/layer_tests/pytorch_tests/test_div.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -116,6 +118,8 @@ def forward3(self, lhs, rhs): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_div_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape, rounding_mode): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_embedding_bag.py b/tests/layer_tests/pytorch_tests/test_embedding_bag.py index 2595b2269316fd..d0c6d0c532856f 100644 --- a/tests/layer_tests/pytorch_tests/test_embedding_bag.py +++ b/tests/layer_tests/pytorch_tests/test_embedding_bag.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -42,6 +44,8 @@ def forward_offsets_per_sample_weights(self, indicies, weight, offsets, per_samp @pytest.mark.precommit @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + 
reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, @@ -85,6 +89,8 @@ def forward_per_sample_weights(self, indicies, weight, per_sample_wights): @pytest.mark.parametrize("indicies_size", [[1, 1], [2, 5], [3, 10], [4, 7]]) @pytest.mark.parametrize("indicies_dtype", ["int", "int32"]) @pytest.mark.parametrize("per_sample_weights", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_embedding_bag(self, ie_device, precision, ir_version, indicies_dtype, indicies_size, per_sample_weights): self._test(*self.create_model(per_sample_weights), ie_device, precision, ir_version, kwargs_to_prepare_input={"indicies_size": indicies_size, "indicies_dtype": indicies_dtype, "per_sample_weights": per_sample_weights}, diff --git a/tests/layer_tests/pytorch_tests/test_fake_quantize.py b/tests/layer_tests/pytorch_tests/test_fake_quantize.py index 6bb1d6601cb43b..3146ac87b90087 100644 --- a/tests/layer_tests/pytorch_tests/test_fake_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_fake_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -46,6 +48,8 @@ def forward(self, x): (1.0, 0, 0, 127), ], ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fake_quantize_per_tensor_affine( self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max ): @@ -96,6 +100,8 @@ def forward(self, x): (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255), ], ) + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fake_quantize_per_channel_affine( self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max ): diff --git a/tests/layer_tests/pytorch_tests/test_floor_divide.py b/tests/layer_tests/pytorch_tests/test_floor_divide.py index cd427acb3dba56..44c1eadc3ce542 100644 --- a/tests/layer_tests/pytorch_tests/test_floor_divide.py +++ b/tests/layer_tests/pytorch_tests/test_floor_divide.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -52,6 +54,8 @@ def forward(self, input_tensor, other_tensor): ])) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_floor_divide(self, input_tensor, other_tensor, ie_device, precision, ir_version): self.input_tensor = input_tensor self.other_tensor = other_tensor diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index c564b1bb3731b9..52b5b2e3e58bd1 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -1,5 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + +import platform + import numpy as np import pytest @@ -144,6 +147,8 @@ def forward(self, input_t: torch.Tensor, x:float): @pytest.mark.parametrize("mode", ["", "inplace", "out"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill(self, shape, value, input_dtype, value_dtype, mode, ie_device, precision, ir_version): self._test(*self.create_model(mode), ie_device, precision, ir_version, kwargs_to_prepare_input={ @@ -183,6 +188,8 @@ 
def forward(self, x:torch.Tensor, y:float): @pytest.mark.parametrize("wrap", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_fill_diagonal(self, shape, value, input_dtype, value_dtype, wrap, ie_device, precision, ir_version): self._test(*self.create_model(shape, wrap), ie_device, precision, ir_version, kwargs_to_prepare_input={ diff --git a/tests/layer_tests/pytorch_tests/test_grid_sampler.py b/tests/layer_tests/pytorch_tests/test_grid_sampler.py index b142544c3b6e62..7b55862e2f0c2d 100644 --- a/tests/layer_tests/pytorch_tests/test_grid_sampler.py +++ b/tests/layer_tests/pytorch_tests/test_grid_sampler.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -35,6 +37,8 @@ def forward(self, input, grid): @pytest.mark.parametrize("align_corners", [True, False, None]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_grid_sampler(self, h_in, w_in, h_out, w_out, mode, padding_mode, align_corners, ie_device, precision, ir_version): self._test(*self.create_model(mode, padding_mode, align_corners), ie_device, precision, ir_version, kwargs_to_prepare_input={ "h_in": h_in, "w_in": w_in, "h_out": h_out, "w_out": w_out diff --git a/tests/layer_tests/pytorch_tests/test_instance_norm.py b/tests/layer_tests/pytorch_tests/test_instance_norm.py index 2fe3f5e13e066a..3ec2dd0144573d 100644 --- a/tests/layer_tests/pytorch_tests/test_instance_norm.py +++ b/tests/layer_tests/pytorch_tests/test_instance_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import 
PytorchLayerTest @@ -60,6 +62,8 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_group_norm(self, params, ie_device, precision, ir_version, kwargs_to_prepare_input): self._test(*self.create_model(**params), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, diff --git a/tests/layer_tests/pytorch_tests/test_linspace.py b/tests/layer_tests/pytorch_tests/test_linspace.py index aa6f70d3d71c89..4cf623e55fafad 100644 --- a/tests/layer_tests/pytorch_tests/test_linspace.py +++ b/tests/layer_tests/pytorch_tests/test_linspace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -63,6 +65,8 @@ def forward(self, start, end, steps, d): @pytest.mark.parametrize( "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype, ref_dtype=True), @@ -79,6 +83,8 @@ def test_linspace_with_prim_dtype(self, dtype, end, start, steps, ie_device, pre "start,end,steps", [(0, 1, 5), (-2, 1, 5), (1, -5, 7), (1, 10, 2), (-1, -5, 2), (-1, -5, 1), (1.25, -5.5, 5)] ) @pytest.mark.parametrize("use_out", [False, True]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linspace_with_out(self, dtype, use_out, end, start, steps, ie_device, precision, ir_version): self._test( *self.create_model(dtype=dtype, use_out=use_out), diff --git a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py 
b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py index 41e737dba6221d..26b7cdbd14812b 100644 --- a/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py +++ b/tests/layer_tests/pytorch_tests/test_native_multi_head_attention.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -74,6 +76,8 @@ def _prepare_input(self): ["need_weights", "average_attn_weights"], [[False, False], [True, False], [True, True]] ) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_native_multi_head_attention(self, ie_device, precision, ir_version, mask, need_weights, average_attn_weights): self._test(aten_native_multi_head_attention(mask, need_weights, average_attn_weights), None, "aten::_native_multi_head_attention", ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_nms.py b/tests/layer_tests/pytorch_tests/test_nms.py index ae09726a23b8f9..b703e98ccaffe9 100644 --- a/tests/layer_tests/pytorch_tests/test_nms.py +++ b/tests/layer_tests/pytorch_tests/test_nms.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest import numpy as np @@ -35,6 +37,8 @@ def forward(self, boxes, scores): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_nms(self, ie_device, precision, ir_version, boxes_num): self.boxes_num = boxes_num self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_norm.py b/tests/layer_tests/pytorch_tests/test_norm.py index aef0a074059950..9422c170401702 100644 --- a/tests/layer_tests/pytorch_tests/test_norm.py 
+++ b/tests/layer_tests/pytorch_tests/test_norm.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -245,6 +247,8 @@ def forward_out(self, x, y): @pytest.mark.parametrize("dtype", ["float32", "float64", None]) @pytest.mark.parametrize("out", [True, False]) @pytest.mark.parametrize("prim_dtype", [True, False]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_linalg_matrix_norm(self, p, dim, keepdim, dtype, out, prim_dtype, ie_device, precision, ir_version): self._test(*self.create_model(p, dim, keepdim, dtype, out, prim_dtype), ie_device, precision, ir_version, diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index 3f4c94db6d45d0..f54902282ece1b 100644 --- a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -133,6 +135,8 @@ def forward(self, x): @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool1d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, trace_model=True, @@ -151,6 +155,8 @@ def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool2d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, trace_model=True, dynamic_shapes=False) @@ -160,6 +166,8 @@ def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): self._test(*self.create_model("avg_pool3d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, trace_model=True, @@ -170,6 +178,8 @@ def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool1d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, dynamic_shapes=False) @@ -179,6 +189,8 @@ def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d(self, params, ceil_mode, dilation, 
ie_device, precision, ir_version): to_trace = False if params["stride"] == []: @@ -191,6 +203,8 @@ def test_max_pool2d(self, params, ceil_mode, dilation, ie_device, precision, ir @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): self._test(*self.create_model("max_pool3d", **params, ceil_mode=ceil_mode, dilation=dilation), ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, dynamic_shapes=False) @@ -200,6 +214,8 @@ def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_ @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -211,6 +227,8 @@ def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precis @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") @@ -225,6 +243,8 @@ def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def 
test_max_pool3d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): if ceil_mode and (np.array(params["padding"]).any() != 0): pytest.skip("ticket 122418") diff --git a/tests/layer_tests/pytorch_tests/test_quantize.py b/tests/layer_tests/pytorch_tests/test_quantize.py index f1a7522159090e..600821fa16204c 100644 --- a/tests/layer_tests/pytorch_tests/test_quantize.py +++ b/tests/layer_tests/pytorch_tests/test_quantize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -49,6 +51,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_tensor_dequantize(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(aten_quantize_per_tensor_aten_dequantize(scale, zero_point, dtype), None, ["aten::quantize_per_tensor", "aten::dequantize"], @@ -88,6 +92,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantize_per_channel_dequantize(self, scale, zero_point, dtype, axis, ie_device, precision, ir_version): np.random.shuffle(scale), np.random.shuffle(zero_point) if dtype == torch.quint8: zero_point = abs(zero_point) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add.py b/tests/layer_tests/pytorch_tests/test_quantized_add.py index 960d3b4cca7aef..59a992fc088d5a 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import 
pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add(scale, zero_point, dtype), None, ["quantized::add"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py index 4a0dd797e3525c..6cb64dfab053d6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_add_relu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_add_relu(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_add_relu(scale, zero_point, dtype), None, ["quantized::add_relu"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_cat.py b/tests/layer_tests/pytorch_tests/test_quantized_cat.py index db6e5278bb5c50..ce0bc880e78f66 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_cat.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_cat.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -73,6 +75,8 @@ def _prepare_input(self): @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_quantized_cat(scale, zero_point, dtype), @@ -91,6 +95,8 @@ def test_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_ @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_append_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_append_quantized_cat(scale, zero_point, dtype), @@ -130,6 +136,8 @@ def test_loop_append_quantized_cat(self, scale, zero_point, dtype, ie_device, pr @pytest.mark.parametrize("dtype", [torch.quint8, torch.qint8]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_add_quantized_cat(self, scale, zero_point, dtype, ie_device, precision, ir_version): self._test( aten_add_quantized_cat(scale, zero_point, dtype), diff --git a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py index cf3ec0142cf46b..bc4ac9e1788b34 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_convnd.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_convnd.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import numpy as np import torch @@ -78,6 +80,8 @@ def forward(self, x): @pytest.mark.parametrize("zero_point", [0, 1]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_conv2d(self, 
params, bias, relu, scale, zero_point, ie_device, precision, ir_version): self._test( *self.create_model(**params, bias=bias, relu=relu, diff --git a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py index a0b40783c4e98d..4508bbcb266ab6 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_hardswish.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -36,6 +38,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardswish(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_hardswish(scale, zero_point, dtype), None, ["quantized::hardswish"], diff --git a/tests/layer_tests/pytorch_tests/test_quantized_linear.py b/tests/layer_tests/pytorch_tests/test_quantized_linear.py index 1ded932f234055..bd89ea48303f25 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_linear.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_linear.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import torch import numpy as np @@ -73,6 +75,8 @@ def forward(self, inp): @pytest.mark.parametrize("trace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, precision, ir_version): input_shape = params.get("input_shape") weight_shape = params.get("weight_shape") @@ -84,6 +88,8 @@ def 
test_quantized_linear(self, params, scale, zero_point, trace, ie_device, pre @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_hardtanh_linear(self, trace, inplace, ie_device, precision, ir_version): self._test(*self.create_hardtanh_model([10, 9], True, 1, 0.3, inplace), ie_device, precision, ir_version, kwargs_to_prepare_input={"input_shape": [2, 3, 9]}, trace_model=trace, freeze_model=False, quantized_ops=True, quant_size=0.3) diff --git a/tests/layer_tests/pytorch_tests/test_quantized_mul.py b/tests/layer_tests/pytorch_tests/test_quantized_mul.py index cc877daa919b5b..d170d70308b6a5 100644 --- a/tests/layer_tests/pytorch_tests/test_quantized_mul.py +++ b/tests/layer_tests/pytorch_tests/test_quantized_mul.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import torch @@ -38,6 +40,8 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_quantized_mul(self, scale, zero_point, dtype, ie_device, precision, ir_version): if dtype == torch.quint8: zero_point = abs(zero_point) self._test(quantized_mul(scale, zero_point, dtype), None, ["quantized::mul"], diff --git a/tests/layer_tests/pytorch_tests/test_var_mean.py b/tests/layer_tests/pytorch_tests/test_var_mean.py index 6ce85988e9edfb..bd8a5a10617eb4 100644 --- a/tests/layer_tests/pytorch_tests/test_var_mean.py +++ b/tests/layer_tests/pytorch_tests/test_var_mean.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -52,6 +54,8 @@ def forward(self, 
x): @pytest.mark.precommit @pytest.mark.parametrize("unbiased", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, op_type=op_type), ie_device, precision, ir_version) @@ -61,5 +65,7 @@ def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.parametrize("dim", [None, 0, 1, 2, 3, -1, -2, (0, 1), (-1, -2), (0, 1, -1), (0, 1, 2, 3)]) @pytest.mark.parametrize("keepdim", [True, False]) @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_var(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py index bae3f51ce97ff0..6f3eb1b70ed2f2 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_BroadcastTo.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -29,5 +31,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_broadcast_to(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py 
b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py index b534878970ac59..1ae3464c207b34 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_RFFT2D.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -30,5 +32,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_rfft2d(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py index a5ce2d314aee0b..c7339efaf7f55e 100644 --- a/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py +++ b/tests/layer_tests/tensorflow_lite_tests/test_tfl_SegmentSum.py @@ -1,3 +1,5 @@ +import platform + import pytest import tensorflow as tf @@ -40,5 +42,7 @@ def make_model(self, params): @pytest.mark.parametrize("params", test_params) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 123324') def test_segment_sum(self, params, ie_device, precision, temp_dir): self._test(ie_device, precision, temp_dir, params) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py index 88944c50a38091..896e1789111eaa 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_AdjustContrastv2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -38,6 +40,8 @@ def create_adjust_contrast_net(self, input_shape, input_type): 
@pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_adjust_contrast_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_adjust_contrast_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py index 62689f5609cc12..7e3964e68c9c35 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest @@ -126,6 +128,8 @@ def create_add_placeholder_const_net(self, x_shape, y_shape, ir_version, op_type 'Xdivy']) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_binary_op(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): if precision == "FP16": diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py index d981b2997542b5..8ab60f9ac65beb 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -40,6 +42,8 @@ def create_bucketize_net(self, input_shape, input_type, boundaries_size): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and 
platform.machine() == 'arm64', + reason='Ticket - 122716') def test_bucketize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_bucketize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py index 92ef18ff5aba98..30cefc07c942d2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_CropAndResize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -53,6 +55,8 @@ def create_crop_and_resize_net(self, image_shape, num_boxes, crop_size_value, me @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_crop_and_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_crop_and_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py index 58db73ece154e1..5a6f3883185f23 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_DivNoNan.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_div_no_nan_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def 
test_div_no_nan_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_div_no_nan_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py index 43d8da8e38019d..191b46e035a376 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantWithMinMaxVars.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_fake_quant_with_min_max_vars_net(self, inputs_shape, min_value, max_v ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_fake_quant_with_min_max_vars_basic(self, params, fake_quant_op, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_If.py b/tests/layer_tests/tensorflow_tests/test_tf_If.py index 0e4e7a6fb249e5..20085e6ac86672 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_If.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_If.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -67,6 +69,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -137,6 +141,8 @@ def else_branch(): 
@pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -215,6 +221,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': @@ -305,6 +313,8 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py index 1504ae706a9b19..ea672ac144d987 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LeakyRelu.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -31,6 +33,8 @@ def create_leaky_relu_net(self, x_shape, alpha_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_leaky_relu_basic(self, params, 
ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_leaky_relu_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py index c696eaaa0355e5..216fe7b7816de4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LinSpace.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -28,6 +30,8 @@ def create_lin_space_net(self, num_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_lin_space_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_lin_space_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py index bef52905aa3159..063e310dd8174a 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -39,6 +41,8 @@ def create_log_softmax_net(self, logits_shape): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_log_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_log_softmax_net(**params), diff --git 
a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py index 4d1fed5747ba11..f08995f3c09d11 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_MaxPoolWithArgmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -59,6 +61,8 @@ def create_max_pool_with_argmax_net(self, input_shape, ksize, strides, input_typ ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_max_pool_with_argmax_basic(self, params, input_type, padding, targmax, include_batch_in_index, with_second_output, ie_device, precision, ir_version, temp_dir, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py index 51a1b322af6541..5de76778d1d837 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -30,6 +32,8 @@ def create_normalize_l2_net(shape, axes): @pytest.mark.precommit @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_normalize_l2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_normalize_l2_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py index 
eb3ac133b3687d..7c523740d79f96 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.layer_test_class import check_ir_version from common.tf_layer_test_class import CommonTFLayerTest @@ -145,6 +147,8 @@ def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, me @pytest.mark.parametrize("params", test_data_4D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_pooling_net(**params, ir_version=ir_version, @@ -227,6 +231,8 @@ def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir, use_n @pytest.mark.parametrize("params", test_data_5D) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_pool_5D(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py index 0006afd9ab9eca..1f5f778db3ac2f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_RandomUniform.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest import tensorflow as tf from common.layer_test_class import check_ir_version @@ -88,6 +90,8 @@ def create_tf_random_uniform_net(self, global_seed, op_seed, x_shape, min_val, m @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_tf_fe + 
@pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_random_uniform_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py index 184a8115772128..c62492c7a76196 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Resize.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Resize.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -60,6 +62,8 @@ def create_resize_net(self, images_shape, images_type, size_value, align_corners @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_resize_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_resize_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py index dac986b96c281e..26ddcfdd53bcc2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ScatterND.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -69,6 +71,8 @@ def create_tf_scatternd_placeholder_const_net(self, x_shape, indices, updates, i @pytest.mark.parametrize("params", test_data) @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def 
test_tf_scatter_nd(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_tf_scatternd_placeholder_const_net(**params, ir_version=ir_version, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py index 5d74c361f51c20..f0f99d4b9cf95f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -41,6 +43,8 @@ def create_segment_sum_net(self, data_shape, segment_ids_shape, data_type, segme @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_segment_sum_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if not use_new_frontend: diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py index fc9391feaae3e8..574fe3d32949f7 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Softmax.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -34,6 +36,8 @@ def create_softmax_net(self, input_shape): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_softmax_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, 
use_old_api): self._test(*self.create_softmax_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py index b0f24322b01041..03e83dc39e9c8d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import pytest from common.tf_layer_test_class import CommonTFLayerTest @@ -33,6 +35,8 @@ def create_space_to_batch_net(self, in_shape, pads_value, block_shape_value): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_space_to_batch_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_space_to_batch_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py index ece6f08471a643..73efaf490b23dd 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_TopKV2.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -46,6 +48,8 @@ def create_topk_v2_net(self, input_shape, input_type, k, sorted, is_first_output @pytest.mark.parametrize("params", test_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_topk_v2_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_topk_v2_net(**params), diff --git 
a/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py index 508cde035b83ad..18440dbcd7f44a 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_truncate_div_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_truncate_div_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_truncate_div_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py index 09afd6f26330ca..f7dcf2eeb324f2 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnsortedSegmentSum.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -55,6 +57,8 @@ def create_unsorted_segment_sum_net(self, data_shape, segment_ids_shape, num_seg ]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_unsorted_segment_sum_basic(self, params, data_type, segment_ids_type, num_segments_type, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py 
b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py index 7c80fbdad88b09..4da47e7b5356c4 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlog1py.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlog1py_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlog1py_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlog1py_net(**params), diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py index 6ecddeb439aed3..911c3b0eea2154 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Xlogy.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -42,6 +44,8 @@ def create_xlogy_net(self, input_shape, input_type): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit_tf_fe @pytest.mark.nightly + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_xlogy_basic(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): self._test(*self.create_xlogy_net(**params), From 5e017dc5d2de0512386b2b0bb2ee4536ce7a0345 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Mon, 23 Oct 2023 16:48:51 +0530 Subject: [PATCH 005/275] fix compilation issue for openmp on windows (#20312) * fix compilation issue for openmp on 
windows * update based on suggestions --- src/inference/src/dev/threading/cpu_streams_executor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index dba0082d647080..e61893e132dfeb 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -397,7 +397,7 @@ struct CPUStreamsExecutor::Impl { auto numaNodes = get_available_numa_nodes(); if (_config._streams != 0) { std::copy_n(std::begin(numaNodes), - std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()), + std::min<std::size_t>(_config._streams, numaNodes.size()), std::back_inserter(_usedNumaNodes)); } else { _usedNumaNodes = numaNodes; From 99dfbb400a4b5bb794ebd4c52c016fcec8edbe2b Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 23 Oct 2023 15:24:35 +0400 Subject: [PATCH 006/275] [TF FE] Document full list of TF operations and their support by TF FE (#20640) * [TF FE] Document full list of TF operations and their support by TF FE Signed-off-by: Kazantsev, Roman * Update src/frontends/tensorflow/docs/supported_ops.md Co-authored-by: Karol Blaszczak --------- Signed-off-by: Kazantsev, Roman Co-authored-by: Karol Blaszczak --- src/frontends/tensorflow/README.md | 1 + .../tensorflow/docs/supported_ops.md | 1406 +++++++++++++++++ 2 files changed, 1407 insertions(+) create mode 100644 src/frontends/tensorflow/docs/supported_ops.md diff --git a/src/frontends/tensorflow/README.md b/src/frontends/tensorflow/README.md index 7fc421cd92c7f4..4a48203a2b41dc 100644 --- a/src/frontends/tensorflow/README.md +++ b/src/frontends/tensorflow/README.md @@ -205,6 +205,7 @@ py.test tensorflow_tests/test_tf_Unique.py --use_new_frontend ``` ## See also + * [Supported Operations](./docs/supported_ops.md) * [OpenVINO README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer 
documentation](../../../docs/dev/index.md) diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md new file mode 100644 index 00000000000000..5794e3f16653fd --- /dev/null +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -0,0 +1,1406 @@ +# TensorFlow Operations Supported by OpenVINO TensorFlow Frontend + +Here is a table of operations supported by the TensorFlow Frontend from [tf.raw_ops](https://www.tensorflow.org/api_docs/python/tf/raw_ops). +A "supported operation" is one that TensorFlow Frontend can convert to the OpenVINO representation. + +| Operation Name | Supported | Limitation | +|---------------------------------------------------------|-------------------------------|-------------------------------| +| Abort | NO | | +| Abs | YES | | +| AccumulateNV2 | NO | | +| AccumulatorApplyGradient | NO | | +| AccumulatorNumAccumulated | NO | | +| AccumulatorSetGlobalStep | NO | | +| AccumulatorTakeGradient | NO | | +| Acos | YES | | +| Acosh | YES | | +| Add | YES | | +| AddManySparseToTensorsMap | NO | | +| AddN | YES | | +| AddSparseToTensorsMap | NO | | +| AddV2 | YES | | +| AdjustContrast | NO | | +| AdjustContrastv2 | YES | | +| AdjustHue | NO | | +| AdjustSaturation | NO | | +| All | YES | | +| AllCandidateSampler | NO | | +| AllToAll | NO | | +| Angle | NO | | +| AnonymousHashTable | NO | | +| AnonymousIterator | NO | | +| AnonymousIteratorV2 | NO | | +| AnonymousIteratorV3 | NO | | +| AnonymousMemoryCache | NO | | +| AnonymousMultiDeviceIterator | NO | | +| AnonymousMultiDeviceIteratorV3 | NO | | +| AnonymousMutableDenseHashTable | NO | | +| AnonymousMutableHashTable | NO | | +| AnonymousMutableHashTableOfTensors | NO | | +| AnonymousRandomSeedGenerator | NO | | +| AnonymousSeedGenerator | NO | | +| Any | YES | | +| ApplyAdaMax | NO | | +| ApplyAdadelta | NO | | +| ApplyAdagrad | NO | | +| ApplyAdagradDA | NO | | +| ApplyAdagradV2 | NO | | +| ApplyAdam | NO | | +| ApplyAddSign | NO | | +| 
ApplyCenteredRMSProp | NO | | +| ApplyFtrl | NO | | +| ApplyFtrlV2 | NO | | +| ApplyGradientDescent | NO | | +| ApplyMomentum | NO | | +| ApplyPowerSign | NO | | +| ApplyProximalAdagrad | NO | | +| ApplyProximalGradientDescent | NO | | +| ApplyRMSProp | NO | | +| ApproxTopK | NO | | +| ApproximateEqual | NO | | +| ArgMax | YES | | +| ArgMin | YES | | +| AsString | NO | | +| Asin | YES | | +| Asinh | YES | | +| Assert | YES | | +| AssertCardinalityDataset | NO | | +| AssertNextDataset | NO | | +| AssertPrevDataset | NO | | +| Assign | YES | | +| AssignAdd | NO | | +| AssignAddVariableOp | NO | | +| AssignSub | NO | | +| AssignSubVariableOp | NO | | +| AssignVariableOp | YES | | +| AssignVariableXlaConcatND | NO | | +| Atan | YES | | +| Atan2 | NO | | +| Atanh | YES | | +| AudioSpectrogram | NO | | +| AudioSummary | NO | | +| AudioSummaryV2 | NO | | +| AutoShardDataset | NO | | +| AvgPool | YES | | +| AvgPool3D | YES | | +| AvgPool3DGrad | NO | | +| AvgPoolGrad | NO | | +| BandedTriangularSolve | NO | | +| Barrier | NO | | +| BarrierClose | NO | | +| BarrierIncompleteSize | NO | | +| BarrierInsertMany | NO | | +| BarrierReadySize | NO | | +| BarrierTakeMany | NO | | +| Batch | NO | | +| BatchCholesky | NO | | +| BatchCholeskyGrad | NO | | +| BatchDataset | NO | | +| BatchDatasetV2 | NO | | +| BatchFFT | NO | | +| BatchFFT2D | NO | | +| BatchFFT3D | NO | | +| BatchFunction | NO | | +| BatchIFFT | NO | | +| BatchIFFT2D | NO | | +| BatchIFFT3D | NO | | +| BatchMatMul | YES | | +| BatchMatMulV2 | YES | | +| BatchMatMulV3 | YES | | +| BatchMatrixBandPart | NO | | +| BatchMatrixDeterminant | NO | | +| BatchMatrixDiag | NO | | +| BatchMatrixDiagPart | NO | | +| BatchMatrixInverse | NO | | +| BatchMatrixSetDiag | NO | | +| BatchMatrixSolve | NO | | +| BatchMatrixSolveLs | NO | | +| BatchMatrixTriangularSolve | NO | | +| BatchNormWithGlobalNormalization | NO | | +| BatchNormWithGlobalNormalizationGrad | NO | | +| BatchSelfAdjointEig | NO | | +| BatchSelfAdjointEigV2 | NO | | 
+| BatchSvd | NO | | +| BatchToSpace | NO | | +| BatchToSpaceND | YES | | +| BesselI0 | NO | | +| BesselI0e | NO | | +| BesselI1 | NO | | +| BesselI1e | NO | | +| BesselJ0 | NO | | +| BesselJ1 | NO | | +| BesselK0 | NO | | +| BesselK0e | NO | | +| BesselK1 | NO | | +| BesselK1e | NO | | +| BesselY0 | NO | | +| BesselY1 | NO | | +| Betainc | NO | | +| BiasAdd | YES | | +| BiasAddGrad | NO | | +| BiasAddV1 | NO | | +| Bincount | NO | | +| Bitcast | NO | | +| BitwiseAnd | NO | | +| BitwiseOr | NO | | +| BitwiseXor | NO | | +| BlockLSTM | YES | | +| BlockLSTMGrad | NO | | +| BlockLSTMGradV2 | NO | | +| BlockLSTMV2 | NO | | +| BoostedTreesAggregateStats | NO | | +| BoostedTreesBucketize | NO | | +| BoostedTreesCalculateBestFeatureSplit | NO | | +| BoostedTreesCalculateBestFeatureSplitV2 | NO | | +| BoostedTreesCalculateBestGainsPerFeature | NO | | +| BoostedTreesCenterBias | NO | | +| BoostedTreesCreateEnsemble | NO | | +| BoostedTreesCreateQuantileStreamResource | NO | | +| BoostedTreesDeserializeEnsemble | NO | | +| BoostedTreesEnsembleResourceHandleOp | NO | | +| BoostedTreesExampleDebugOutputs | NO | | +| BoostedTreesFlushQuantileSummaries | NO | | +| BoostedTreesGetEnsembleStates | NO | | +| BoostedTreesMakeQuantileSummaries | NO | | +| BoostedTreesMakeStatsSummary | NO | | +| BoostedTreesPredict | NO | | +| BoostedTreesQuantileStreamResourceAddSummaries | NO | | +| BoostedTreesQuantileStreamResourceDeserialize | NO | | +| BoostedTreesQuantileStreamResourceFlush | NO | | +| BoostedTreesQuantileStreamResourceGetBucketBoundaries | NO | | +| BoostedTreesQuantileStreamResourceHandleOp | NO | | +| BoostedTreesSerializeEnsemble | NO | | +| BoostedTreesSparseAggregateStats | NO | | +| BoostedTreesSparseCalculateBestFeatureSplit | NO | | +| BoostedTreesTrainingPredict | NO | | +| BoostedTreesUpdateEnsemble | NO | | +| BoostedTreesUpdateEnsembleV2 | NO | | +| BroadcastArgs | YES | | +| BroadcastGradientArgs | NO | | +| BroadcastTo | YES | | +| Bucketize | YES | | +| 
BytesProducedStatsDataset | NO | | +| CSRSparseMatrixComponents | NO | | +| CSRSparseMatrixToDense | NO | | +| CSRSparseMatrixToSparseTensor | NO | | +| CSVDataset | NO | | +| CSVDatasetV2 | NO | | +| CTCBeamSearchDecoder | NO | | +| CTCGreedyDecoder | YES | | +| CTCLoss | YES | | +| CTCLossV2 | NO | | +| CacheDataset | NO | | +| CacheDatasetV2 | NO | | +| Case | NO | | +| Cast | YES | | +| Ceil | YES | | +| CheckNumerics | YES | | +| CheckNumericsV2 | YES | | +| Cholesky | NO | | +| CholeskyGrad | NO | | +| ChooseFastestBranchDataset | NO | | +| ChooseFastestDataset | NO | | +| ClipByValue | YES | | +| CloseSummaryWriter | NO | | +| CollectiveAllToAllV2 | NO | | +| CollectiveAllToAllV3 | NO | | +| CollectiveAssignGroupV2 | NO | | +| CollectiveBcastRecv | NO | | +| CollectiveBcastRecvV2 | NO | | +| CollectiveBcastSend | NO | | +| CollectiveBcastSendV2 | NO | | +| CollectiveGather | NO | | +| CollectiveGatherV2 | NO | | +| CollectiveInitializeCommunicator | NO | | +| CollectivePermute | NO | | +| CollectiveReduce | NO | | +| CollectiveReduceScatterV2 | NO | | +| CollectiveReduceV2 | NO | | +| CollectiveReduceV3 | NO | | +| CombinedNonMaxSuppression | NO | | +| Complex | NO | | +| ComplexAbs | NO | | +| CompositeTensorVariantFromComponents | NO | | +| CompositeTensorVariantToComponents | NO | | +| CompressElement | NO | | +| ComputeAccidentalHits | NO | | +| ComputeBatchSize | NO | | +| Concat | YES | | +| ConcatOffset | NO | | +| ConcatV2 | YES | | +| ConcatenateDataset | NO | | +| ConditionalAccumulator | NO | | +| ConfigureDistributedTPU | NO | | +| ConfigureTPUEmbedding | NO | | +| Conj | NO | | +| ConjugateTranspose | NO | | +| Const | YES | | +| ConsumeMutexLock | NO | | +| ControlTrigger | NO | | +| Conv | NO | | +| Conv2D | YES | | +| Conv2DBackpropFilter | NO | | +| Conv2DBackpropFilterV2 | NO | | +| Conv2DBackpropInput | YES | | +| Conv2DBackpropInputV2 | NO | | +| Conv3D | YES | | +| Conv3DBackpropFilter | NO | | +| Conv3DBackpropFilterV2 | NO | | +| 
Conv3DBackpropInput | NO | | +| Conv3DBackpropInputV2 | YES | | +| Copy | NO | | +| CopyHost | NO | | +| Cos | YES | | +| Cosh | YES | | +| CountUpTo | NO | | +| CreateSummaryDbWriter | NO | | +| CreateSummaryFileWriter | NO | | +| CropAndResize | YES | | +| CropAndResizeGradBoxes | NO | | +| CropAndResizeGradImage | NO | | +| Cross | NO | | +| CrossReplicaSum | NO | | +| CudnnRNN | NO | | +| CudnnRNNBackprop | NO | | +| CudnnRNNBackpropV2 | NO | | +| CudnnRNNBackpropV3 | NO | | +| CudnnRNNCanonicalToParams | NO | | +| CudnnRNNCanonicalToParamsV2 | NO | | +| CudnnRNNParamsSize | NO | | +| CudnnRNNParamsToCanonical | NO | | +| CudnnRNNParamsToCanonicalV2 | NO | | +| CudnnRNNV2 | NO | | +| CudnnRNNV3 | NO | | +| Cumprod | NO | | +| Cumsum | YES | | +| CumulativeLogsumexp | NO | | +| DataFormatDimMap | NO | | +| DataFormatVecPermute | NO | | +| DataServiceDataset | NO | | +| DataServiceDatasetV2 | NO | | +| DataServiceDatasetV3 | NO | | +| DataServiceDatasetV4 | NO | | +| DatasetCardinality | NO | | +| DatasetFromGraph | NO | | +| DatasetToGraph | NO | | +| DatasetToGraphV2 | NO | | +| DatasetToSingleElement | NO | | +| DatasetToTFRecord | NO | | +| Dawsn | NO | | +| DebugGradientIdentity | NO | | +| DebugGradientRefIdentity | NO | | +| DebugIdentity | NO | | +| DebugIdentityV2 | NO | | +| DebugIdentityV3 | NO | | +| DebugNanCount | NO | | +| DebugNumericSummary | NO | | +| DebugNumericSummaryV2 | NO | | +| DecodeAndCropJpeg | NO | | +| DecodeBase64 | NO | | +| DecodeBmp | NO | | +| DecodeCSV | NO | | +| DecodeCompressed | NO | | +| DecodeGif | NO | | +| DecodeImage | NO | | +| DecodeJSONExample | NO | | +| DecodeJpeg | NO | | +| DecodePaddedRaw | NO | | +| DecodePng | NO | | +| DecodeProtoV2 | NO | | +| DecodeRaw | NO | | +| DecodeWav | NO | | +| DeepCopy | NO | | +| DeleteIterator | NO | | +| DeleteMemoryCache | NO | | +| DeleteMultiDeviceIterator | NO | | +| DeleteRandomSeedGenerator | NO | | +| DeleteSeedGenerator | NO | | +| DeleteSessionTensor | NO | | +| 
DenseBincount | NO | | +| DenseCountSparseOutput | NO | | +| DenseToCSRSparseMatrix | NO | | +| DenseToDenseSetOperation | NO | | +| DenseToSparseBatchDataset | NO | | +| DenseToSparseSetOperation | NO | | +| DepthToSpace | YES | | +| DepthwiseConv2dNative | YES | | +| DepthwiseConv2dNativeBackpropFilter | NO | | +| DepthwiseConv2dNativeBackpropInput | NO | | +| Dequantize | NO | | +| DeserializeIterator | NO | | +| DeserializeManySparse | NO | | +| DeserializeSparse | NO | | +| DestroyResourceOp | NO | | +| DestroyTemporaryVariable | NO | | +| DeviceIndex | NO | | +| Diag | NO | | +| DiagPart | NO | | +| Digamma | NO | | +| Dilation2D | NO | | +| Dilation2DBackpropFilter | NO | | +| Dilation2DBackpropInput | NO | | +| DirectedInterleaveDataset | NO | | +| DisableCopyOnRead | NO | | +| DistributedSave | NO | | +| Div | NO | | +| DivNoNan | YES | | +| DrawBoundingBoxes | NO | | +| DrawBoundingBoxesV2 | NO | | +| DummyIterationCounter | NO | | +| DummyMemoryCache | NO | | +| DummySeedGenerator | NO | | +| DynamicEnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| DynamicPartition | YES | | +| DynamicStitch | YES | | +| EagerPyFunc | NO | | +| EditDistance | NO | | +| Eig | NO | | +| Einsum | YES | | +| Elu | YES | | +| EluGrad | NO | | +| Empty | NO | | +| EmptyTensorList | YES | | +| EncodeBase64 | NO | | +| EncodeJpeg | NO | | +| EncodeJpegVariableQuality | NO | | +| EncodePng | NO | | +| EncodeProto | NO | | +| EncodeWav | NO | | +| EnqueueTPUEmbeddingArbitraryTensorBatch | NO | | +| EnqueueTPUEmbeddingIntegerBatch | NO | | +| EnqueueTPUEmbeddingRaggedTensorBatch | NO | | +| EnqueueTPUEmbeddingSparseBatch | NO | | +| EnqueueTPUEmbeddingSparseTensorBatch | NO | | +| EnsureShape | YES | | +| Enter | YES | | +| Equal | YES | | +| Erf | YES | | +| Erfc | NO | | +| Erfinv | NO | | +| EuclideanNorm | YES | | +| Exit | YES | | +| Exp | YES | | +| ExpandDims | YES | | +| ExperimentalAssertNextDataset | NO | | +| ExperimentalAutoShardDataset | NO | | +| 
ExperimentalBytesProducedStatsDataset | NO | | +| ExperimentalCSVDataset | NO | | +| ExperimentalChooseFastestDataset | NO | | +| ExperimentalDatasetCardinality | NO | | +| ExperimentalDatasetToTFRecord | NO | | +| ExperimentalDenseToSparseBatchDataset | NO | | +| ExperimentalDirectedInterleaveDataset | NO | | +| ExperimentalGroupByReducerDataset | NO | | +| ExperimentalGroupByWindowDataset | NO | | +| ExperimentalIgnoreErrorsDataset | NO | | +| ExperimentalIteratorGetDevice | NO | | +| ExperimentalLMDBDataset | NO | | +| ExperimentalLatencyStatsDataset | NO | | +| ExperimentalMapAndBatchDataset | NO | | +| ExperimentalMapDataset | NO | | +| ExperimentalMatchingFilesDataset | NO | | +| ExperimentalMaxIntraOpParallelismDataset | NO | | +| ExperimentalNonSerializableDataset | NO | | +| ExperimentalParallelInterleaveDataset | NO | | +| ExperimentalParseExampleDataset | NO | | +| ExperimentalPrivateThreadPoolDataset | NO | | +| ExperimentalRandomDataset | NO | | +| ExperimentalRebatchDataset | NO | | +| ExperimentalScanDataset | NO | | +| ExperimentalSetStatsAggregatorDataset | NO | | +| ExperimentalSleepDataset | NO | | +| ExperimentalSlidingWindowDataset | NO | | +| ExperimentalSqlDataset | NO | | +| ExperimentalStatsAggregatorHandle | NO | | +| ExperimentalStatsAggregatorSummary | NO | | +| ExperimentalTakeWhileDataset | NO | | +| ExperimentalThreadPoolDataset | NO | | +| ExperimentalThreadPoolHandle | NO | | +| ExperimentalUnbatchDataset | NO | | +| ExperimentalUniqueDataset | NO | | +| Expint | NO | | +| Expm1 | NO | | +| ExtractGlimpse | NO | | +| ExtractGlimpseV2 | NO | | +| ExtractImagePatches | YES | | +| ExtractJpegShape | NO | | +| ExtractVolumePatches | NO | | +| FFT | NO | | +| FFT2D | NO | | +| FFT3D | NO | | +| FIFOQueue | YES | | +| FIFOQueueV2 | YES | | +| Fact | NO | | +| FakeParam | NO | | +| FakeQuantWithMinMaxArgs | YES | | +| FakeQuantWithMinMaxArgsGradient | NO | | +| FakeQuantWithMinMaxVars | YES | | +| FakeQuantWithMinMaxVarsGradient | NO | | 
+| FakeQuantWithMinMaxVarsPerChannel | YES | | +| FakeQuantWithMinMaxVarsPerChannelGradient | NO | | +| FakeQueue | NO | | +| Fill | YES | | +| FilterByLastComponentDataset | NO | | +| FilterDataset | NO | | +| FinalizeDataset | NO | | +| Fingerprint | NO | | +| FixedLengthRecordDataset | NO | | +| FixedLengthRecordDatasetV2 | NO | | +| FixedLengthRecordReader | NO | | +| FixedLengthRecordReaderV2 | NO | | +| FixedUnigramCandidateSampler | NO | | +| FlatMapDataset | NO | | +| Floor | YES | | +| FloorDiv | YES | | +| FloorMod | YES | | +| FlushSummaryWriter | NO | | +| For | NO | | +| FractionalAvgPool | NO | | +| FractionalAvgPoolGrad | NO | | +| FractionalMaxPool | NO | | +| FractionalMaxPoolGrad | NO | | +| FresnelCos | NO | | +| FresnelSin | NO | | +| FusedBatchNorm | YES | | +| FusedBatchNormGrad | NO | | +| FusedBatchNormGradV2 | NO | | +| FusedBatchNormGradV3 | NO | | +| FusedBatchNormV2 | YES | | +| FusedBatchNormV3 | YES | | +| FusedPadConv2D | NO | | +| FusedResizeAndPadConv2D | NO | | +| GRUBlockCell | YES | | +| GRUBlockCellGrad | NO | | +| Gather | YES | | +| GatherNd | YES | | +| GatherV2 | YES | | +| GenerateBoundingBoxProposals | NO | | +| GenerateVocabRemapping | NO | | +| GeneratorDataset | NO | | +| GetElementAtIndex | NO | | +| GetOptions | NO | | +| GetSessionHandle | NO | | +| GetSessionHandleV2 | NO | | +| GetSessionTensor | NO | | +| Greater | YES | | +| GreaterEqual | YES | | +| GroupByReducerDataset | NO | | +| GroupByWindowDataset | NO | | +| GuaranteeConst | NO | | +| HSVToRGB | NO | | +| HashTable | YES | | +| HashTableV2 | YES | | +| HistogramFixedWidth | NO | | +| HistogramSummary | NO | | +| IFFT | NO | | +| IFFT2D | NO | | +| IFFT3D | NO | | +| IRFFT | NO | | +| IRFFT2D | NO | | +| IRFFT3D | NO | | +| Identity | YES | | +| IdentityN | YES | | +| IdentityReader | NO | | +| IdentityReaderV2 | NO | | +| If | YES | | +| Igamma | NO | | +| IgammaGradA | NO | | +| Igammac | NO | | +| IgnoreErrorsDataset | NO | | +| Imag | NO | | +| 
ImageProjectiveTransformV2 | NO | | +| ImageProjectiveTransformV3 | NO | | +| ImageSummary | NO | | +| ImmutableConst | NO | | +| ImportEvent | NO | | +| InTopK | NO | | +| InTopKV2 | NO | | +| InfeedDequeue | NO | | +| InfeedDequeueTuple | NO | | +| InfeedEnqueue | NO | | +| InfeedEnqueuePrelinearizedBuffer | NO | | +| InfeedEnqueueTuple | NO | | +| InitializeTable | NO | | +| InitializeTableFromDataset | NO | | +| InitializeTableFromTextFile | NO | | +| InitializeTableFromTextFileV2 | NO | | +| InitializeTableV2 | NO | | +| InplaceAdd | NO | | +| InplaceSub | NO | | +| InplaceUpdate | NO | | +| InterleaveDataset | NO | | +| Inv | NO | | +| InvGrad | NO | | +| Invert | NO | | +| InvertPermutation | YES | | +| IsBoostedTreesEnsembleInitialized | NO | | +| IsBoostedTreesQuantileStreamResourceInitialized | NO | | +| IsFinite | YES | | +| IsInf | YES | | +| IsNan | YES | | +| IsTPUEmbeddingInitialized | NO | | +| IsVariableInitialized | YES | | +| IsotonicRegression | NO | | +| Iterator | YES | | +| IteratorFromStringHandle | NO | | +| IteratorFromStringHandleV2 | NO | | +| IteratorGetDevice | NO | | +| IteratorGetNext | YES | | +| IteratorGetNextAsOptional | NO | | +| IteratorGetNextSync | NO | | +| IteratorToStringHandle | NO | | +| IteratorV2 | YES | | +| L2Loss | YES | | +| LMDBDataset | NO | | +| LMDBReader | NO | | +| LRN | YES | | +| LRNGrad | NO | | +| LSTMBlockCell | NO | | +| LSTMBlockCellGrad | NO | | +| LatencyStatsDataset | NO | | +| LeakyRelu | YES | | +| LeakyReluGrad | NO | | +| LearnedUnigramCandidateSampler | NO | | +| LeftShift | NO | | +| LegacyParallelInterleaveDatasetV2 | NO | | +| Less | YES | | +| LessEqual | YES | | +| Lgamma | NO | | +| LinSpace | YES | | +| ListDataset | NO | | +| ListDiff | YES | | +| LoadAndRemapMatrix | NO | | +| LoadDataset | NO | | +| LoadTPUEmbeddingADAMParameters | NO | | +| LoadTPUEmbeddingAdadeltaParameters | NO | | +| LoadTPUEmbeddingAdagradMomentumParameters | NO | | +| LoadTPUEmbeddingAdagradParameters | NO | | 
+| LoadTPUEmbeddingCenteredRMSPropParameters | NO | | +| LoadTPUEmbeddingFTRLParameters | NO | | +| LoadTPUEmbeddingFrequencyEstimatorParameters | NO | | +| LoadTPUEmbeddingMDLAdagradLightParameters | NO | | +| LoadTPUEmbeddingMomentumParameters | NO | | +| LoadTPUEmbeddingProximalAdagradParameters | NO | | +| LoadTPUEmbeddingProximalYogiParameters | NO | | +| LoadTPUEmbeddingRMSPropParameters | NO | | +| LoadTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Log | YES | | +| Log1p | YES | | +| LogMatrixDeterminant | NO | | +| LogSoftmax | YES | | +| LogUniformCandidateSampler | NO | | +| LogicalAnd | YES | | +| LogicalNot | YES | | +| LogicalOr | YES | | +| LookupTableExport | NO | | +| LookupTableExportV2 | NO | | +| LookupTableFind | NO | | +| LookupTableFindV2 | NO | | +| LookupTableImport | NO | | +| LookupTableImportV2 | NO | | +| LookupTableInsert | YES | | +| LookupTableInsertV2 | YES | | +| LookupTableRemoveV2 | NO | | +| LookupTableSize | NO | | +| LookupTableSizeV2 | NO | | +| LoopCond | YES | | +| LowerBound | NO | | +| Lu | NO | | +| MakeIterator | NO | | +| MapAndBatchDataset | NO | | +| MapClear | NO | | +| MapDataset | NO | | +| MapDefun | NO | | +| MapIncompleteSize | NO | | +| MapPeek | NO | | +| MapSize | NO | | +| MapStage | NO | | +| MapUnstage | NO | | +| MapUnstageNoKey | NO | | +| MatMul | YES | | +| MatchingFiles | NO | | +| MatchingFilesDataset | NO | | +| MatrixBandPart | NO | | +| MatrixDeterminant | NO | | +| MatrixDiag | YES | | +| MatrixDiagPart | NO | | +| MatrixDiagPartV2 | NO | | +| MatrixDiagPartV3 | NO | | +| MatrixDiagV2 | NO | | +| MatrixDiagV3 | NO | | +| MatrixExponential | NO | | +| MatrixInverse | NO | | +| MatrixLogarithm | NO | | +| MatrixSetDiag | NO | | +| MatrixSetDiagV2 | NO | | +| MatrixSetDiagV3 | NO | | +| MatrixSolve | NO | | +| MatrixSolveLs | NO | | +| MatrixSquareRoot | NO | | +| MatrixTriangularSolve | NO | | +| Max | YES | | +| MaxIntraOpParallelismDataset | NO | | +| MaxPool | YES | | +| MaxPool3D | 
YES | | +| MaxPool3DGrad | NO | | +| MaxPool3DGradGrad | NO | | +| MaxPoolGrad | NO | | +| MaxPoolGradGrad | NO | | +| MaxPoolGradGradV2 | NO | | +| MaxPoolGradGradWithArgmax | NO | | +| MaxPoolGradV2 | NO | | +| MaxPoolGradWithArgmax | NO | | +| MaxPoolV2 | YES | | +| MaxPoolWithArgmax | YES | | +| Maximum | YES | | +| Mean | YES | | +| Merge | YES | | +| MergeSummary | NO | | +| MergeV2Checkpoints | YES | | +| Mfcc | NO | | +| Min | YES | | +| Minimum | YES | | +| MirrorPad | YES | | +| MirrorPadGrad | NO | | +| Mod | YES | | +| ModelDataset | NO | | +| Mul | YES | | +| MulNoNan | NO | | +| MultiDeviceIterator | NO | | +| MultiDeviceIteratorFromStringHandle | NO | | +| MultiDeviceIteratorGetNextFromShard | NO | | +| MultiDeviceIteratorInit | NO | | +| MultiDeviceIteratorToStringHandle | NO | | +| Multinomial | NO | | +| MutableDenseHashTable | NO | | +| MutableDenseHashTableV2 | NO | | +| MutableHashTable | YES | | +| MutableHashTableOfTensors | NO | | +| MutableHashTableOfTensorsV2 | NO | | +| MutableHashTableV2 | YES | | +| MutexLock | NO | | +| MutexV2 | NO | | +| NcclAllReduce | NO | | +| NcclBroadcast | NO | | +| NcclReduce | NO | | +| Ndtri | NO | | +| Neg | YES | | +| NextAfter | NO | | +| NextIteration | YES | | +| NoOp | YES | | +| NonDeterministicInts | NO | | +| NonMaxSuppression | YES | | +| NonMaxSuppressionV2 | YES | | +| NonMaxSuppressionV3 | YES | | +| NonMaxSuppressionV4 | YES | | +| NonMaxSuppressionV5 | YES | | +| NonMaxSuppressionWithOverlaps | NO | | +| NonSerializableDataset | NO | | +| NotEqual | YES | | +| NthElement | NO | | +| OneHot | YES | | +| OneShotIterator | YES | | +| OnesLike | YES | | +| OptimizeDataset | NO | | +| OptimizeDatasetV2 | NO | | +| OptionalFromValue | NO | | +| OptionalGetValue | NO | | +| OptionalHasValue | NO | | +| OptionalNone | NO | | +| OptionsDataset | NO | | +| OrderedMapClear | NO | | +| OrderedMapIncompleteSize | NO | | +| OrderedMapPeek | NO | | +| OrderedMapSize | NO | | +| OrderedMapStage | NO | | +| 
OrderedMapUnstage | NO | | +| OrderedMapUnstageNoKey | NO | | +| OutfeedDequeue | NO | | +| OutfeedDequeueTuple | NO | | +| OutfeedDequeueTupleV2 | NO | | +| OutfeedDequeueV2 | NO | | +| OutfeedEnqueue | NO | | +| OutfeedEnqueueTuple | NO | | +| Pack | YES | | +| Pad | YES | | +| PadV2 | YES | | +| PaddedBatchDataset | NO | | +| PaddedBatchDatasetV2 | NO | | +| PaddingFIFOQueue | NO | | +| PaddingFIFOQueueV2 | NO | | +| ParallelBatchDataset | NO | | +| ParallelConcat | NO | | +| ParallelDynamicStitch | YES | | +| ParallelFilterDataset | NO | | +| ParallelInterleaveDataset | NO | | +| ParallelInterleaveDatasetV2 | NO | | +| ParallelInterleaveDatasetV3 | NO | | +| ParallelInterleaveDatasetV4 | NO | | +| ParallelMapDataset | NO | | +| ParallelMapDatasetV2 | NO | | +| ParameterizedTruncatedNormal | NO | | +| ParseExample | NO | | +| ParseExampleDataset | NO | | +| ParseExampleDatasetV2 | NO | | +| ParseExampleV2 | NO | | +| ParseSequenceExample | NO | | +| ParseSequenceExampleV2 | NO | | +| ParseSingleExample | NO | | +| ParseSingleSequenceExample | NO | | +| ParseTensor | NO | | +| PartitionedCall | YES | | +| Placeholder | YES | | +| PlaceholderV2 | NO | | +| PlaceholderWithDefault | YES | | +| Polygamma | NO | | +| PopulationCount | NO | | +| Pow | YES | | +| PrefetchDataset | NO | | +| Prelinearize | NO | | +| PrelinearizeTuple | NO | | +| PreventGradient | YES | | +| Print | NO | | +| PrintV2 | NO | | +| PriorityQueue | NO | | +| PriorityQueueV2 | NO | | +| PrivateThreadPoolDataset | NO | | +| Prod | YES | | +| PyFunc | NO | | +| PyFuncStateless | NO | | +| Qr | NO | | +| QuantizeAndDequantize | NO | | +| QuantizeAndDequantizeV2 | NO | | +| QuantizeAndDequantizeV3 | NO | | +| QuantizeAndDequantizeV4 | NO | | +| QuantizeAndDequantizeV4Grad | NO | | +| QuantizeDownAndShrinkRange | NO | | +| QuantizeV2 | NO | | +| QuantizedAdd | NO | | +| QuantizedAvgPool | NO | | +| QuantizedBatchNormWithGlobalNormalization | NO | | +| QuantizedBiasAdd | NO | | +| QuantizedConcat | 
NO | | +| QuantizedConv2D | NO | | +| QuantizedConv2DAndRelu | NO | | +| QuantizedConv2DAndReluAndRequantize | NO | | +| QuantizedConv2DAndRequantize | NO | | +| QuantizedConv2DPerChannel | NO | | +| QuantizedConv2DWithBias | NO | | +| QuantizedConv2DWithBiasAndRelu | NO | | +| QuantizedConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasAndRequantize | NO | | +| QuantizedConv2DWithBiasSignedSumAndReluAndRequantize | NO | | +| QuantizedConv2DWithBiasSumAndRelu | NO | | +| QuantizedConv2DWithBiasSumAndReluAndRequantize | NO | | +| QuantizedDepthwiseConv2D | NO | | +| QuantizedDepthwiseConv2DWithBias | NO | | +| QuantizedDepthwiseConv2DWithBiasAndRelu | NO | | +| QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize | NO | | +| QuantizedInstanceNorm | NO | | +| QuantizedMatMul | NO | | +| QuantizedMatMulWithBias | NO | | +| QuantizedMatMulWithBiasAndDequantize | NO | | +| QuantizedMatMulWithBiasAndRelu | NO | | +| QuantizedMatMulWithBiasAndReluAndRequantize | NO | | +| QuantizedMatMulWithBiasAndRequantize | NO | | +| QuantizedMaxPool | NO | | +| QuantizedMul | NO | | +| QuantizedRelu | NO | | +| QuantizedRelu6 | NO | | +| QuantizedReluX | NO | | +| QuantizedReshape | NO | | +| QuantizedResizeBilinear | NO | | +| QueueClose | NO | | +| QueueCloseV2 | NO | | +| QueueDequeue | YES | | +| QueueDequeueMany | YES | | +| QueueDequeueManyV2 | NO | | +| QueueDequeueUpTo | YES | | +| QueueDequeueUpToV2 | YES | | +| QueueDequeueV2 | YES | | +| QueueEnqueue | NO | | +| QueueEnqueueMany | NO | | +| QueueEnqueueManyV2 | NO | | +| QueueEnqueueV2 | NO | | +| QueueIsClosed | NO | | +| QueueIsClosedV2 | NO | | +| QueueSize | NO | | +| QueueSizeV2 | NO | | +| RFFT | NO | | +| RFFT2D | NO | | +| RFFT3D | NO | | +| RGBToHSV | NO | | +| RaggedBincount | NO | | +| RaggedCountSparseOutput | NO | | +| RaggedCross | NO | | +| RaggedFillEmptyRows | NO | | +| RaggedFillEmptyRowsGrad | NO | | +| RaggedGather | NO | | +| RaggedRange | NO | | +| RaggedTensorFromVariant | NO | | +| 
RaggedTensorToSparse | NO | | +| RaggedTensorToTensor | NO | | +| RaggedTensorToVariant | NO | | +| RaggedTensorToVariantGradient | NO | | +| RandomCrop | NO | | +| RandomDataset | NO | | +| RandomDatasetV2 | NO | | +| RandomGamma | NO | | +| RandomGammaGrad | NO | | +| RandomIndexShuffle | NO | | +| RandomPoisson | NO | | +| RandomPoissonV2 | NO | | +| RandomShuffle | NO | | +| RandomShuffleQueue | NO | | +| RandomShuffleQueueV2 | NO | | +| RandomStandardNormal | NO | | +| RandomUniform | YES | | +| RandomUniformInt | YES | | +| Range | YES | | +| RangeDataset | NO | | +| Rank | YES | | +| ReadFile | NO | | +| ReadVariableOp | YES | | +| ReadVariableXlaSplitND | NO | | +| ReaderNumRecordsProduced | NO | | +| ReaderNumRecordsProducedV2 | NO | | +| ReaderNumWorkUnitsCompleted | NO | | +| ReaderNumWorkUnitsCompletedV2 | NO | | +| ReaderRead | NO | | +| ReaderReadUpTo | NO | | +| ReaderReadUpToV2 | NO | | +| ReaderReadV2 | NO | | +| ReaderReset | NO | | +| ReaderResetV2 | NO | | +| ReaderRestoreState | NO | | +| ReaderRestoreStateV2 | NO | | +| ReaderSerializeState | NO | | +| ReaderSerializeStateV2 | NO | | +| Real | NO | | +| RealDiv | YES | | +| RebatchDataset | NO | | +| RebatchDatasetV2 | NO | | +| Reciprocal | YES | | +| ReciprocalGrad | NO | | +| RecordInput | NO | | +| Recv | NO | | +| RecvTPUEmbeddingActivations | NO | | +| ReduceDataset | NO | | +| ReduceJoin | NO | | +| RefEnter | NO | | +| RefExit | NO | | +| RefIdentity | NO | | +| RefMerge | NO | | +| RefNextIteration | NO | | +| RefSelect | NO | | +| RefSwitch | NO | | +| RegexFullMatch | NO | | +| RegexReplace | NO | | +| RegisterDataset | NO | | +| RegisterDatasetV2 | NO | | +| Relu | YES | | +| Relu6 | YES | | +| Relu6Grad | NO | | +| ReluGrad | NO | | +| RemoteCall | NO | | +| RepeatDataset | NO | | +| RequantizationRange | NO | | +| RequantizationRangePerChannel | NO | | +| Requantize | NO | | +| RequantizePerChannel | NO | | +| Reshape | YES | | +| ResizeArea | NO | | +| ResizeBicubic | NO | | +| 
ResizeBicubicGrad | NO | | +| ResizeBilinear | YES | | +| ResizeBilinearGrad | NO | | +| ResizeNearestNeighbor | YES | | +| ResizeNearestNeighborGrad | NO | | +| ResourceAccumulatorApplyGradient | NO | | +| ResourceAccumulatorNumAccumulated | NO | | +| ResourceAccumulatorSetGlobalStep | NO | | +| ResourceAccumulatorTakeGradient | NO | | +| ResourceApplyAdaMax | NO | | +| ResourceApplyAdadelta | NO | | +| ResourceApplyAdagrad | NO | | +| ResourceApplyAdagradDA | NO | | +| ResourceApplyAdagradV2 | NO | | +| ResourceApplyAdam | NO | | +| ResourceApplyAdamWithAmsgrad | NO | | +| ResourceApplyAddSign | NO | | +| ResourceApplyCenteredRMSProp | NO | | +| ResourceApplyFtrl | NO | | +| ResourceApplyFtrlV2 | NO | | +| ResourceApplyGradientDescent | NO | | +| ResourceApplyKerasMomentum | NO | | +| ResourceApplyMomentum | NO | | +| ResourceApplyPowerSign | NO | | +| ResourceApplyProximalAdagrad | NO | | +| ResourceApplyProximalGradientDescent | NO | | +| ResourceApplyRMSProp | NO | | +| ResourceConditionalAccumulator | NO | | +| ResourceCountUpTo | NO | | +| ResourceGather | YES | | +| ResourceGatherNd | NO | | +| ResourceScatterAdd | NO | | +| ResourceScatterDiv | NO | | +| ResourceScatterMax | NO | | +| ResourceScatterMin | NO | | +| ResourceScatterMul | NO | | +| ResourceScatterNdAdd | NO | | +| ResourceScatterNdMax | NO | | +| ResourceScatterNdMin | NO | | +| ResourceScatterNdSub | NO | | +| ResourceScatterNdUpdate | NO | | +| ResourceScatterSub | NO | | +| ResourceScatterUpdate | NO | | +| ResourceSparseApplyAdadelta | NO | | +| ResourceSparseApplyAdagrad | NO | | +| ResourceSparseApplyAdagradDA | NO | | +| ResourceSparseApplyAdagradV2 | NO | | +| ResourceSparseApplyCenteredRMSProp | NO | | +| ResourceSparseApplyFtrl | NO | | +| ResourceSparseApplyFtrlV2 | NO | | +| ResourceSparseApplyKerasMomentum | NO | | +| ResourceSparseApplyMomentum | NO | | +| ResourceSparseApplyProximalAdagrad | NO | | +| ResourceSparseApplyProximalGradientDescent | NO | | +| 
ResourceSparseApplyRMSProp | NO | | +| ResourceStridedSliceAssign | NO | | +| Restore | NO | | +| RestoreSlice | NO | | +| RestoreV2 | YES | | +| RetrieveTPUEmbeddingADAMParameters | NO | | +| RetrieveTPUEmbeddingAdadeltaParameters | NO | | +| RetrieveTPUEmbeddingAdagradMomentumParameters | NO | | +| RetrieveTPUEmbeddingAdagradParameters | NO | | +| RetrieveTPUEmbeddingCenteredRMSPropParameters | NO | | +| RetrieveTPUEmbeddingFTRLParameters | NO | | +| RetrieveTPUEmbeddingFrequencyEstimatorParameters | NO | | +| RetrieveTPUEmbeddingMDLAdagradLightParameters | NO | | +| RetrieveTPUEmbeddingMomentumParameters | NO | | +| RetrieveTPUEmbeddingProximalAdagradParameters | NO | | +| RetrieveTPUEmbeddingProximalYogiParameters | NO | | +| RetrieveTPUEmbeddingRMSPropParameters | NO | | +| RetrieveTPUEmbeddingStochasticGradientDescentParameters | NO | | +| Reverse | YES | | +| ReverseSequence | YES | | +| ReverseV2 | YES | | +| RewriteDataset | NO | | +| RightShift | NO | | +| Rint | NO | | +| RngReadAndSkip | NO | | +| RngSkip | NO | | +| Roll | YES | | +| Round | YES | | +| Rsqrt | YES | | +| RsqrtGrad | NO | | +| SampleDistortedBoundingBox | NO | | +| SampleDistortedBoundingBoxV2 | NO | | +| SamplingDataset | NO | | +| Save | NO | | +| SaveDataset | NO | | +| SaveDatasetV2 | NO | | +| SaveSlices | NO | | +| SaveV2 | YES | | +| ScalarSummary | NO | | +| ScaleAndTranslate | NO | | +| ScaleAndTranslateGrad | NO | | +| ScanDataset | NO | | +| ScatterAdd | NO | | +| ScatterDiv | NO | | +| ScatterMax | NO | | +| ScatterMin | NO | | +| ScatterMul | NO | | +| ScatterNd | YES | | +| ScatterNdAdd | NO | | +| ScatterNdMax | NO | | +| ScatterNdMin | NO | | +| ScatterNdNonAliasingAdd | NO | | +| ScatterNdSub | NO | | +| ScatterNdUpdate | NO | | +| ScatterSub | NO | | +| ScatterUpdate | NO | | +| SdcaFprint | NO | | +| SdcaOptimizer | NO | | +| SdcaOptimizerV2 | NO | | +| SdcaShrinkL1 | NO | | +| SegmentMax | NO | | +| SegmentMaxV2 | NO | | +| SegmentMean | NO | | +| SegmentMin | NO | | 
+| SegmentMinV2 | NO | | +| SegmentProd | NO | | +| SegmentProdV2 | NO | | +| SegmentSum | YES | | +| SegmentSumV2 | NO | | +| Select | YES | | +| SelectV2 | YES | | +| SelfAdjointEig | NO | | +| SelfAdjointEigV2 | NO | | +| Selu | YES | | +| SeluGrad | NO | | +| Send | NO | | +| SendTPUEmbeddingGradients | NO | | +| SerializeIterator | NO | | +| SerializeManySparse | NO | | +| SerializeSparse | NO | | +| SerializeTensor | NO | | +| SetSize | NO | | +| SetStatsAggregatorDataset | NO | | +| Shape | YES | | +| ShapeN | YES | | +| ShardDataset | NO | | +| ShardedFilename | YES | | +| ShardedFilespec | NO | | +| ShuffleAndRepeatDataset | NO | | +| ShuffleAndRepeatDatasetV2 | NO | | +| ShuffleDataset | NO | | +| ShuffleDatasetV2 | NO | | +| ShuffleDatasetV3 | NO | | +| ShutdownDistributedTPU | NO | | +| Sigmoid | YES | | +| SigmoidGrad | NO | | +| Sign | YES | | +| Sin | YES | | +| Sinh | YES | | +| Size | YES | | +| SkipDataset | NO | | +| SleepDataset | NO | | +| Slice | YES | | +| SlidingWindowDataset | NO | | +| Snapshot | YES | | +| SnapshotChunkDataset | NO | | +| SnapshotDataset | NO | | +| SnapshotDatasetReader | NO | | +| SnapshotDatasetV2 | NO | | +| SnapshotNestedDatasetReader | NO | | +| SobolSample | NO | | +| Softmax | YES | | +| SoftmaxCrossEntropyWithLogits | NO | | +| Softplus | YES | | +| SoftplusGrad | NO | | +| Softsign | YES | | +| SoftsignGrad | NO | | +| SpaceToBatch | NO | | +| SpaceToBatchND | YES | | +| SpaceToDepth | YES | | +| SparseAccumulatorApplyGradient | NO | | +| SparseAccumulatorTakeGradient | NO | | +| SparseAdd | NO | | +| SparseAddGrad | NO | | +| SparseApplyAdadelta | NO | | +| SparseApplyAdagrad | NO | | +| SparseApplyAdagradDA | NO | | +| SparseApplyAdagradV2 | NO | | +| SparseApplyCenteredRMSProp | NO | | +| SparseApplyFtrl | NO | | +| SparseApplyFtrlV2 | NO | | +| SparseApplyMomentum | NO | | +| SparseApplyProximalAdagrad | NO | | +| SparseApplyProximalGradientDescent | NO | | +| SparseApplyRMSProp | NO | | +| SparseBincount | 
NO | | +| SparseConcat | NO | | +| SparseConditionalAccumulator | NO | | +| SparseCountSparseOutput | NO | | +| SparseCross | NO | | +| SparseCrossHashed | NO | | +| SparseCrossV2 | NO | | +| SparseDenseCwiseAdd | NO | | +| SparseDenseCwiseDiv | NO | | +| SparseDenseCwiseMul | NO | | +| SparseFillEmptyRows | YES | | +| SparseFillEmptyRowsGrad | NO | | +| SparseMatMul | NO | | +| SparseMatrixAdd | NO | | +| SparseMatrixMatMul | NO | | +| SparseMatrixMul | NO | | +| SparseMatrixNNZ | NO | | +| SparseMatrixOrderingAMD | NO | | +| SparseMatrixSoftmax | NO | | +| SparseMatrixSoftmaxGrad | NO | | +| SparseMatrixSparseCholesky | NO | | +| SparseMatrixSparseMatMul | NO | | +| SparseMatrixTranspose | NO | | +| SparseMatrixZeros | NO | | +| SparseReduceMax | NO | | +| SparseReduceMaxSparse | NO | | +| SparseReduceSum | NO | | +| SparseReduceSumSparse | NO | | +| SparseReorder | NO | | +| SparseReshape | YES | | +| SparseSegmentMean | NO | | +| SparseSegmentMeanGrad | NO | | +| SparseSegmentMeanGradV2 | NO | | +| SparseSegmentMeanWithNumSegments | NO | | +| SparseSegmentSqrtN | NO | | +| SparseSegmentSqrtNGrad | NO | | +| SparseSegmentSqrtNGradV2 | NO | | +| SparseSegmentSqrtNWithNumSegments | NO | | +| SparseSegmentSum | YES | | +| SparseSegmentSumGrad | NO | | +| SparseSegmentSumGradV2 | NO | | +| SparseSegmentSumWithNumSegments | NO | | +| SparseSlice | NO | | +| SparseSliceGrad | NO | | +| SparseSoftmax | NO | | +| SparseSoftmaxCrossEntropyWithLogits | NO | | +| SparseSparseMaximum | NO | | +| SparseSparseMinimum | NO | | +| SparseSplit | NO | | +| SparseTensorDenseAdd | NO | | +| SparseTensorDenseMatMul | NO | | +| SparseTensorSliceDataset | NO | | +| SparseTensorToCSRSparseMatrix | NO | | +| SparseToDense | YES | | +| SparseToSparseSetOperation | NO | | +| Spence | NO | | +| Split | YES | | +| SplitV | YES | | +| SqlDataset | NO | | +| Sqrt | YES | | +| SqrtGrad | NO | | +| Square | YES | | +| SquaredDifference | YES | | +| Squeeze | YES | | +| Stack | NO | | +| 
StackClose | NO | | +| StackCloseV2 | NO | | +| StackPop | NO | | +| StackPopV2 | NO | | +| StackPush | NO | | +| StackPushV2 | NO | | +| StackV2 | NO | | +| Stage | NO | | +| StageClear | NO | | +| StagePeek | NO | | +| StageSize | NO | | +| StatefulPartitionedCall | YES | | +| StatefulRandomBinomial | NO | | +| StatefulStandardNormal | NO | | +| StatefulStandardNormalV2 | NO | | +| StatefulTruncatedNormal | NO | | +| StatefulUniform | NO | | +| StatefulUniformFullInt | NO | | +| StatefulUniformInt | NO | | +| StatelessCase | NO | | +| StatelessIf | YES | | +| StatelessMultinomial | NO | | +| StatelessParameterizedTruncatedNormal | NO | | +| StatelessRandomBinomial | NO | | +| StatelessRandomGammaV2 | NO | | +| StatelessRandomGammaV3 | NO | | +| StatelessRandomGetAlg | NO | | +| StatelessRandomGetKeyCounter | NO | | +| StatelessRandomGetKeyCounterAlg | NO | | +| StatelessRandomNormal | NO | | +| StatelessRandomNormalV2 | NO | | +| StatelessRandomPoisson | NO | | +| StatelessRandomUniform | NO | | +| StatelessRandomUniformFullInt | NO | | +| StatelessRandomUniformFullIntV2 | NO | | +| StatelessRandomUniformInt | NO | | +| StatelessRandomUniformIntV2 | NO | | +| StatelessRandomUniformV2 | NO | | +| StatelessSampleDistortedBoundingBox | NO | | +| StatelessShuffle | NO | | +| StatelessTruncatedNormal | NO | | +| StatelessTruncatedNormalV2 | NO | | +| StatelessWhile | YES | | +| StaticRegexFullMatch | YES | | +| StaticRegexReplace | NO | | +| StatsAggregatorHandle | NO | | +| StatsAggregatorHandleV2 | NO | | +| StatsAggregatorSetSummaryWriter | NO | | +| StatsAggregatorSummary | NO | | +| StopGradient | YES | | +| StridedSlice | YES | | +| StridedSliceAssign | NO | | +| StridedSliceGrad | NO | | +| StringFormat | NO | | +| StringJoin | YES | | +| StringLength | NO | | +| StringLower | NO | | +| StringNGrams | NO | | +| StringSplit | NO | | +| StringSplitV2 | NO | | +| StringStrip | NO | | +| StringToHashBucket | NO | | +| StringToHashBucketFast | NO | | +| 
StringToHashBucketStrong | NO | | +| StringToNumber | NO | | +| StringUpper | NO | | +| Sub | YES | | +| Substr | NO | | +| Sum | YES | | +| SummaryWriter | NO | | +| Svd | NO | | +| Switch | YES | | +| SymbolicGradient | NO | | +| SyncDevice | NO | | +| TFRecordDataset | NO | | +| TFRecordDatasetV2 | NO | | +| TFRecordReader | NO | | +| TFRecordReaderV2 | NO | | +| TPUCompilationResult | NO | | +| TPUEmbeddingActivations | NO | | +| TPUOrdinalSelector | NO | | +| TPUPartitionedCall | NO | | +| TPUPartitionedInput | NO | | +| TPUPartitionedInputV2 | NO | | +| TPUPartitionedOutput | NO | | +| TPUPartitionedOutputV2 | NO | | +| TPUReplicateMetadata | NO | | +| TPUReplicatedInput | NO | | +| TPUReplicatedOutput | NO | | +| TakeDataset | NO | | +| TakeManySparseFromTensorsMap | NO | | +| TakeWhileDataset | NO | | +| Tan | YES | | +| Tanh | YES | | +| TanhGrad | NO | | +| TemporaryVariable | NO | | +| TensorArray | NO | | +| TensorArrayClose | NO | | +| TensorArrayCloseV2 | NO | | +| TensorArrayCloseV3 | YES | | +| TensorArrayConcat | NO | | +| TensorArrayConcatV2 | NO | | +| TensorArrayConcatV3 | YES | | +| TensorArrayGather | NO | | +| TensorArrayGatherV2 | NO | | +| TensorArrayGatherV3 | YES | | +| TensorArrayGrad | NO | | +| TensorArrayGradV2 | NO | | +| TensorArrayGradV3 | NO | | +| TensorArrayGradWithShape | NO | | +| TensorArrayPack | NO | | +| TensorArrayRead | NO | | +| TensorArrayReadV2 | NO | | +| TensorArrayReadV3 | YES | | +| TensorArrayScatter | NO | | +| TensorArrayScatterV2 | NO | | +| TensorArrayScatterV3 | YES | | +| TensorArraySize | NO | | +| TensorArraySizeV2 | NO | | +| TensorArraySizeV3 | YES | | +| TensorArraySplit | NO | | +| TensorArraySplitV2 | NO | | +| TensorArraySplitV3 | NO | | +| TensorArrayUnpack | NO | | +| TensorArrayV2 | NO | | +| TensorArrayV3 | YES | | +| TensorArrayWrite | NO | | +| TensorArrayWriteV2 | NO | | +| TensorArrayWriteV3 | YES | | +| TensorDataset | NO | | +| TensorListConcat | NO | | +| TensorListConcatLists | NO | | +| 
TensorListConcatV2 | NO | | +| TensorListElementShape | NO | | +| TensorListFromTensor | YES | | +| TensorListGather | NO | | +| TensorListGetItem | YES | | +| TensorListLength | YES | | +| TensorListPopBack | NO | | +| TensorListPushBack | YES | | +| TensorListPushBackBatch | NO | | +| TensorListReserve | YES | | +| TensorListResize | YES | | +| TensorListScatter | NO | | +| TensorListScatterIntoExistingList | NO | | +| TensorListScatterV2 | NO | | +| TensorListSetItem | YES | | +| TensorListSplit | NO | | +| TensorListStack | YES | | +| TensorScatterAdd | NO | | +| TensorScatterMax | NO | | +| TensorScatterMin | NO | | +| TensorScatterSub | NO | | +| TensorScatterUpdate | NO | | +| TensorSliceDataset | NO | | +| TensorStridedSliceUpdate | NO | | +| TensorSummary | NO | | +| TensorSummaryV2 | NO | | +| TextLineDataset | NO | | +| TextLineReader | NO | | +| TextLineReaderV2 | NO | | +| ThreadPoolDataset | NO | | +| ThreadPoolHandle | NO | | +| ThreadUnsafeUnigramCandidateSampler | NO | | +| Tile | YES | | +| TileGrad | NO | | +| Timestamp | NO | | +| ToBool | YES | | +| TopK | YES | | +| TopKV2 | YES | | +| Transpose | YES | | +| TridiagonalMatMul | NO | | +| TridiagonalSolve | NO | | +| TruncateDiv | YES | | +| TruncateMod | YES | | +| TruncatedNormal | NO | | +| Unbatch | NO | | +| UnbatchDataset | NO | | +| UnbatchGrad | NO | | +| UncompressElement | NO | | +| UnicodeDecode | NO | | +| UnicodeDecodeWithOffsets | NO | | +| UnicodeEncode | NO | | +| UnicodeScript | NO | | +| UnicodeTranscode | NO | | +| UniformCandidateSampler | NO | | +| UniformDequantize | NO | | +| UniformQuantize | NO | | +| UniformQuantizedAdd | NO | | +| UniformQuantizedClipByValue | NO | | +| UniformQuantizedConvolution | NO | | +| UniformQuantizedConvolutionHybrid | NO | | +| UniformQuantizedDot | NO | | +| UniformQuantizedDotHybrid | NO | | +| UniformRequantize | NO | | +| Unique | YES | | +| UniqueDataset | NO | | +| UniqueV2 | NO | | +| UniqueWithCounts | NO | | +| UniqueWithCountsV2 | 
NO | | +| Unpack | YES | | +| UnravelIndex | YES | | +| UnsortedSegmentJoin | NO | | +| UnsortedSegmentMax | NO | | +| UnsortedSegmentMin | NO | | +| UnsortedSegmentProd | NO | | +| UnsortedSegmentSum | YES | | +| Unstage | NO | | +| UnwrapDatasetVariant | NO | | +| UpperBound | NO | | +| VarHandleOp | YES | | +| VarIsInitializedOp | YES | | +| Variable | YES | | +| VariableShape | NO | | +| VariableV2 | YES | | +| Where | YES | | +| While | YES | | +| WholeFileReader | NO | | +| WholeFileReaderV2 | NO | | +| WindowDataset | NO | | +| WindowOp | NO | | +| WorkerHeartbeat | NO | | +| WrapDatasetVariant | NO | | +| WriteAudioSummary | NO | | +| WriteFile | NO | | +| WriteGraphSummary | NO | | +| WriteHistogramSummary | NO | | +| WriteImageSummary | NO | | +| WriteRawProtoSummary | NO | | +| WriteScalarSummary | NO | | +| WriteSummary | NO | | +| Xdivy | YES | | +| XlaConcatND | NO | | +| XlaSplitND | NO | | +| Xlog1py | YES | | +| Xlogy | YES | | +| ZerosLike | YES | | +| Zeta | NO | | +| ZipDataset | NO | | From 009ef5657c1fcb742ceffa3c32f29908edffa43e Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 24 Oct 2023 00:50:26 +0400 Subject: [PATCH 007/275] [TF FE] Provide full support of TF1 Control flow and TensorArray* ops (#20270) * [TF FE] Provide full support of TF1 Control flow and TensorArray ops Signed-off-by: Kazantsev, Roman * Add missed header for TensorArrayV3 op * Temporarily disable GRU cell fusion * Update src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp * Fix a case when element_shape for TensorArrayV3 * Fix translator for TensorArrayCloseV3 * Update summarize graph with TensorArrayCloseV3 * Add layer tests for TensorArrayScatterV3, Close, Size, Array * Fix output shape for Merge node * Remove unused variable * Fix translator for TensorArrayConcatV3 * Fix translator for TensorArrayConcatV3 * Add layer tests for TensorArrayWriteV3, Gather, and Concat Signed-off-by: Kazantsev, Roman * Add translator for 
GatherTree * Fix TF FE unit-test for GatherTree * Fix GatherTree translator * Fix GatherTree translator to handle 1d end_token * Fix undeclared parameter issue * Fix GatherTree unit-test * Add TensorArrayV3Replacer transformation * Temporarily disable dangling transformation * Recover RemoveMultiSubGraphOpDanglingParamsResults transformation * Recover GRUCellFusion transformation * Simplify check for GRUCellFusion transformation * Use proper name for unit-tests * Simplify translator for TensorArrayWriteV3 Signed-off-by: Kazantsev, Roman * Fix RemoveMultiSubgraphOpDanglingParamsResults transformation Signed-off-by: Kazantsev, Roman * Additional fix for remove_multi_subgraph_op_dangling_params * Make static TI run a dynamic subgraph * Dedicated SL test * Change condition to respect stat shapes * Adjust test to cover the code path properly * Recover fallback for still failing case GNMT --------- Signed-off-by: Kazantsev, Roman Co-authored-by: Maksim Kutakov --- .../common_optimizations/gru_cell_fusion.cpp | 9 + ...move_multi_subgraph_op_dangling_params.cpp | 26 +- src/frontends/tensorflow/src/frontend.cpp | 2 + src/frontends/tensorflow/src/op/merge.cpp | 35 +- .../src/op/tensor_array_operations.cpp | 332 ++++++++++++++++++ src/frontends/tensorflow/src/op_table.cpp | 18 + src/frontends/tensorflow/src/tf_utils.cpp | 12 +- .../tensorflow/tests/convert_model.cpp | 3 +- .../models_pbtxt/gather_tree_model.pbtxt | 103 ++++++ .../include/common_op_table.hpp | 1 + .../include/helper_ops/merge.hpp | 28 +- .../include/helper_ops/next_iteration.hpp | 4 + .../include/helper_ops/tensor_array.hpp | 60 ++++ .../tensor_array_v3_replacer.hpp | 29 ++ .../tensor_array_v3_replacer.cpp | 71 ++++ .../tensorflow_common/src/op/gather_tree.cpp | 39 ++ .../intel_cpu/src/nodes/tensoriterator.cpp | 14 +- .../intel_cpu/src/nodes/tensoriterator.h | 1 + .../functional/single_layer_tests/loop.cpp | 63 ++++ tests/layer_tests/common/utils/tf_utils.py | 2 +- .../test_tf_TensorArrayOps.py | 200 
+++++++++++ tools/mo/openvino/tools/mo/convert_impl.py | 4 +- .../moc_tf_fe/conversion_basic_models_test.py | 12 +- 23 files changed, 1034 insertions(+), 34 deletions(-) create mode 100644 src/frontends/tensorflow/src/op/tensor_array_operations.cpp create mode 100644 src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt create mode 100644 src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp create mode 100644 src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp create mode 100644 src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp create mode 100644 src/frontends/tensorflow_common/src/op/gather_tree.cpp create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py diff --git a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp index e5eae04c640553..5b3aaec614ff17 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gru_cell_fusion.cpp @@ -148,6 +148,15 @@ ov::pass::GRUCellFusion::GRUCellFusion() { Bh = rg.make(WRh.get_element_type(), Shape{1, static_cast(hidden_size)}, 0); } + // perform additional check for applicability of the transformation + // without this check, process_weights can fail + if (WR.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + if (WRh.get_partial_shape()[1] != (hidden_size + input_size)) { + return false; + } + Output Wzrh, Rzrh, Bzrh; if (cnt_of_consumers_of_zero_out == 1 && cnt_of_consumers_of_first_out == 2) { tie(Wzrh, Rzrh) = process_weights(rg, false, WR, WRh, input_size, hidden_size, axis_0, axis_1); diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp 
b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index f9738929931f21..3304ee3718ab57 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -116,7 +116,7 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } // Remove inputs bool pass_required = false; - std::set> required_inputs; + std::set required_inputs_indices; auto op_inputs = multi_subgraph_op->input_values(); std::vector> to_remove_descriptors_indexes; to_remove_descriptors_indexes.resize(subgraphs_size); @@ -133,7 +133,7 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } else { // collecting required inputs is needed to detect cases where the input // is not needed in a one body, but the other one uses it (for example If case) - required_inputs.insert(op_inputs[body_in_descriptors[i]->m_input_index]); // only unique + required_inputs_indices.insert(body_in_descriptors[i]->m_input_index); } } } @@ -148,7 +148,9 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } }; auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, + std::set& required_inputs_indices, uint64_t removed_loop_idx) { + std::set new_required_inputs_indices; for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& descriptors = op->get_input_descriptions(static_cast(body_idx)); for (auto& desc : descriptors) { @@ -157,6 +159,14 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st } } } + for (auto input_index : required_inputs_indices) { + if (input_index > removed_loop_idx) { + new_required_inputs_indices.insert(input_index - 1); + } else { + new_required_inputs_indices.insert(input_index); + } + } + 
required_inputs_indices = new_required_inputs_indices; }; // Remove dangling body params and input and update input descriptors for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { @@ -174,13 +184,17 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st update_body_param_desc(body_in_descriptors, body_in_descriptors[desc_idx]->m_body_parameter_index); // remove dangling input of MultiSubGraphOp which was not removed earlier - auto& current_input = op_inputs[body_in_descriptors[desc_idx]->m_input_index]; - if (std::count(std::begin(required_inputs), std::end(required_inputs), current_input) == 0 && + auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; + auto& current_input = op_inputs[current_input_idx]; + // the same input tensor can go to different input ports + if (std::count(std::begin(required_inputs_indices), + std::end(required_inputs_indices), + current_input_idx) == 0 && std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { - op_inputs.erase(std::next(op_inputs.begin(), body_in_descriptors[desc_idx]->m_input_index)); + op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); // Move all input indexes (in all bodies) which are after these indicated by // to_remove_descriptors_indexes and are not used in any body - update_op_inputs_desc(multi_subgraph_op, body_in_descriptors[desc_idx]->m_input_index); + update_op_inputs_desc(multi_subgraph_op, required_inputs_indices, current_input_idx); } } else { updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]); diff --git a/src/frontends/tensorflow/src/frontend.cpp b/src/frontends/tensorflow/src/frontend.cpp index 24b5824fe336d1..ad9b5b76bdfbda 100644 --- a/src/frontends/tensorflow/src/frontend.cpp +++ b/src/frontends/tensorflow/src/frontend.cpp @@ -14,6 +14,7 @@ #include "helper_transforms/embedding_segments_feature_fusing.hpp" #include "helper_transforms/gru_block_cell_replacer.hpp" #include 
"helper_transforms/saved_model_unused_remover.hpp" +#include "helper_transforms/tensor_array_v3_replacer.hpp" #include "input_model.hpp" #include "op_table.hpp" #include "openvino/core/so_extension.hpp" @@ -491,6 +492,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/frontends/tensorflow/src/op/merge.cpp b/src/frontends/tensorflow/src/op/merge.cpp index 3594f93ed08278..708de72aa3434f 100644 --- a/src/frontends/tensorflow/src/op/merge.cpp +++ b/src/frontends/tensorflow/src/op/merge.cpp @@ -5,6 +5,8 @@ #include "helper_ops/merge.hpp" #include "common_op_table.hpp" +#include "helper_ops/enter.hpp" +#include "helper_ops/next_iteration.hpp" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/op/constant.hpp" #include "utils.hpp" @@ -24,20 +26,47 @@ OutputVector translate_merge_op(const NodeContext& node) { auto node_name = node.get_name(); default_op_checks(node, 1, {"Merge"}); int input_size = static_cast(node.get_input_size()); - OutputVector inputs; + OutputVector inputs(input_size); for (int input_ind = 0; input_ind < input_size; ++input_ind) { - inputs.push_back(node.get_input(input_ind)); + inputs[input_ind] = node.get_input(input_ind); } // if Merge node has just one input, there is nothing to merge // return the same input and value_index equal to 0 - if (inputs.size() == 1) { + if (input_size == 1) { auto value_index = make_shared(element::i32, Shape{}, 0); value_index->output(0).set_names({node_name + ":1"}); inputs[0].add_names({node_name + ":0"}); return OutputVector{inputs[0], value_index}; } + // check if it is a case of TF1 While: Enter, NextIteration are going to Merge node + // in this case it can refine output shape and type for NextIteration based on Enter + if (input_size == 2) { + auto enter = 
as_type_ptr(inputs[0].get_node_shared_ptr()); + if (!enter) { + enter = as_type_ptr(inputs[1].get_node_shared_ptr()); + } + auto next_iteration = as_type_ptr(inputs[0].get_node_shared_ptr()); + if (!next_iteration) { + next_iteration = as_type_ptr(inputs[1].get_node_shared_ptr()); + } + + if (enter && next_iteration) { + // set output type and shape for NextIteration + // borrow them from Enter output + auto enter_output_type = enter->output(0).get_element_type(); + auto enter_output_shape = enter->output(0).get_partial_shape(); + auto next_iteration_output_shape = PartialShape::dynamic(enter_output_shape.rank()); + next_iteration->set_output_shape_and_type(next_iteration_output_shape, enter_output_type); + + // reset inputs + // refines input shapes and types for Merge node + inputs[0] = enter->output(0); + inputs[1] = next_iteration->output(0); + } + } + auto merge_node = make_shared(inputs, node.get_decoder()); set_node_name(node.get_name(), merge_node); diff --git a/src/frontends/tensorflow/src/op/tensor_array_operations.cpp b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp new file mode 100644 index 00000000000000..c1b3d6ac205dc3 --- /dev/null +++ b/src/frontends/tensorflow/src/op/tensor_array_operations.cpp @@ -0,0 +1,332 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/enter.hpp" +#include "helper_ops/tensor_array.hpp" +#include "openvino/frontend/tensorflow/node_context.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/maximum.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_nd_update.hpp" +#include "openvino/op/scatter_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" 
+#include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; +using namespace ov::frontend::tensorflow; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +namespace { +// the function creates the constant imitating initial tensor array container +Output create_initial_tensor_array_constant(int64_t tensor_element_rank, + const element::Type& element_type, + Output size, + const string& node_name) { + // adjust size to have it of shape [1] for further concatenation with element shape + auto new_size_shape = make_shared(element::i32, Shape{1}, 1); + size = make_shared(size, new_size_shape, false); + + // create a vector of size element_shape.rank() with ones + // and compute a shape of initial tensor array [size, 1, ..., 1] + vector ones(tensor_element_rank, 1); + auto ones_const = make_shared(element::i32, Shape{ones.size()}, ones); + auto target_shape = make_shared(OutputVector{size, ones_const}, 0); + + // create initial tensor array + auto scalar_value = make_shared(element_type, Shape{}, vector{0}); + auto initial_tensor_array = make_shared(scalar_value, target_shape); + + return initial_tensor_array->output(0); +} +} // namespace + +OutputVector translate_tensor_array_v3_op(const NodeContext& node) { + // TensorArrayV3 has just one input: + // 0) size to initialize a size of tensor array + default_op_checks(node, 1, {"TensorArrayV3"}); + auto dtype = node.get_attribute("dtype"); + auto size = node.get_input(0); + auto element_shape = node.get_attribute("element_shape"); + + if (element_shape.rank().is_static()) { + auto node_name = node.get_name(); + auto new_output1 = + create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, node.get_name()); + new_output1.set_names({node_name + ":0"}); + auto new_output2 = + create_initial_tensor_array_constant(element_shape.rank().get_length(), dtype, size, 
node.get_name()); + new_output2.set_names({node_name + ":1"}); + return OutputVector{new_output1, new_output2}; + } + + // dynamic case when it is unable retrieve element rank from the attribute + auto tensor_array_v3 = make_shared(size, dtype, node.get_decoder()); + set_node_name(node.get_name(), tensor_array_v3); + + return tensor_array_v3->outputs(); +} + +OutputVector translate_tensor_array_scatter_v3_op(const NodeContext& node) { + // TensorArrayScatterV3 has four inputs: + // 0) handle, a Tensor of type resource. The handle to a TensorArray. + // 1) indices, a Tensor of type int32. The locations at which to write the tensor elements. + // 2) value, a Tensor. The concatenated tensor to write to the TensorArray + // 3) flow_in A Tensor of type float32. A float scalar that enforces proper chaining of operations. + // The operation has one output: + // 0) flow_out indicates that operation is complete and handle resource is updated + default_op_checks(node, 4, {"TensorArrayScatterV3"}); + auto indices = node.get_input(1); + auto value = node.get_input(2); + // flow_in is used for transferring input tensor array + auto tensor_array = node.get_input(3); + + // check if producer of tensor_array is TensorArrayV3, internal operation, still + // if yes, try to replace it with constant container + if (as_type_ptr(tensor_array.get_node_shared_ptr()) && + value.get_partial_shape().rank().is_static()) { + // set tensor element rank that gets known from TensorArrayScatterV3 operation + auto tensor_array_v3 = as_type_ptr(tensor_array.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + value.get_partial_shape().rank().get_length() > 0, + "[TensorFlow Frontend] internal error or inconsistent model: value to TensorArrayScatterV3 is a scalar"); + int64_t tensor_element_rank = value.get_partial_shape().rank().get_length() - 1; + tensor_array_v3->set_element_rank(tensor_element_rank); + } + + // compute element shape (shape of a tensor in the tensor array) using value + 
auto element_shape = make_shared(value, element::i32)->output(0); + auto one_const = make_shared(element::i32, Shape{1}, 1); + auto max_const = make_shared(element::i32, Shape{1}, numeric_limits::max()); + element_shape = make_shared(element_shape, one_const, max_const, one_const); + + // compute size of tensor array + auto tensor_array_size = make_shared(tensor_array, element::i32)->output(0); + auto zero_const = make_shared(element::i32, Shape{1}, 0); + tensor_array_size = make_shared(tensor_array_size, zero_const, zero_const); + + // compute the new shape for tensor array where new tensors will be inserted + auto new_shape = make_shared(OutputVector{tensor_array_size, element_shape}, 0); + tensor_array = make_shared(tensor_array, new_shape); + + // adjust indices for ScatterNDUpdate to have a shape [N, 1] where N is a number of indices + indices = make_shared(indices, one_const); + + // compute updated tensor array using ScatterNDUpdate + // value should be of a shape [N, ] + auto updated_tensor_array = make_shared(tensor_array, indices, value); + set_node_name(node.get_name(), updated_tensor_array); + + // TensorArrayScatterV3 has just one output flow_out + // that is used for transferring updated tensor array + return {updated_tensor_array}; +} + +OutputVector translate_tensor_array_read_v3_op(const NodeContext& node) { + // TensorArrayReadV3 read an element from the TensorArray into the output + // and it has three inputs: + // 0) handle, a Tensor of type resource. The handle to a TensorArray. + // 1) index, a Tensor of type int32. The location from which to read the value + // 2) flow_in A Tensor of type float32. A float scalar that enforces proper chaining of operations. 
+ // The operation has one output + // 0) read value from tensor array + default_op_checks(node, 3, {"TensorArrayReadV3"}); + auto index = node.get_input(1); + // flow_in is used for transferring input tensor array + auto tensor_array = node.get_input(2); + auto dtype = node.get_attribute("dtype"); + + // adjust the index to a scalar for using Gather operation + auto new_shape = make_shared(element::i32, Shape{0}, vector{}); + index = make_shared(index, new_shape, false); + + // gather tensor element by the required position + auto gather_axis = make_shared(element::i32, Shape{1}, 0); + Output tensor_element = make_shared(tensor_array, index, gather_axis); + tensor_element = make_shared(tensor_element, dtype); + + set_node_name(node.get_name(), tensor_element.get_node_shared_ptr()); + return {tensor_element}; +} + +OutputVector translate_tensor_array_close_v3_op(const NodeContext& node) { + // TensorArrayCloseV3 deletes the TensorArray from its resource container + // it outputs nothing + default_op_checks(node, 1, {"TensorArrayCloseV3"}); + return {}; +} + +OutputVector translate_tensor_array_size_v3_op(const NodeContext& node) { + // TensorArraySizeV3 gets the current size of the TensorArray + // it outputs int32 scalar equal to a size of the tensor array + default_op_checks(node, 2, {"TensorArraySizeV3"}); + // skip the handle by the first input + auto tensor_array = node.get_input(1); + + auto size = make_shared(tensor_array, element::i32)->output(0); + auto zero_const = make_shared(element::i32, Shape{1}, 0); + size = make_shared(size, zero_const, zero_const); + + // size must be scalar + auto scalar_shape = make_shared(element::i32, Shape{0}, vector{}); + size = make_shared(size, scalar_shape, false); + + set_node_name(node.get_name(), size.get_node_shared_ptr()); + return {size}; +} + +OutputVector translate_tensor_array_gather_v3_op(const NodeContext& node) { + // TensorArrayGatherV3 gathers specific elements from the TensorArray into output + // and it has 
three inputs: + // 0) handle, a Tensor of type resource. The handle to a TensorArray. + // 1) indices, a Tensor of type int32. The location from which to read tensor elements + // 2) flow_in A Tensor of type float32. A float scalar that enforces proper chaining of operations. + // The operation has one output + // 0) value with read tensor elements + // it outputs int32 scalar equal to a size of the tensor array + default_op_checks(node, 3, {"TensorArrayGatherV3"}); + // skip the handle by the first input + auto indices = node.get_input(1); + // flow_in serves for transferring tensor array + // handle input is ignored + auto tensor_array = node.get_input(2); + auto dtype = node.get_attribute("dtype"); + auto element_shape = node.get_attribute("element_shape", PartialShape::dynamic()); + + // gather tensor element by the required position + auto gather_axis = make_shared(element::i32, Shape{1}, 0); + Output tensor_element = make_shared(tensor_array, indices, gather_axis); + tensor_element = make_shared(tensor_element, dtype); + + // concretize tensor_element shape if this is specified + if (tensor_element.get_partial_shape().rank().is_dynamic() && element_shape.is_static()) { + auto element_shape_value = element_shape.get_shape(); + auto element_shape_const = + make_shared(element::i32, Shape{element_shape_value.size()}, element_shape_value); + auto size = make_shared(tensor_array, element::i32)->output(0); + auto zero_const = make_shared(element::i32, Shape{1}, 0); + size = make_shared(size, zero_const, zero_const); + auto new_shape = make_shared(OutputVector{size, element_shape_const}, 0); + tensor_element = make_shared(tensor_element, new_shape, false); + } + + set_node_name(node.get_name(), tensor_element.get_node_shared_ptr()); + return {tensor_element}; +} + +OutputVector translate_tensor_array_concat_v3_op(const NodeContext& node) { + // TensorArrayConcatV3 Concat the elements from the TensorArray into value + // and it has two inputs: + // 0) handle, a 
Tensor of type resource. The handle to a TensorArray. + // 1) flow_in A Tensor of type float32. A float scalar that enforces proper chaining of operations. + // The operation has one output + // 0) concatenated value by the first dimension + default_op_checks(node, 2, {"TensorArrayConcatV3"}); + // flow_in serves for transferring tensor array + // handle input is ignored + auto tensor_array = node.get_input(1); + auto dtype = node.get_attribute("dtype"); + + // since tensor array saves tensor elements in the concatenated form by the first dimension + // and for this operation they should be concatenated by the first dimension of the tensor element + // it needs to combine the first two dimensions + // tensor array is of shape [k, n0, n1, ..., nd] + // 1. compute element shape excluding the first dimension + auto zero_const = make_shared(element::i32, Shape{1}, 0); + auto one_const = make_shared(element::i32, Shape{1}, 1); + auto two_const = make_shared(element::i32, Shape{1}, 2); + auto max_const = make_shared(element::i32, Shape{1}, numeric_limits::max()); + auto tensor_array_shape = make_shared(tensor_array, element::i64); + auto element_shape_no_two_dims = make_shared(tensor_array_shape, two_const, max_const, one_const); + // 2. compute the first and second dimensions k and n0 + auto k = make_shared(tensor_array_shape, zero_const, zero_const); + auto n0 = make_shared(tensor_array_shape, one_const, zero_const); + auto k_by_n0 = make_shared(k, n0); + // 3. compute the first output containing concatenated tensor elements + // it folds the first and second dimensions + auto new_shape = make_shared(OutputVector{k_by_n0, element_shape_no_two_dims}, 0); + auto concatenated_array = make_shared(tensor_array, new_shape, false)->output(0); + concatenated_array = make_shared(concatenated_array, dtype); + concatenated_array.set_names({node.get_name() + ":0"}); + // 4. 
compute the second output with length of each tensor element for the concatenation + auto lengths = make_shared(n0, k)->output(0); + lengths.set_names({node.get_name() + ":1"}); + + return {concatenated_array, lengths}; +} + +OutputVector translate_tensor_array_write_v3_op(const NodeContext& node) { + // TensorArrayWriteV3 pushes an element onto the tensor_array. + // and it has four inputs + // 0) handle, a Tensor of type resource. The handle to a TensorArray. + // 1) index, a Tensor of type int32. The location where to write tensor element + // 2) value, a Tensor. The tensor to write at the specified location + // 3) flow_in A Tensor of type float32. A float scalar that enforces proper chaining of operations. + // The operation has one output + // 0) read value from tensor array + default_op_checks(node, 4, {"TensorArrayWriteV3"}); + auto handle = node.get_input(0); + auto index = node.get_input(1); + auto value = node.get_input(2); + // flow_in is used for transferring input tensor array + // tensor array has a rank equal to 1 + rank(element of tensor array) + // if it just initialized, its shape is equal to [tensor_array_size, 1, ..., 1] + // otherwise, it is equal to [tensor_array_size, ] + auto tensor_array = node.get_input(3); + + // reshape index to have it of [1] shape + auto new_index_shape = make_shared(element::i32, Shape{1}, 1); + index = make_shared(index, new_index_shape, false); + + if (auto enter = as_type_ptr(handle.get_node_shared_ptr())) { + if (as_type_ptr(enter->input_value(0).get_node_shared_ptr()) && + value.get_partial_shape().rank().is_static()) { + // set tensor element rank that gets known from TensorArrayWriteV3 operation + auto tensor_array_v3 = as_type_ptr(enter->input_value(0).get_node_shared_ptr()); + int64_t tensor_element_rank = value.get_partial_shape().rank().get_length(); + tensor_array_v3->set_element_rank(tensor_element_rank); + } + } + + // compute element shape in the input tensor array + auto tensor_array_shape = 
make_shared(tensor_array, element::i32); + + // compute the current size of tensor array + auto zero_const = make_shared(element::i32, Shape{1}, 0); + auto tensor_array_size = make_shared(tensor_array_shape, zero_const, zero_const); + + // adjust tensor array to have the correct shape [size, ] before value insertion + auto element_shape = make_shared(value, element::i32); + auto new_tensor_array_shape = make_shared(OutputVector{tensor_array_size, element_shape}, 0); + tensor_array = make_shared(tensor_array, new_tensor_array_shape); + + // update the resulted tensor using ScatterUpdate + value = make_shared(value, zero_const); + auto scatter_update = make_shared(tensor_array, index, value, zero_const); + + set_node_name(node.get_name(), scatter_update); + // use flow_out for transferring updated tensor array + return {scatter_update}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 149b2d76184497..3a4c570c6576fb 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -46,6 +46,14 @@ TF_OP_CONVERTER(translate_sparse_segment_sum_op); TF_OP_CONVERTER(translate_staticregexfullmatch_op); TF_OP_CONVERTER(translate_stringjoin_op); TF_OP_CONVERTER(translate_switch_op); +TF_OP_CONVERTER(translate_tensor_array_close_v3_op); +TF_OP_CONVERTER(translate_tensor_array_concat_v3_op); +TF_OP_CONVERTER(translate_tensor_array_gather_v3_op); +TF_OP_CONVERTER(translate_tensor_array_read_v3_op); +TF_OP_CONVERTER(translate_tensor_array_scatter_v3_op); +TF_OP_CONVERTER(translate_tensor_array_size_v3_op); +TF_OP_CONVERTER(translate_tensor_array_v3_op); +TF_OP_CONVERTER(translate_tensor_array_write_v3_op); TF_OP_CONVERTER(translate_varhandle_op); TF_OP_CONVERTER(translate_variable_op); TF_OP_CONVERTER(translate_varisinitialized_op); @@ -174,6 +182,8 @@ const std::map get_supported_ops() { 
{"Gather", CreatorFunction(translate_gather_op)}, {"GatherV2", CreatorFunction(translate_gather_v2_op)}, {"GatherNd", CreatorFunction(translate_gather_nd_op)}, + {"GatherTree", CreatorFunction(translate_gather_tree_op)}, + {"Addons>GatherTree", CreatorFunction(translate_gather_tree_op)}, {"HashTable", CreatorFunction(translate_hash_table_op)}, {"HashTableV2", CreatorFunction(translate_hash_table_op)}, {"Identity", CreatorFunction(translate_identity_op)}, @@ -269,6 +279,14 @@ const std::map get_supported_ops() { {"StatelessWhile", CreatorFunction(translate_while_op)}, {"StridedSlice", CreatorFunction(translate_strided_slice_op)}, {"Switch", CreatorFunction(translate_switch_op)}, + {"TensorArrayCloseV3", CreatorFunction(translate_tensor_array_close_v3_op)}, + {"TensorArrayConcatV3", CreatorFunction(translate_tensor_array_concat_v3_op)}, + {"TensorArrayGatherV3", CreatorFunction(translate_tensor_array_gather_v3_op)}, + {"TensorArrayReadV3", CreatorFunction(translate_tensor_array_read_v3_op)}, + {"TensorArrayScatterV3", CreatorFunction(translate_tensor_array_scatter_v3_op)}, + {"TensorArraySizeV3", CreatorFunction(translate_tensor_array_size_v3_op)}, + {"TensorArrayV3", CreatorFunction(translate_tensor_array_v3_op)}, + {"TensorArrayWriteV3", CreatorFunction(translate_tensor_array_write_v3_op)}, {"TensorListFromTensor", CreatorFunction(translate_tensor_list_from_tensor_op)}, {"TensorListGetItem", CreatorFunction(translate_tensor_list_get_item_op)}, {"TensorListLength", CreatorFunction(translate_tensor_list_length_op)}, diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index c72e8e7bb9080a..e298f49f92889f 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ b/src/frontends/tensorflow/src/tf_utils.cpp @@ -423,7 +423,7 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name FRONT_END_GENERAL_CHECK( cond_results.size() == 1 && cond_results[0], "[TensorFlow Frontend] Internal error or inconsistent 
model: condition body must contain one Result node."); - auto body_condition_output_idx = static_cast(body_results.size()); + auto body_condition_output_idx = body_results.size(); body_model->add_results(cond_results); // type setting for body graph parameters is needed for TensorList support since DT_VARIANT type is present @@ -435,14 +435,18 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name loop->set_function(body_model); // body_results may contain less nodes than body_params that means back edge exists not for all body_params - for (size_t input_ind = 0; input_ind < static_cast(body_condition_output_idx); ++input_ind) { + for (size_t input_ind = 0; input_ind < body_condition_output_idx; ++input_ind) { loop->set_merged_input(body_params[input_ind], ov_inputs[input_ind], body_results[input_ind]->input_value(0)); } - loop->set_special_body_ports({-1, body_condition_output_idx}); + loop->set_special_body_ports({-1, static_cast(body_condition_output_idx)}); + // set invariant inputs for the loop + for (size_t input_ind = body_condition_output_idx; input_ind < input_size; ++input_ind) { + loop->set_invariant_input(body_params[input_ind], ov_inputs[input_ind]); + } // set external outputs for Loop node // do not get execution condition outside of the Loop node - for (size_t output_ind = 0; output_ind < static_cast(body_condition_output_idx); ++output_ind) { + for (size_t output_ind = 0; output_ind < body_condition_output_idx; ++output_ind) { loop->get_iter_value(body_results[output_ind]); } loop->validate_and_infer_types(); diff --git a/src/frontends/tensorflow/tests/convert_model.cpp b/src/frontends/tensorflow/tests/convert_model.cpp index fc00a6784963e3..f6ec18cf9cc12c 100644 --- a/src/frontends/tensorflow/tests/convert_model.cpp +++ b/src/frontends/tensorflow/tests/convert_model.cpp @@ -15,7 +15,8 @@ static const std::vector models{ std::string("2in_2out/2in_2out.pb"), std::string("forward_edge_model/forward_edge_model.pbtxt"), 
std::string("forward_edge_model2/forward_edge_model2.pbtxt"), - std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt")}; + std::string("concat_with_non_constant_axis/concat_with_non_constant_axis.pbtxt"), + std::string("gather_tree_model/gather_tree_model.pbtxt")}; INSTANTIATE_TEST_SUITE_P(TFConvertModelTest, FrontEndConvertModelTest, diff --git a/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt new file mode 100644 index 00000000000000..54351036dd72a2 --- /dev/null +++ b/src/frontends/tensorflow/tests/test_models/models_pbtxt/gather_tree_model.pbtxt @@ -0,0 +1,103 @@ +node { + name: "step_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "parent_ids" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 20 + } + dim { + size: 2 + } + dim { + size: 30 + } + } + } + } +} +node { + name: "max_seq_len" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } +} +node { + name: "end_token" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "Addons>GatherTree" + op: "Addons>GatherTree" + input: "step_ids" + input: "parent_ids" + input: "max_seq_len" + input: "end_token" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 75a9bdcafc91ee..29efb83547d263 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ 
b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -72,6 +72,7 @@ OP_CONVERTER_NAMED(translate_fused_batch_norm_op); OP_CONVERTER(translate_gather_op); OP_CONVERTER(translate_gather_v2_op); OP_CONVERTER(translate_gather_nd_op); +OP_CONVERTER(translate_gather_tree_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); OP_CONVERTER(translate_input_arg_op); diff --git a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp index eb7e611f3e21f0..6261dd0e67c229 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/merge.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/merge.hpp @@ -33,20 +33,34 @@ class Merge : public InternalOperation { ov::PartialShape output_data_shape = ov::PartialShape::dynamic(); auto input_size = get_input_size(); - bool merge_output_shape = true; for (size_t input_ind = 0; input_ind < input_size; ++input_ind) { auto input_type = get_input_element_type(input_ind); if (input_type.is_static()) { output_data_type = input_type; } - // check if it still needs to merge input shapes - // if yes, it tries to merge them - if (merge_output_shape && - !PartialShape::merge_into(output_data_shape, get_input_partial_shape(input_ind))) { - merge_output_shape = false; - // reset output shape to dynamic rank + auto input_shape = get_input_partial_shape(input_ind); + if (input_shape.rank().is_dynamic()) { + continue; + } + + if (output_data_shape.rank().is_dynamic()) { + // firstly met shape of static rank + // immediately use this shape of static rank + output_data_shape = input_shape; + } else if (output_data_shape.rank().is_static() && + output_data_shape.rank().get_length() != input_shape.rank().get_length()) { + // different inputs have different rank means output must be of a dynamic rank output_data_shape = ov::PartialShape::dynamic(); + break; + } else { + auto output_rank = output_data_shape.rank().get_length(); + for 
(int64_t dim_ind = 0; dim_ind < output_rank; ++dim_ind) { + if (input_shape[dim_ind] != output_data_shape[dim_ind]) { + // different inputs can have different dimensions so it must combine them + output_data_shape[dim_ind] = ov::Dimension::dynamic(); + } + } } } diff --git a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp index eb262b4307af7f..e556c9ad4478da 100644 --- a/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp +++ b/src/frontends/tensorflow_common/include/helper_ops/next_iteration.hpp @@ -43,6 +43,10 @@ class NextIteration : public InternalOperation { producer_output_port_idx = m_producer_output_port_idx; } + void set_output_shape_and_type(const ov::PartialShape& output_shape, const ov::element::Type& output_type) { + set_output_type(0, output_type, output_shape); + } + private: bool m_back_edge_set; std::string m_producer_name; diff --git a/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp new file mode 100644 index 00000000000000..030ff12d5b68c5 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_ops/tensor_array.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "internal_operation.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { + +// Internal operation for TensorArrayV3 +// An array of Tensors of given size +// It has two outputs: +// 1. handle - resource (a reference) for tensor array +// 2. 
flow_out - float type will be used for storing tensor array +class TensorArrayV3 : public InternalOperation { +public: + OPENVINO_OP("TensorArrayV3", "ov::frontend::tensorflow", InternalOperation); + + TensorArrayV3(const Output& size, + const ov::element::Type element_type, + const std::shared_ptr& decoder = std::make_shared()) + : InternalOperation(decoder, OutputVector{size}, 2, "TensorArrayV3"), + m_element_type(element_type), + m_element_rank(-1) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + set_output_type(0, m_element_type, ov::PartialShape::dynamic()); + set_output_type(1, m_element_type, ov::PartialShape::dynamic()); + } + + ov::element::Type get_element_type() const { + return m_element_type; + } + + int64_t get_element_rank() const { + return m_element_rank; + } + + void set_element_rank(int64_t element_rank) { + FRONT_END_GENERAL_CHECK( + element_rank >= 0, + "[TensorFlow Frontend] internal error: negavite element rank tries to set for TensorArrayV3"); + m_element_rank = element_rank; + } + +private: + ov::element::Type m_element_type; + int64_t m_element_rank; +}; + +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp new file mode 100644 index 00000000000000..42e5a0ad754ea7 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_transforms/tensor_array_v3_replacer.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace pass { + +// This transformation replaces internal operation TensorArrayV3 with a Constant +// that simulates initial state of tensor 
array container +class TensorArrayV3Replacer : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + TensorArrayV3Replacer(); +}; + +} // namespace pass +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp new file mode 100644 index 00000000000000..72ed922511cd98 --- /dev/null +++ b/src/frontends/tensorflow_common/src/helper_transforms/tensor_array_v3_replacer.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "helper_transforms/tensor_array_v3_replacer.hpp" + +#include "helper_ops/tensor_array.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; +using namespace ov::pass; + +ov::frontend::tensorflow::pass::TensorArrayV3Replacer::TensorArrayV3Replacer() { + auto tensor_array_v3 = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + NodeRegistry rg; + + auto tensor_array_v3 = dynamic_pointer_cast(m.get_match_root()); + if (!tensor_array_v3) { + return false; + } + + int32_t tensor_element_rank = static_cast(tensor_array_v3->get_element_rank()); + if (tensor_element_rank < 0) { + return false; + } + + // retrieve all TensorArrayV3 inputs + auto size = tensor_array_v3->input_value(0); + auto element_type = tensor_array_v3->get_element_type(); + + // adjust size to have it of shape [1] for further concatenation with element shape + auto new_size_shape = rg.make(element::i32, Shape{1}, 1); + auto new_size = rg.make(size, new_size_shape, false); + + // 
create a vector of size element_shape.rank() with ones + // and compute a shape of initial tensor array [size, 1, ..., 1] + Output target_shape; + if (tensor_element_rank == 0) { + target_shape = new_size->output(0); + } else { + vector ones(tensor_element_rank, 1); + auto ones_const = rg.make(element::i32, Shape{ones.size()}, ones); + target_shape = rg.make(OutputVector{new_size, ones_const}, 0)->output(0); + } + + // create initial tensor array + auto scalar_value = make_shared(element_type, Shape{}, vector{0}); + auto initial_tensor_array = make_shared(scalar_value, target_shape); + + // preserve names of the node and the output tensor + initial_tensor_array->set_friendly_name(tensor_array_v3->get_friendly_name()); + copy_runtime_info(tensor_array_v3, rg.get()); + + ov::replace_node(tensor_array_v3, + ov::OutputVector{initial_tensor_array->output(0), initial_tensor_array->output(0)}); + return true; + }; + + auto m = + std::make_shared(tensor_array_v3, "ov::frontend::tensorflow::pass::TensorArrayV3Replacer"); + register_matcher(m, callback); +} diff --git a/src/frontends/tensorflow_common/src/op/gather_tree.cpp b/src/frontends/tensorflow_common/src/op/gather_tree.cpp new file mode 100644 index 00000000000000..e349efe6784e64 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/gather_tree.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/gather_tree.hpp" + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_gather_tree_op(const NodeContext& node) { + default_op_checks(node, 4, {"GatherTree", "Addons>GatherTree"}); + auto step_ids = node.get_input(0); + auto parent_ids = node.get_input(1); + auto max_sequence_lengths = node.get_input(2); + auto end_token = 
node.get_input(3); + + // adjust end_token that must be a scalar + auto new_shape_end_token = make_shared(element::i32, Shape{0}, vector{}); + end_token = make_shared(end_token, new_shape_end_token, false); + + auto gather_tree = make_shared(step_ids, parent_ids, max_sequence_lengths, end_token); + set_node_name(node.get_name(), gather_tree); + + return {gather_tree}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index b38ae2fde7e7db..b9b7345b37f493 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -513,7 +513,7 @@ void TensorIterator::createPrimitive() { lastUsedCond = initial_cond_check->getStatus(); } - if (isDynamicNode()) + if (runAsDynamic()) prepareDynamicBuffers(); Node::createPrimitive(); @@ -556,7 +556,7 @@ void TensorIterator::prepareParams() { prepareContinueCond(); prepareLoopBodyCurrentIteration(); - if (!isDynamicNode()) { + if (!runAsDynamic()) { prepareOutputPorts(); prepareBackEdges(); } @@ -568,6 +568,12 @@ void TensorIterator::prepareParams() { } void TensorIterator::execute(dnnl::stream strm) { + //Special case, the subgraph is dynamic while the node has all static shapes + if (runAsDynamic()) { + executeDynamicImpl(strm); + return; + } + sub_graph.ResetInferCount(); bool continue_cond = initial_cond_check->getStatus(); @@ -872,6 +878,10 @@ int TensorIterator::getNumIteration(const std::vector& inputPortMap, co return numIterations; } +bool TensorIterator::runAsDynamic() const { + return isDynamicNode() || Graph::Status::ReadyDynamic == sub_graph.getStatus(); +} + bool TensorIterator::created() const { return getType() == Type::TensorIterator; } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index 8633be5c28df61..104ee077f9a163 100644 --- 
a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -138,6 +138,7 @@ class TensorIterator : public Node { void reshapeAndFillOutput(dnnl::stream strm); bool checkForInputAndBodyShapesInequality() const; int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; + bool runAsDynamic() const; ExtensionManager::Ptr ext_mng; Graph sub_graph; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp index b92646e458130e..cda499b042fb4e 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/loop.cpp @@ -371,6 +371,65 @@ class LoopForConcatLayerCPUTest : public LoopLayerCPUTest { } }; +class StaticLoopDynamicSubgraphCPUTest : public SubgraphBaseTest { + void SetUp() override { + InputShape input_shape = {{25, 1, 1}, {{25, 1, 1}}}; + InputShape input_exec_flag_shape = {{1}, {{1}}}; + targetDevice = ov::test::utils::DEVICE_CPU; + ElementType netType = ov::element::f32; + init_input_shapes({input_shape, input_exec_flag_shape}); + + ov::ParameterVector params; + params.push_back(std::make_shared(netType, inputDynamicShapes[0])); + + // exec_condition + params.push_back(std::make_shared(ov::element::boolean, inputDynamicShapes[1])); + + auto trip_count_input = std::make_shared(ov::element::i64, ov::Shape{1}, 2); + auto body_condition_const = std::make_shared(ov::element::boolean, ov::Shape{1}, true); + + // Body parameters + ov::ParameterVector body_params = {std::make_shared(netType, ov::PartialShape{25, 1, -1})}; + + // Body + auto broadcast_target_shape = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{25, 1, 256}); + auto broadcast_axis_mapping = std::make_shared(ov::element::i64, ov::Shape{1}, 0); + auto broadcast = std::make_shared(body_params[0], broadcast_target_shape); + auto body = 
std::make_shared(ov::OutputVector{body_condition_const, broadcast}, body_params); + + auto loop = std::make_shared(trip_count_input, params[1]); + loop->set_function(body); + loop->set_special_body_ports(ov::op::v5::Loop::SpecialBodyPorts{-1, 0}); + + loop->set_merged_input(body_params.front(), params.front(), broadcast); + + auto out0 = loop->get_iter_value(body_condition_const, -1); + auto out1 = loop->get_iter_value(broadcast, -1); + + auto result0 = std::make_shared(out0); + auto result1 = std::make_shared(out1); + function = std::make_shared(ov::ResultVector{result0, result1}, params, "loop"); + } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + + if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + auto* dataPtr = tensor.data(); + *dataPtr = true; + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + + TEST_P(LoopLayerCPUTest, CompareWithRefs) { run(); } @@ -387,6 +446,10 @@ TEST_P(LoopForConcatLayerCPUTest, CompareWithRefs) { run(); } +TEST_F(StaticLoopDynamicSubgraphCPUTest, smoke_StaticLoopWithDynSubgraph) { + run(); +} + namespace { const std::vector inputPrecisions = { diff --git a/tests/layer_tests/common/utils/tf_utils.py b/tests/layer_tests/common/utils/tf_utils.py index fb02c3f0a1b298..913048acf2e762 100644 --- a/tests/layer_tests/common/utils/tf_utils.py +++ b/tests/layer_tests/common/utils/tf_utils.py @@ -98,7 +98,7 @@ def summarize_graph(model_path, output_nodes_for_freeze=None, reshape_net=None): variables = list() outputs = list() graph = load_graph(model_path, output_nodes_for_freeze) - unlikely_output_types = ['Const', 'Assign', 'NoOp', 
'Placeholder', 'Assert', 'switch_t', 'switch_f'] + unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f', 'TensorArrayCloseV3'] control_dependents_map = collect_control_dependencies(graph) for node in graph.as_graph_def().node: if node.op == 'Placeholder': diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py new file mode 100644 index 00000000000000..098f099f74d24d --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TensorArrayOps.py @@ -0,0 +1,200 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +def create_tensor_array(data_shape, data_type): + size = data_shape[0] + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + return handle, flow + + +class TestTensorArraySizeV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_size_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) 
+ tf.raw_ops.TensorArraySizeV3(handle=handle, flow_in=flow) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5], data_type=np.float32), + dict(data_shape=[10, 20, 30], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_size_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_size_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayReadV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['index_to_read'] = rng.integers(0, data_shape[0], []).astype(np.int32) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_read_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + index_to_read = tf.compat.v1.placeholder(tf.int32, [], 'index_to_read') + tf.raw_ops.TensorArrayReadV3(handle=handle, index=index_to_read, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[6], data_type=np.float32), + dict(data_shape=[8, 5, 6, 10], 
data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_read_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_read_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayWriteGatherV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + assert 'value_to_write' in inputs_info + data_shape = inputs_info['data'] + value_shape = inputs_info['value_to_write'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['value_to_write'] = rng.integers(-10, 10, value_shape).astype(self.data_type) + indices_data = rng.permutation(self.size).astype(np.int32) + inputs_data['indices'] = np.delete(indices_data, np.where(indices_data == self.index_to_write)) + return inputs_data + + def create_tensor_array_write_v3(self, size, data_shape, data_type, index_to_write, indices_to_gather): + self.data_type = data_type + self.size = size + self.index_to_write = index_to_write + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + value_to_write = tf.compat.v1.placeholder(data_type, data_shape[1:], 'value_to_write') + index_to_write_const = tf.constant(index_to_write, dtype=tf.int32, shape=[]) + indices_to_gather_const = tf.constant(indices_to_gather, dtype=tf.int32, shape=[len(indices_to_gather)]) + data = tf.compat.v1.placeholder(data_type, data_shape, 'data') + indices = tf.compat.v1.placeholder(tf.int32, [size - 1], 'indices') + size_const = tf.constant(size, dtype=tf.int32, shape=[]) + handle, flow = tf.raw_ops.TensorArrayV3(size=size_const, dtype=tf.as_dtype(data_type)) + flow = 
tf.raw_ops.TensorArrayScatterV3(handle=handle, indices=indices, value=data, flow_in=flow) + flow = tf.raw_ops.TensorArrayWriteV3(handle=handle, index=index_to_write_const, + value=value_to_write, flow_in=flow) + tf.raw_ops.TensorArrayGatherV3(handle=handle, indices=indices_to_gather_const, flow_in=flow, + dtype=tf.dtypes.as_dtype(data_type)) + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(size=7, data_shape=[6], data_type=np.float32, index_to_write=3, indices_to_gather=[0, 3, 1]), + dict(size=10, data_shape=[9, 2, 4], data_type=np.int32, index_to_write=2, indices_to_gather=[2, 1, 4, 3]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_write_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_write_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestTensorArrayConcatV3(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'data' in inputs_info + assert 'indices' in inputs_info + data_shape = inputs_info['data'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['data'] = rng.integers(-10, 10, data_shape).astype(self.data_type) + inputs_data['indices'] = rng.permutation(self.size).astype(np.int32) + return inputs_data + + def create_tensor_array_concat_v3(self, data_shape, data_type): + size = data_shape[0] + self.data_type = data_type + self.size = size + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + handle, flow = create_tensor_array(data_shape, data_type) + tensor_array_concat_v3 = tf.raw_ops.TensorArrayConcatV3(handle=handle, flow_in=flow, + dtype=tf.as_dtype(data_type)) + 
tf.identity(tensor_array_concat_v3[0], name='values') + tf.identity(tensor_array_concat_v3[1], name='length') + tf.raw_ops.TensorArrayCloseV3(handle=handle) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(data_shape=[5, 3, 11, 2], data_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_tensor_array_concat_v3(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_tensor_array_concat_v3(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tools/mo/openvino/tools/mo/convert_impl.py b/tools/mo/openvino/tools/mo/convert_impl.py index ae6c39a144b0a3..9d683f4b6ac977 100644 --- a/tools/mo/openvino/tools/mo/convert_impl.py +++ b/tools/mo/openvino/tools/mo/convert_impl.py @@ -312,8 +312,8 @@ def update_fallback_with_conversion_error(use_new_frontend: bool, is_tf: bool, e conversion_error_re = r"^(\[TensorFlow\ Frontend\]\ Internal\ error\,\ no\ translator\ found\ for\ operation\(s\)\:\ )((\w+)(\,\ \w+)*)$" conversion_error_match = re.findall(conversion_error_re, ex_msg, re.MULTILINE) all_fallback_operations = [ - # corresponds to TF1 TensorList operation - "TensorArrayScatterV3", "TensorArrayV3", "TensorArraySizeV3", "TensorArrayGatherV3", + # corresponds to TF1 While operation + "LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge", # corresponds to operations with complex tensors "FFT", "FFT2D", "FFT3D", "IFFT", "IFFT2D", "IFFT3D", "RFFT", "RFFT2D", "RFFT3D", "IRFFT", "IRFFT2D", "IRFFT3D", diff --git a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py index 8d905d8f13129d..26ea01b77d6722 100644 --- a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py +++ 
b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py @@ -235,17 +235,13 @@ def test_freeze_placeholder_with_unknown_rank(self, inputs, inputs_data, expecte freeze_placeholder_with_value, input_shape, only_conversion, True) - def test_conversion_failure_fallback_default(self): + def test_conversion_tf1_while_default(self): self.basic("ctc_model_based.pbtxt", None, None, None, None, None, None, True, True, False, False) - @unittest.skipIf(platform == 'darwin', reason="Ticket - 122182") - def test_conversion_failure_fallback_use_new_frontend(self): - with self.assertRaisesRegex(Exception, - "\[TensorFlow Frontend\] Internal error, no translator found for operation\(s\)\: " - "TensorArrayGatherV3\, TensorArrayReadV3\, TensorArraySizeV3\, TensorArrayV3\, TensorArrayWriteV3"): - self.basic("ctc_model_based.pbtxt", None, None, None, None, - None, None, True, True, True, False) + def test_conversion_tf1_while_use_new_frontend(self): + self.basic("ctc_model_based.pbtxt", None, None, None, None, + None, None, True, True, True, False) @unittest.skip("88349: Fix auto-pruning in legacy FE") def test_conversion_model_oneshot_iterator_use_legacy_frontend(self): From 8d0381b0fee3ce4e68bc5705f293c2eb3bcea9d3 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 23 Oct 2023 22:54:08 +0200 Subject: [PATCH 008/275] [PT FE] Implement custom op for types alignment (#20431) * [PT FE] Implement custom op for types alignment * Fix code style * Fix inplace ops * Fix layer tests * Remove no longer needed change * Fix ovc tests * Fix fe tests --- .../openvino/frontend/pytorch/ts_decoder.py | 4 +- .../src/openvino/runtime/utils/types.py | 2 + .../openvino/op/util/framework_node.hpp | 2 +- src/frontends/pytorch/src/frontend.cpp | 4 ++ .../pytorch/src/helper_ops/align_types.hpp | 43 ++++++++++++ .../pytorch/src/helper_ops/internal_op.hpp | 56 ++++++++++++++++ src/frontends/pytorch/src/op/add.cpp | 28 ++++++-- src/frontends/pytorch/src/op/div.cpp | 19 +++++- 
src/frontends/pytorch/src/op/sub.cpp | 22 ++++++- src/frontends/pytorch/src/op_table.cpp | 13 ++-- .../pytorch/src/pt_framework_node.hpp | 9 ++- .../src/transforms/align_types_removal.cpp | 60 +++++++++++++++++ .../src/transforms/align_types_removal.hpp | 24 +++++++ .../transforms/string_equality_replacer.cpp | 12 +--- src/frontends/pytorch/src/utils.cpp | 65 +++++++++++-------- src/frontends/pytorch/src/utils.hpp | 31 +++++++-- .../test_mo_convert_pytorch.py | 25 ++++--- .../ovc_python_api_tests/test_pytorch.py | 25 ++++--- .../py_frontend_tests/test_torch_decoder.py | 4 +- .../py_frontend_tests/test_torch_frontend.py | 3 + .../pytorch_tests/pytorch_layer_test_class.py | 14 ++-- tests/layer_tests/pytorch_tests/test_add.py | 2 +- .../pytorch_tests/test_comparision.py | 2 +- .../test_deformable_convolution.py | 41 ++++-------- .../layer_tests/pytorch_tests/test_device.py | 6 +- tests/layer_tests/pytorch_tests/test_dict.py | 2 +- .../pytorch_tests/test_distance.py | 4 +- tests/layer_tests/pytorch_tests/test_div.py | 2 +- tests/layer_tests/pytorch_tests/test_empty.py | 4 +- tests/layer_tests/pytorch_tests/test_eq.py | 2 +- .../pytorch_tests/test_floor_divide.py | 2 +- tests/layer_tests/pytorch_tests/test_fp16.py | 4 +- tests/layer_tests/pytorch_tests/test_full.py | 12 ++-- .../layer_tests/pytorch_tests/test_getitem.py | 2 +- tests/layer_tests/pytorch_tests/test_if.py | 2 +- tests/layer_tests/pytorch_tests/test_index.py | 2 +- .../pytorch_tests/test_index_put_.py | 4 +- tests/layer_tests/pytorch_tests/test_len.py | 2 +- .../pytorch_tests/test_listunpack.py | 1 + tests/layer_tests/pytorch_tests/test_mul.py | 2 +- tests/layer_tests/pytorch_tests/test_or.py | 3 +- tests/layer_tests/pytorch_tests/test_pow.py | 2 +- .../pytorch_tests/test_remainder.py | 2 +- .../layer_tests/pytorch_tests/test_repeat.py | 2 +- tests/layer_tests/pytorch_tests/test_rsub.py | 8 +-- .../pytorch_tests/test_strided_const.py | 2 +- tests/layer_tests/pytorch_tests/test_sub.py | 2 +- 
.../pytorch_tests/test_transpose.py | 1 + .../pytorch_tests/test_tuple_construct.py | 8 +-- 49 files changed, 429 insertions(+), 164 deletions(-) create mode 100644 src/frontends/pytorch/src/helper_ops/align_types.hpp create mode 100644 src/frontends/pytorch/src/helper_ops/internal_op.hpp create mode 100644 src/frontends/pytorch/src/transforms/align_types_removal.cpp create mode 100644 src/frontends/pytorch/src/transforms/align_types_removal.hpp diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index f7a398bf67e519..a57393e7638d67 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -372,8 +372,8 @@ def input_is_none(self, index: int) -> bool: return False def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: - # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::_convolution", "aten::matmul"]: + # AliasDB::may_contain_alias sometimes return True for tensors produced by convolution or matmul, we have to workaround that return False try: return self.alias_db.may_contain_alias(self._raw_input(in_index), self._raw_output(out_index)) diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py index 5eeeb021a7c724..aa986d4f873c9c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/types.py +++ b/src/bindings/python/src/openvino/runtime/utils/types.py @@ -23,6 +23,7 @@ openvino_to_numpy_types_map = [ (Type.boolean, bool), + (Type.boolean, np.bool_), (Type.f16, np.float16), (Type.f32, np.float32), (Type.f64, np.float64), @@ -39,6 +40,7 @@ openvino_to_numpy_types_str_map 
= [ ("boolean", bool), + ("boolean", np.bool_), ("f16", np.float16), ("f32", np.float32), ("f64", np.float64), diff --git a/src/core/include/openvino/op/util/framework_node.hpp b/src/core/include/openvino/op/util/framework_node.hpp index 57a6be7a3a34fb..f8a63b38b823e5 100644 --- a/src/core/include/openvino/op/util/framework_node.hpp +++ b/src/core/include/openvino/op/util/framework_node.hpp @@ -81,7 +81,7 @@ class OPENVINO_API FrameworkNode : public MultiSubGraphOp { explicit FrameworkNode(const OutputVector& inputs, size_t output_size = 1, size_t num_subgraphs = 0); - void validate_and_infer_types() override; + virtual void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 14c1094b3d098f..36d4027dcc426f 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -20,6 +20,7 @@ #include "transformations/op_conversions/convert_convertlike.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transforms.hpp" +#include "transforms/align_types_removal.hpp" #include "transforms/append_list_unpack_replacer.hpp" #include "transforms/aten_cat_replacer.hpp" #include "transforms/aten_getitem_replacer.hpp" @@ -177,6 +178,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass( element::TypeVector{element::u8, element::i8, element::u4, element::i4}); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -204,6 +206,8 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + // Second pass of AlignTypesRemoval after all converting transformations + manager.register_pass(); manager.register_pass(); manager.run_passes(model); diff --git a/src/frontends/pytorch/src/helper_ops/align_types.hpp 
b/src/frontends/pytorch/src/helper_ops/align_types.hpp new file mode 100644 index 00000000000000..cd69af250fa30d --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/align_types.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "internal_op.hpp" +#include "openvino/frontend/decoder.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class AlignTypes : public InternalOperation { +public: + AlignTypes(const Output& lhs, const Output& rhs, bool align_scalars) + : InternalOperation("ov::align_types", + {lhs, rhs}, + 2, + "This is internal operation for type alignment and should be removed " + "at normalization step. It can't be removed if types can't be resolved."), + m_align_scalars(align_scalars) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + auto lhs = input_value(0); + auto rhs = input_value(1); + auto out_type = infer_types(lhs, rhs, m_align_scalars); + set_output_type(0, out_type, get_input_partial_shape(0)); + set_output_type(1, out_type, get_input_partial_shape(1)); + } + +private: + const bool m_align_scalars; +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/helper_ops/internal_op.hpp b/src/frontends/pytorch/src/helper_ops/internal_op.hpp new file mode 100644 index 00000000000000..510654dce8620a --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/internal_op.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/frontend/decoder.hpp" +#include "pt_framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class InternalOpDecoder : public DummyDecoder { +public: + explicit InternalOpDecoder(const std::string& op_type, const size_t num_outputs) + : 
m_op_type(op_type), + m_num_outputs(num_outputs) {} + const std::string& get_op_type() const override { + return m_op_type; + } + size_t num_of_outputs() const override { + return m_num_outputs; + } + size_t get_subgraph_size() const override { + return 0; + } + const std::string& decoder_type_name() const override { + return m_decoder_type; + } + +private: + const std::string m_op_type; + const std::string m_decoder_type = "internal_op"; + const size_t m_num_outputs; +}; + +class InternalOperation : public PtFrameworkNode { +protected: + InternalOperation(const std::string& op_type, + const OutputVector& inputs, + size_t num_outputs, + const std::string& no_conversion_reason) + : PtFrameworkNode(std::make_shared(op_type, num_outputs), inputs) { + auto attrs = get_attrs(); + attrs[PtFrameworkNode::failed_conversion_key] = no_conversion_reason; + set_attrs(attrs); + } +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp index f0a997b6f8fa3a..33699ad90fa39c 100644 --- a/src/frontends/pytorch/src/op/add.cpp +++ b/src/frontends/pytorch/src/op/add.cpp @@ -15,7 +15,9 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_add(const NodeContext& context) { +using namespace ov::op; + +OutputVector translate_add_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto lhs = context.get_input(0); auto rhs = context.get_input(1); @@ -26,12 +28,28 @@ OutputVector translate_add(const NodeContext& context) { // Case when two lists gets concatenated FRONT_END_OP_CONVERSION_CHECK(false, "aten::add is used for concatenation of lists, not possible to convert"); } - align_eltwise_input_types(context, lhs, rhs, true); + if (inplace) { + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared(rhs, lhs)); + } else { + 
align_eltwise_input_types(context, lhs, rhs, true); + } if (!context.input_is_none(2)) { - auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); - rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); + auto converted_alpha = context.mark_node(std::make_shared(context.get_input(2), rhs)); + rhs = context.mark_node(std::make_shared(converted_alpha, rhs)); } - return {context.mark_node(std::make_shared(lhs, rhs))}; + auto add = context.mark_node(std::make_shared(lhs, rhs)); + if (inplace) + context.mutate_input(0, add); + return {add}; +}; + +OutputVector translate_add(const NodeContext& context) { + return translate_add_common(context, false); +}; + +OutputVector translate_add_(const NodeContext& context) { + return translate_add_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp index 7fb12ec253413a..dbbb6c89af7e6c 100644 --- a/src/frontends/pytorch/src/op/div.cpp +++ b/src/frontends/pytorch/src/op/div.cpp @@ -17,7 +17,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_div(const NodeContext& context) { +OutputVector translate_div_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); @@ -34,7 +34,12 @@ OutputVector translate_div(const NodeContext& context) { y = context.mark_node(std::make_shared(y, element::f32)); } } - align_eltwise_input_types(context, x, y, true); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y, true); + } auto res = context.mark_node(std::make_shared(x, y, true)); // TODO: ticket 103296; Temporarily disable ConvertDivide transformation disable_divide_conversion(res); @@ -44,9 +49,19 @@ OutputVector translate_div(const NodeContext& context) 
{ const auto convert = context.mark_node(std::make_shared(res, element::i32)); res = context.mark_node(std::make_shared(convert, x)); } + if (inplace) + context.mutate_input(0, res); return {res}; }; +OutputVector translate_div(const NodeContext& context) { + return translate_div_common(context, false); +}; + +OutputVector translate_div_(const NodeContext& context) { + return translate_div_common(context, true); +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp index 94963ed9bdb61f..62534aee53864b 100644 --- a/src/frontends/pytorch/src/op/sub.cpp +++ b/src/frontends/pytorch/src/op/sub.cpp @@ -15,18 +15,34 @@ namespace op { using namespace ov::op; -OutputVector translate_sub(const NodeContext& context) { +OutputVector translate_sub_common(const NodeContext& context, bool inplace) { num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - align_eltwise_input_types(context, x, y); + if (inplace) { + if (x.get_element_type().is_dynamic() || x.get_element_type() != y.get_element_type()) + y = context.mark_node(std::make_shared(x, y)); + } else { + align_eltwise_input_types(context, x, y); + } // default alpha is 1 so no need to multiply if alpha is not provided if (!context.input_is_none(2)) { auto alpha = context.get_input(2); auto casted_alpha = context.mark_node(std::make_shared(alpha, y)); y = context.mark_node(std::make_shared(casted_alpha, y)); } - return {context.mark_node(std::make_shared(x, y))}; + auto sub = context.mark_node(std::make_shared(x, y)); + if (inplace) + context.mutate_input(0, sub); + return {sub}; +}; + +OutputVector translate_sub(const NodeContext& context) { + return translate_sub_common(context, false); +}; + +OutputVector translate_sub_(const NodeContext& context) { + return translate_sub_common(context, true); }; } // namespace op diff --git a/src/frontends/pytorch/src/op_table.cpp 
b/src/frontends/pytorch/src/op_table.cpp index d9ac0aff6af2dc..c307be6fc22e2c 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -23,6 +23,7 @@ OP_CONVERTER(translate_adaptive_max_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); OP_CONVERTER(translate_adaptive_max_pool1d); OP_CONVERTER(translate_add); +OP_CONVERTER(translate_add_); OP_CONVERTER(translate_addcmul); OP_CONVERTER(translate_addmm); OP_CONVERTER(translate_all); @@ -57,6 +58,7 @@ OP_CONVERTER(translate_deform_conv); OP_CONVERTER(translate_derive_index); OP_CONVERTER(translate_dim); OP_CONVERTER(translate_div); +OP_CONVERTER(translate_div_); OP_CONVERTER(translate_elu); OP_CONVERTER(translate_embedding); OP_CONVERTER(translate_embedding_bag); @@ -175,6 +177,7 @@ OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_std); OP_CONVERTER(translate_std_mean); OP_CONVERTER(translate_sub); +OP_CONVERTER(translate_sub_); OP_CONVERTER(translate_sum); OP_CONVERTER(translate_t); OP_CONVERTER(translate_to); @@ -247,7 +250,7 @@ const std::map get_supported_ops_ts() { {"aten::adaptive_max_pool2d", op::quantizable_op}, {"aten::adaptive_max_pool3d", op::quantizable_op}, {"aten::add", op::translate_add}, - {"aten::add_", op::inplace_op}, + {"aten::add_", op::translate_add_}, {"aten::addcmul", op::translate_addcmul}, {"aten::addmm", op::translate_addmm}, {"aten::all", op::translate_all}, @@ -309,7 +312,7 @@ const std::map get_supported_ops_ts() { {"aten::dequantize", op::skip_node}, // we convert model to fp32 using FQ, so dequantization is not needed {"aten::dim", op::translate_dim}, {"aten::div", op::translate_div}, - {"aten::div_", op::inplace_op}, + {"aten::div_", op::translate_div_}, {"aten::dropout", op::skip_node}, {"aten::dropout_", op::skip_node}, {"aten::elu", op::translate_elu}, @@ -404,9 +407,9 @@ const std::map get_supported_ops_ts() { {"aten::minimum", op::translate_minimum}, {"aten::mm", op::translate_1to1_match_2_inputs}, {"aten::mul", 
op::translate_1to1_match_2_inputs_align_types}, - {"aten::mul_", op::inplace_op>}, + {"aten::mul_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::multiply", op::translate_1to1_match_2_inputs_align_types}, - {"aten::multiply_", op::inplace_op>}, + {"aten::multiply_", op::inplace_translate_1to1_match_2_inputs_align_types}, {"aten::narrow", op::translate_narrow}, {"aten::ne", op::translate_1to1_match_2_inputs_align_types}, {"aten::neg", op::translate_neg}, @@ -477,7 +480,7 @@ const std::map get_supported_ops_ts() { {"aten::std", op::translate_std}, {"aten::std_mean", op::translate_std_mean}, {"aten::sub", op::translate_sub}, - {"aten::sub_", op::inplace_op}, + {"aten::sub_", op::translate_sub_}, {"aten::sum", op::translate_sum}, {"aten::swapaxes", op::quantizable_op}, {"aten::t", op::translate_t}, diff --git a/src/frontends/pytorch/src/pt_framework_node.hpp b/src/frontends/pytorch/src/pt_framework_node.hpp index 04b71d1169ae81..00d967200405cb 100644 --- a/src/frontends/pytorch/src/pt_framework_node.hpp +++ b/src/frontends/pytorch/src/pt_framework_node.hpp @@ -20,14 +20,17 @@ class PtFrameworkNode : public ov::op::util::FrameworkNode { PtFrameworkNode(const std::shared_ptr& decoder, const OutputVector& inputs, size_t output_size, - bool is_backprop = false) + bool is_reverseprop = false) : ov::op::util::FrameworkNode(inputs, output_size, decoder->get_subgraph_size()), m_decoder(decoder) { ov::op::util::FrameworkNodeAttrs attrs; attrs.set_type_name("PTFrameworkNode"); - if (is_backprop) { - attrs[op_type_key] = m_decoder->get_op_type() + "_backprop"; + if (is_reverseprop) { + attrs[op_type_key] = m_decoder->get_op_type() + "_reverseprop"; attrs[schema_key] = "None"; + attrs[failed_conversion_key] = + "This is an internal openvino operation representing reverse data propagation. 
It should not appear in " + "graph in normal conversion flow and might be result of other failures."; } else { attrs[op_type_key] = m_decoder->get_op_type(); attrs[schema_key] = m_decoder->get_schema(); diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.cpp b/src/frontends/pytorch/src/transforms/align_types_removal.cpp new file mode 100644 index 00000000000000..c5e43d8af13004 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "align_types_removal.hpp" + +#include +#include + +#include "helper_ops/align_types.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/util/framework_node.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; + +AlignTypesRemoval::AlignTypesRemoval() { + auto align_types_pattern = ov::pass::pattern::wrap_type(); + + ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher& m) { + auto align_types = std::dynamic_pointer_cast(m.get_match_root()); + if (!align_types) + return false; + auto lhs_itype = align_types->get_input_element_type(0); + auto rhs_itype = align_types->get_input_element_type(1); + auto lhs_otype = align_types->get_output_element_type(0); + auto rhs_otype = align_types->get_output_element_type(1); + if (lhs_otype.is_static() && rhs_otype.is_static()) { + auto out1 = align_types->input_value(0); + auto out2 = align_types->input_value(1); + if (lhs_itype != lhs_otype) + out1 = std::make_shared(align_types->input_value(0), lhs_otype); + if (rhs_itype != rhs_otype) + out2 = std::make_shared(align_types->input_value(1), rhs_otype); + 
align_types->output(0).replace(out1); + align_types->output(1).replace(out2); + return true; + } + return false; + }; + + auto m = std::make_shared(align_types_pattern, + "ov::frontend::pytorch::pass::AlignTypesRemoval"); + this->register_matcher(m, callback); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/align_types_removal.hpp b/src/frontends/pytorch/src/transforms/align_types_removal.hpp new file mode 100644 index 00000000000000..bba81df9e0e086 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/align_types_removal.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class AlignTypesRemoval : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::AlignTypesRemoval"); + AlignTypesRemoval(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp index 0219600799a3c0..f7e5e80b604a76 100644 --- a/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/string_equality_replacer.cpp @@ -26,16 +26,8 @@ using namespace ov::op; StringEqualityReplacer::StringEqualityReplacer() { auto framework_node_lhs = pattern::wrap_type(); auto framework_node_rhs = pattern::wrap_type(); - auto convert_lhs = pattern::wrap_type({framework_node_lhs}); - auto convert_like_lhs = pattern::wrap_type({framework_node_lhs, framework_node_rhs}); - auto convert_rhs = pattern::wrap_type({framework_node_rhs}); - auto convert_like_rhs = pattern::wrap_type({framework_node_rhs, framework_node_lhs}); - 
auto lhs_pattern = - std::make_shared(OutputVector{framework_node_lhs, convert_lhs, convert_like_lhs}); - auto rhs_pattern = - std::make_shared(OutputVector{framework_node_rhs, convert_rhs, convert_like_rhs}); - auto equal_op = pattern::wrap_type({lhs_pattern, rhs_pattern}); - auto not_equal_op = pattern::wrap_type({lhs_pattern, rhs_pattern}); + auto equal_op = pattern::wrap_type({framework_node_lhs, framework_node_rhs}); + auto not_equal_op = pattern::wrap_type({framework_node_lhs, framework_node_rhs}); auto string_equality_pattern = std::make_shared(OutputVector{equal_op, not_equal_op}); diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index afd9b5ebf85216..7decae35b30bbb 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -4,6 +4,7 @@ #include "utils.hpp" +#include "helper_ops/align_types.hpp" #include "op_table.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/frontend/pytorch/decoder.hpp" @@ -381,33 +382,17 @@ std::unordered_map bit_to_int{ }; } // namespace -void align_eltwise_input_types(const NodeContext& context, Output& lhs, Output& rhs, bool align_scalars) { +element::Type infer_types(const Output& lhs, const Output& rhs, bool align_scalars) { const auto& lhs_type = lhs.get_element_type(); const auto& rhs_type = rhs.get_element_type(); - auto out_type = context.get_output_type(0); - if (out_type.is()) { - auto otype = out_type.as(); - if (otype.is_real()) { - if (otype != lhs_type) { - lhs = context.mark_node(std::make_shared(lhs, otype)); - } - if (otype != rhs_type) { - rhs = context.mark_node(std::make_shared(rhs, otype)); - } - return; - } - } if (lhs_type.is_dynamic() || rhs_type.is_dynamic()) { - // if any of types is not known, align to lhs type. - // TODO: can be fixed with special operation? - rhs = context.mark_node(std::make_shared(rhs, lhs)); - return; + return element::dynamic; } // Both types are static, align types. 
If float and int types are used convert int type to f32, after that align // to the largest bitness, if both float or both int, just align bitness if (lhs_type == rhs_type) - return; + return lhs_type; // if one of operands is scalar, the resulting type is taken from the other operand except when scalar is float // type and other operand is int, in that case BOTH operands get fp32 type @@ -429,11 +414,9 @@ void align_eltwise_input_types(const NodeContext& context, Output& lhs, Ou if (!align_scalars) rhs_dst_type = element::f32; } else if (is_lhs_scalar && rhs_type != element::boolean) { - lhs = context.mark_node(std::make_shared(lhs, rhs)); - return; + return rhs_type; } else if (is_rhs_scalar && lhs_type != element::boolean) { - rhs = context.mark_node(std::make_shared(rhs, lhs)); - return; + return lhs_type; } if (!lhs_dst_type.is_real() && rhs_dst_type.is_real()) { @@ -470,13 +453,39 @@ void align_eltwise_input_types(const NodeContext& context, Output& lhs, Ou } } } + return lhs_dst_type; +} - // Cast to destination types - if (lhs_dst_type != lhs_type) { - lhs = context.mark_node(std::make_shared(lhs, lhs_dst_type)); +void align_eltwise_input_types(const NodeContext& context, Output& lhs, Output& rhs, bool align_scalars) { + const auto& lhs_type = lhs.get_element_type(); + const auto& rhs_type = rhs.get_element_type(); + auto out_type = context.get_output_type(0); + if (out_type.is()) { + auto otype = out_type.as(); + if (otype.is_real()) { + if (otype != lhs_type) { + lhs = context.mark_node(std::make_shared(lhs, otype)); + } + if (otype != rhs_type) { + rhs = context.mark_node(std::make_shared(rhs, otype)); + } + return; + } + } + auto dst_type = infer_types(lhs, rhs, align_scalars); + if (dst_type.is_dynamic()) { + // We can't decide the type at this point, create a special operation + auto at = std::make_shared(lhs, rhs, align_scalars); + lhs = at->output(0); + rhs = at->output(1); + return; + } + // Cast to destination type + if (dst_type != lhs_type) { + 
lhs = context.mark_node(std::make_shared(lhs, dst_type)); } - if (rhs_dst_type != rhs_type) { - rhs = context.mark_node(std::make_shared(rhs, rhs_dst_type)); + if (dst_type != rhs_type) { + rhs = context.mark_node(std::make_shared(rhs, dst_type)); } } diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index b4a37118961ab7..20bae6fa62f5c3 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -7,6 +7,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" namespace ov { @@ -65,11 +66,11 @@ Any simplified_type_interpret(Any type); void add_exception_to_fw_node(std::shared_ptr node, const std::string& msg); +element::Type infer_types(const Output& lhs, const Output& rhs, bool align_scalars); void align_eltwise_input_types(const NodeContext& context, Output& lhs, Output& rhs, bool align_scalars = false); - void align_output_types(const NodeContext& context, OutputVector& outputs); std::deque> get_list_as_outputs(const Output& start); @@ -125,12 +126,31 @@ OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& contex FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); auto rhs = context.get_input(1); - align_eltwise_input_types(context, lhs, rhs, true); + auto lhs_type = context.get_input_type(0); + auto rhs_type = context.get_input_type(1); + // If type is string or None, we shouldn't align + if (!lhs_type.is() && !rhs_type.is() && !lhs_type.is() && + !rhs_type.is()) + align_eltwise_input_types(context, lhs, rhs, true); OutputVector res = {context.mark_node(std::make_shared(lhs, rhs))}; align_output_types(context, res); return res; } +template +OutputVector inplace_translate_1to1_match_2_inputs_align_types(const NodeContext& context) { + num_inputs_check(context, 2, 
2); + FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); + auto lhs = context.get_input(0); + auto rhs = context.get_input(1); + // For inplace op we know direction of type alignment + if (lhs.get_element_type().is_dynamic() || lhs.get_element_type() != rhs.get_element_type()) + rhs = context.mark_node(std::make_shared(rhs, lhs)); + OutputVector res = {context.mark_node(std::make_shared(lhs, rhs))}; + context.mutate_input(idx, res[0]); + return res; +} + inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } @@ -168,7 +188,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_output_debug_name); } virtual PartialShape get_output_shape(size_t index) const override { - FRONT_END_NOT_IMPLEMENTED(get_output_shape); + return PartialShape::dynamic(); } virtual Any get_output_type(size_t index) const override { FRONT_END_NOT_IMPLEMENTED(get_output_type); @@ -189,7 +209,7 @@ class DummyDecoder : public TorchDecoder { FRONT_END_NOT_IMPLEMENTED(get_op_type); } virtual const std::string& get_schema() const override { - FRONT_END_NOT_IMPLEMENTED(get_schema); + return m_schema; } virtual size_t num_of_outputs() const override { FRONT_END_NOT_IMPLEMENTED(num_of_outputs); @@ -218,6 +238,9 @@ class DummyDecoder : public TorchDecoder { virtual OutputVector inlined_inputs(size_t start_index) const override { FRONT_END_NOT_IMPLEMENTED(inlined_inputs); } + +private: + const std::string m_schema = "NONE"; }; } // namespace pytorch diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index e437209cde9c9b..60bb92b85b5920 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -284,8 +284,8 @@ def 
scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -472,9 +472,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -490,17 +490,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -523,8 +522,8 @@ def 
create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -537,8 +536,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} diff --git a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py index 5ae2dcac31c83e..90db75d01c0e78 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py +++ b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py @@ -281,8 +281,8 @@ def scripted_fn(x: torch.Tensor, y: torch.Tensor): return torch.sigmoid(torch.relu(x * y)) inp_shape = PartialShape([Dimension(1, -1), Dimension(-1, 5), 10]) - ref_model = make_ref_pt_model_two_inputs(inp_shape, dtype=Type.dynamic) - return scripted_fn, ref_model, {'input': [(inp_shape), (inp_shape)]} + ref_model = make_ref_pt_model_two_inputs(inp_shape) + return scripted_fn, ref_model, {'input': [(inp_shape, Type.f32), (inp_shape, Type.f32)]} def create_pytorch_nn_module_layout_list(tmp_dir): @@ -469,9 +469,9 @@ def create_pytorch_nn_module_scale_list_compression_enabled(tmp_dir): def create_pytorch_nn_module_shapes_list_static(tmp_dir): pt_model = make_pt_model_two_inputs() - ref_model = make_ref_pt_model_two_inputs([1, 3, 
20, 20], dtype=Type.dynamic) + ref_model = make_ref_pt_model_two_inputs([1, 3, 20, 20]) - return pt_model, ref_model, {'input': [[1, 3, 20, 20], [1, 3, 20, 20]]} + return pt_model, ref_model, {'input': [([1, 3, 20, 20], Type.f32), ([1, 3, 20, 20], Type.f32)]} def create_pytorch_nn_module_shapes_list_static_via_input(tmp_dir): @@ -487,17 +487,16 @@ def create_pytorch_nn_module_shapes_list_dynamic(tmp_dir): [-1, 3, 20, Dimension(-1, 20)]] param1 = ov.opset8.parameter(PartialShape( - inp_shapes[0]), name="x", dtype=Type.dynamic) + inp_shapes[0]), name="x", dtype=Type.f32) param2 = ov.opset8.parameter(PartialShape( - inp_shapes[1]), name="y", dtype=Type.dynamic) - cl = ov.opset8.convert_like(param2, param1) - mul = ov.opset8.multiply(param1, cl) + inp_shapes[1]), name="y", dtype=Type.f32) + mul = ov.opset8.multiply(param1, param2) relu = ov.opset8.relu(mul) sigm = ov.opset8.sigmoid(relu) parameter_list = [param1, param2] ref_model = Model([sigm], parameter_list, "test") - return pt_model, ref_model, {'input': inp_shapes} + return pt_model, ref_model, {'input': [(inp_shapes[0], Type.f32), (inp_shapes[1], Type.f32)]} def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): @@ -520,8 +519,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_via_input(tmp_dir): def create_pytorch_nn_module_shapes_list_dynamic_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)]] - ref_model = make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[Dimension(-1), 3, 20, Dimension(20, -1)], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} @@ -534,8 +533,8 @@ def create_pytorch_nn_module_shapes_list_dynamic_single_input_via_input(tmp_dir) def create_pytorch_nn_module_shapes_list_static_single_input(tmp_dir): pt_model = make_pt_model_one_input() - inp_shapes = [[1, 3, 20, 20]] - ref_model = 
make_ref_pt_model_one_input(inp_shapes[0], dtype=Type.dynamic) + inp_shapes = [[1, 3, 20, 20], Type.f32] + ref_model = make_ref_pt_model_one_input(inp_shapes[0]) return pt_model, ref_model, {'input': inp_shapes} diff --git a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py index 6adbf0d033e98f..7e1758bd84d4fe 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py @@ -641,7 +641,7 @@ def f(x, y): @pytest.mark.precommit def test_pytorch_decoder_can_convert_scripted_function(): - from openvino.tools.mo import convert_model + from openvino import convert_model, Type scripted = torch.jit.script(f) - model = convert_model(scripted) + model = convert_model(scripted, input=[Type.f32, Type.f32]) assert model is not None diff --git a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py index 6b4d748fc45632..f76c7b1fa97ba8 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_frontend.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_frontend.py @@ -22,6 +22,9 @@ def forward(self, x): class aten_multi_input_output(torch.nn.Module): def forward(self, x, y, z): + x = x.to(torch.float32) + y = y.to(torch.float32) + z = z.to(torch.float32) return torch.nn.functional.relu(x), x * y, z / x diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 00cdc427056518..f8b726c4c5f66d 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -77,10 +77,13 @@ def use_torch_compile_backend(): if use_torch_compile_backend(): self.torch_compile_backend_test(model, torch_inputs, custom_eps) else: + trace_model = kwargs.get('trace_model', False) + freeze_model = kwargs.get('freeze_model', True) with 
torch.no_grad(): - trace_model = kwargs.get('trace_model', False) - freeze_model = kwargs.get('freeze_model', True) - smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + if kwargs.get('use_convert_model', False): + smodel, converted_model = self.convert_via_mo(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + else: + smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) if kind is not None and not isinstance(kind, (tuple, list)): kind = [kind] @@ -162,12 +165,13 @@ def _prepare_input(self): raise RuntimeError("Please provide inputs generation function") def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_inputs, freeze_model): - from openvino.tools.ovc import convert_model - kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} + from openvino import convert_model, PartialShape if trace_model: decoder = TorchScriptPythonDecoder(model, example_input=example_input, skip_freeze=not freeze_model) + kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} else: decoder = TorchScriptPythonDecoder(model, skip_freeze=not freeze_model) + kwargs = {"input": [(i.dtype, PartialShape([-1] * len(i.shape))) for i in example_input]} smodel = decoder.pt_module print(smodel.inlined_graph) if not dynamic_shapes: diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index 7cf243b0577bcb..d89a05420eb7e1 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -43,7 +43,7 @@ def forward2(self, lhs, rhs): @pytest.mark.parametrize("op_type", ["add", "add_"]) def test_add(self, ie_device, precision, ir_version, alpha, input_rhs, op_type): self.input_rhs = input_rhs - self._test(*self.create_model(alpha, 
op_type), ie_device, precision, ir_version) + self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version, use_convert_model=True) class TestAddTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_comparision.py b/tests/layer_tests/pytorch_tests/test_comparision.py index 98134a274f7bdb..a114afb1f712c8 100644 --- a/tests/layer_tests/pytorch_tests/test_comparision.py +++ b/tests/layer_tests/pytorch_tests/test_comparision.py @@ -55,7 +55,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def test_comp(self, op, ie_device, precision, ir_version): - self._test(*self.create_model(op), ie_device, precision, ir_version) + self._test(*self.create_model(op), ie_device, precision, ir_version, use_convert_model=True) class TestCompMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py index 8fa207efe85b41..fa4293b275e6c7 100644 --- a/tests/layer_tests/pytorch_tests/test_deformable_convolution.py +++ b/tests/layer_tests/pytorch_tests/test_deformable_convolution.py @@ -10,15 +10,6 @@ from torchvision.ops import deform_conv2d -def xfail_106712(test_param): - return pytest.param( - test_param, - marks=pytest.mark.xfail( - reason="Depending on number of groups and number of output channels, deformable convolution may return incorrect reasults. 
Ticket 106712" - ), - ) - - params = [ { "weights_shape": [64, 64, 3, 3], @@ -62,15 +53,13 @@ def xfail_106712(test_param): "padding": (2, 2), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 16, 3, 3], - "offset_shape": [1, 18, 64, 64], - "stride": (1, 1), - "padding": (1, 1), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 16, 3, 3], + "offset_shape": [1, 18, 64, 64], + "stride": (1, 1), + "padding": (1, 1), + "dilation": (1, 1), + }, { "weights_shape": [60, 16, 3, 3], "offset_shape": [1, 18, 64, 64], @@ -92,15 +81,13 @@ def xfail_106712(test_param): "padding": (1, 1), "dilation": (1, 1), }, - xfail_106712( - { - "weights_shape": [64, 32, 3, 3], - "offset_shape": [1, 36, 68, 68], - "stride": (1, 1), - "padding": (3, 3), - "dilation": (1, 1), - } - ), + { + "weights_shape": [64, 32, 3, 3], + "offset_shape": [1, 36, 68, 68], + "stride": (1, 1), + "padding": (3, 3), + "dilation": (1, 1), + }, { "weights_shape": [62, 32, 3, 3], "offset_shape": [1, 36, 68, 68], diff --git a/tests/layer_tests/pytorch_tests/test_device.py b/tests/layer_tests/pytorch_tests/test_device.py index a36bd731a6dae7..2efdf5c85c50fa 100644 --- a/tests/layer_tests/pytorch_tests/test_device.py +++ b/tests/layer_tests/pytorch_tests/test_device.py @@ -56,7 +56,8 @@ def test_device(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) @pytest.mark.parametrize("device_string", ["cpu", "cuda"]) @@ -68,5 +69,6 @@ def test_device_type(self, device_string, ie_device, precision, ir_version): ie_device, precision, ir_version, - trace_model=False + trace_model=False, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_dict.py b/tests/layer_tests/pytorch_tests/test_dict.py index a3e3c29b6baf1f..6e4db9dea825bd 100644 --- a/tests/layer_tests/pytorch_tests/test_dict.py +++ b/tests/layer_tests/pytorch_tests/test_dict.py @@ -23,4 +23,4 @@ def forward(self, 
x): @pytest.mark.nightly @pytest.mark.precommit def test_dict(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_distance.py b/tests/layer_tests/pytorch_tests/test_distance.py index f8cec6998ca7b6..fb9fade8206996 100644 --- a/tests/layer_tests/pytorch_tests/test_distance.py +++ b/tests/layer_tests/pytorch_tests/test_distance.py @@ -34,7 +34,7 @@ def forward(self, x, y): @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_cdist(self, p, ie_device, precision, ir_version): - self._test(*self.create_model(p), ie_device, precision, ir_version) + self._test(*self.create_model(p), ie_device, precision, ir_version, use_convert_model=True) class TestPairwiseDistance(PytorchLayerTest): @@ -68,4 +68,4 @@ def forward(self, x, y): @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_cdist(self, p, eps, keepdim, ie_device, precision, ir_version): - self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version) \ No newline at end of file + self._test(*self.create_model(p, eps, keepdim), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_div.py b/tests/layer_tests/pytorch_tests/test_div.py index 8b7dad351817d4..564cb2915c8686 100644 --- a/tests/layer_tests/pytorch_tests/test_div.py +++ b/tests/layer_tests/pytorch_tests/test_div.py @@ -49,7 +49,7 @@ def test_div_pt_spec(self, input_array, other_array, rounding_mode, ie_device, p self.other_array = other_array self.other_type = np.float32 self._test(*self.create_model(rounding_mode), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class 
TestDivTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_empty.py b/tests/layer_tests/pytorch_tests/test_empty.py index 7b5a4b92563dc5..c504e262038c87 100644 --- a/tests/layer_tests/pytorch_tests/test_empty.py +++ b/tests/layer_tests/pytorch_tests/test_empty.py @@ -134,7 +134,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -142,4 +142,4 @@ def test_new_empty(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_empty_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_eq.py b/tests/layer_tests/pytorch_tests/test_eq.py index 91c30df79fabb8..6fa5f4ecfba5eb 100644 --- a/tests/layer_tests/pytorch_tests/test_eq.py +++ b/tests/layer_tests/pytorch_tests/test_eq.py @@ -45,4 +45,4 @@ def test_eq_pt_spec(self, input_array, other_array, types, ie_device, precision, self.input_type = types[0] self.other_array = other_array self.other_type = types[1] - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_floor_divide.py 
b/tests/layer_tests/pytorch_tests/test_floor_divide.py index 44c1eadc3ce542..0cdc46333b2651 100644 --- a/tests/layer_tests/pytorch_tests/test_floor_divide.py +++ b/tests/layer_tests/pytorch_tests/test_floor_divide.py @@ -59,7 +59,7 @@ def forward(self, input_tensor, other_tensor): def test_floor_divide(self, input_tensor, other_tensor, ie_device, precision, ir_version): self.input_tensor = input_tensor self.other_tensor = other_tensor - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) @pytest.mark.parametrize('input_tensor', ([ np.random.randint(low=0, high=10, size=5).astype(np.float32), diff --git a/tests/layer_tests/pytorch_tests/test_fp16.py b/tests/layer_tests/pytorch_tests/test_fp16.py index b754306727935c..5952f40f167c54 100644 --- a/tests/layer_tests/pytorch_tests/test_fp16.py +++ b/tests/layer_tests/pytorch_tests/test_fp16.py @@ -29,7 +29,7 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_bf16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) class TestFP16(PytorchLayerTest): @@ -53,4 +53,4 @@ def forward(self, x): @pytest.mark.parametrize("to_trace", [True, False]) def test_fp16(self, ie_device, precision, ir_version, to_trace): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=to_trace, freeze_model=False) + ir_version, trace_model=to_trace, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index 52b5b2e3e58bd1..cf3794be11e891 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -347,7 +347,7 @@ def 
forward(self, input_tensor: torch.Tensor, x: float): @pytest.mark.precommit def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value,input_dtype", [(0, np.uint8), (1, np.int32), (-1, np.float32), (0.5, np.float64)]) @@ -355,7 +355,7 @@ def test_new_full(self, shape, value, input_dtype, ie_device, precision, ir_vers @pytest.mark.nightly def test_new_full_with_dtype(self, value, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}) + kwargs_to_prepare_input={'value': value, 'input_dtype': input_dtype}, use_convert_model=True) class TestZerosAndOnes(PytorchLayerTest): @@ -562,7 +562,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -570,7 +570,7 @@ def test_new_zeros(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_zeros_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, 
used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) class TestNewOnes(PytorchLayerTest): @@ -621,7 +621,7 @@ def forward(self, input_tensor: torch.Tensor): @pytest.mark.precommit def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("input_dtype", [bool, np.uint8, np.int8, np.int32, np.int64, np.float32, np.float64]) @@ -629,4 +629,4 @@ def test_new_ones(self, shape, input_dtype, ie_device, precision, ir_version): @pytest.mark.nightly def test_new_ones_with_dtype(self, shape, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, used_dtype=True), ie_device, precision, ir_version, - kwargs_to_prepare_input={'input_dtype': input_dtype}) + kwargs_to_prepare_input={'input_dtype': input_dtype}, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_getitem.py b/tests/layer_tests/pytorch_tests/test_getitem.py index b827f626914125..62d7e12ada6e61 100644 --- a/tests/layer_tests/pytorch_tests/test_getitem.py +++ b/tests/layer_tests/pytorch_tests/test_getitem.py @@ -102,4 +102,4 @@ def _prepare_input(self): @pytest.mark.parametrize("idx", [-4, -3, -2, -1, 0, 1, 2, 3]) def test_add_cat(self, ie_device, precision, ir_version, idx): self._test(aten_add_getitem(idx), None, ["aten::__getitem__", "aten::add", "prim::ListConstruct"], - ie_device, precision, ir_version, freeze_model=False) + ie_device, precision, ir_version, freeze_model=False, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_if.py 
b/tests/layer_tests/pytorch_tests/test_if.py index b4fd9470cc8be3..fa4b1e9967dd02 100644 --- a/tests/layer_tests/pytorch_tests/test_if.py +++ b/tests/layer_tests/pytorch_tests/test_if.py @@ -39,4 +39,4 @@ def forward(self, x, y): @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 114818") def test_if(self, y, ie_device, precision, ir_version): self.y = y - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index.py b/tests/layer_tests/pytorch_tests/test_index.py index 6f7cea86990d2c..4b4e53690f927e 100644 --- a/tests/layer_tests/pytorch_tests/test_index.py +++ b/tests/layer_tests/pytorch_tests/test_index.py @@ -150,4 +150,4 @@ def forward(self, x): [2, 2, 3, 4])) def test_index_mask(self, input_shape, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, kwargs_to_prepare_input={ - "input_shape": input_shape}, trace_model=True) + "input_shape": input_shape}, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_index_put_.py b/tests/layer_tests/pytorch_tests/test_index_put_.py index 55cbe39bd92d58..6f94a0912d4df2 100644 --- a/tests/layer_tests/pytorch_tests/test_index_put_.py +++ b/tests/layer_tests/pytorch_tests/test_index_put_.py @@ -162,7 +162,7 @@ def test_nonzero_index_put_(self, ie_device, precision, ir_version, input_data, self.values = input_data["values"] self.indices_0 = indices[0] self.indices_1 = indices[1] - self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) class TestMask_IndexPut(PytorchLayerTest): def _prepare_input(self): @@ -181,4 +181,4 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit def 
test_nonzero_index_put_(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_len.py b/tests/layer_tests/pytorch_tests/test_len.py index d6d3a7dc211564..7aa5f020b9c7fc 100644 --- a/tests/layer_tests/pytorch_tests/test_len.py +++ b/tests/layer_tests/pytorch_tests/test_len.py @@ -48,7 +48,7 @@ def test_len(self, ie_device, precision, ir_version, input_tensor): def test_len_int_list(self, ie_device, precision, ir_version, input_tensor): self.input_tensor = input_tensor self._test(*self.create_model_int_list(), - ie_device, precision, ir_version) + ie_device, precision, ir_version, use_convert_model=True) class TestLenEmpty(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_listunpack.py b/tests/layer_tests/pytorch_tests/test_listunpack.py index b001bccd2a97d1..39d72bfe54c6e9 100644 --- a/tests/layer_tests/pytorch_tests/test_listunpack.py +++ b/tests/layer_tests/pytorch_tests/test_listunpack.py @@ -123,6 +123,7 @@ def test_listconstruct_getitem_listunpack( ie_device, precision, ir_version, + use_convert_model=True, ) class TestMeshgridListUnpack(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_mul.py b/tests/layer_tests/pytorch_tests/test_mul.py index f22fd7b03bc0cb..c7aea00ee8ad89 100644 --- a/tests/layer_tests/pytorch_tests/test_mul.py +++ b/tests/layer_tests/pytorch_tests/test_mul.py @@ -37,7 +37,7 @@ def test_mul_pt_spec(self, input_array, other_array, ie_device, precision, ir_ve self.input_type = np.float32 self.other_array = other_array self.other_type = np.float32 - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestMulTypes(PytorchLayerTest): diff --git 
a/tests/layer_tests/pytorch_tests/test_or.py b/tests/layer_tests/pytorch_tests/test_or.py index c6592a11af083f..bde1e61ecce74d 100644 --- a/tests/layer_tests/pytorch_tests/test_or.py +++ b/tests/layer_tests/pytorch_tests/test_or.py @@ -25,4 +25,5 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_or(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, dynamic_shapes=False, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, + dynamic_shapes=False, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_pow.py b/tests/layer_tests/pytorch_tests/test_pow.py index 9cf6468404e5d3..92e65898e353eb 100644 --- a/tests/layer_tests/pytorch_tests/test_pow.py +++ b/tests/layer_tests/pytorch_tests/test_pow.py @@ -41,7 +41,7 @@ def forward(self, input_data, exponent): @pytest.mark.precommit def test_pow(self, ie_device, precision, ir_version, test_input): self.test_input = test_input - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestPowMixedTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_remainder.py b/tests/layer_tests/pytorch_tests/test_remainder.py index 4a499e85a37870..05ad2d3b1387fe 100644 --- a/tests/layer_tests/pytorch_tests/test_remainder.py +++ b/tests/layer_tests/pytorch_tests/test_remainder.py @@ -32,7 +32,7 @@ def forward(self, lhs, rhs): @pytest.mark.precommit def test_remainder(self, ie_device, precision, ir_version, input_rhs): self.input_rhs = input_rhs - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) class TestRemainderTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_repeat.py b/tests/layer_tests/pytorch_tests/test_repeat.py 
index 45263366c76c54..884a51e2a24f6f 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat.py +++ b/tests/layer_tests/pytorch_tests/test_repeat.py @@ -77,4 +77,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_repeat_t5(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True) + self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 64c4b9619d7b73..9c144ad4da247b 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -40,9 +40,9 @@ def forward(self, x, y:int, alpha: float): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub_f(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version, use_convert_model=True) @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), np.array(5).astype(int), @@ -50,9 +50,9 @@ def test_rsub(self, ie_device, precision, ir_version, input_data): @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub_i(self, ie_device, precision, ir_version, input_data): self.input_data = input_data - self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version) + self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version, use_convert_model=True) class TestRsubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_strided_const.py b/tests/layer_tests/pytorch_tests/test_strided_const.py index 
438edbc88e24fe..ab33e92f88b4b8 100644 --- a/tests/layer_tests/pytorch_tests/test_strided_const.py +++ b/tests/layer_tests/pytorch_tests/test_strided_const.py @@ -31,4 +31,4 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit def test_strides(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) + self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_sub.py b/tests/layer_tests/pytorch_tests/test_sub.py index aa97b0f23653fb..381d1672454cbe 100644 --- a/tests/layer_tests/pytorch_tests/test_sub.py +++ b/tests/layer_tests/pytorch_tests/test_sub.py @@ -50,7 +50,7 @@ def _forward_inplace(self, x, y, alpha: float): @pytest.mark.precommit def test_sub(self, ie_device, precision, ir_version, input_data, inplace): self.input_data = input_data - self._test(*self.create_model(inplace), ie_device, precision, ir_version) + self._test(*self.create_model(inplace), ie_device, precision, ir_version, use_convert_model=True) class TestSubTypes(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_transpose.py b/tests/layer_tests/pytorch_tests/test_transpose.py index b3378761da8c74..5dec55ec59c597 100644 --- a/tests/layer_tests/pytorch_tests/test_transpose.py +++ b/tests/layer_tests/pytorch_tests/test_transpose.py @@ -91,4 +91,5 @@ def test_t_small(self, num_dims, input_dtype, mode, ie_device, precision, ir_ver precision, ir_version, kwargs_to_prepare_input={"num_dims": num_dims, "input_dtype": input_dtype}, + use_convert_model=True, ) diff --git a/tests/layer_tests/pytorch_tests/test_tuple_construct.py b/tests/layer_tests/pytorch_tests/test_tuple_construct.py index 1582df48c4b370..45413a940f132b 100644 --- a/tests/layer_tests/pytorch_tests/test_tuple_construct.py +++ b/tests/layer_tests/pytorch_tests/test_tuple_construct.py @@ -60,7 +60,7 @@ def forward(self, x): @pytest.mark.parametrize("case", ["single", 
"multiple", "none", "list", "tensor_tail", "list_and_tuple"]) @pytest.mark.nightly def test_tuple_construct(self, case, ie_device, precision, ir_version): - self._test(*self.create_model(case), ie_device, precision, ir_version) + self._test(*self.create_model(case), ie_device, precision, ir_version, use_convert_model=True) class TestTupleConstructTupleUnpack(PytorchLayerTest): @@ -86,7 +86,7 @@ def prepare_input(self, x): @pytest.mark.nightly def test_tuple_construct_unpack(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, - precision, ir_version, freeze_model=False) + precision, ir_version, freeze_model=False, use_convert_model=True) class TestTupleUnpackParameterSingle(PytorchLayerTest): @@ -208,7 +208,7 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) class TestTcOutsideTuInsideIfBody(PytorchLayerTest): @@ -236,4 +236,4 @@ def some_func(self, x: Tuple[torch.Tensor, torch.Tensor]): @pytest.mark.nightly def test(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, - ir_version, trace_model=False, freeze_model=False) + ir_version, trace_model=False, freeze_model=False, use_convert_model=True) From 894221ea38757559af60fb0a97874109fd07adc2 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Tue, 24 Oct 2023 11:52:02 +0900 Subject: [PATCH 009/275] [GPU] Add RMS internal op, related transformation, primitive, and kernels for RMS decomposition fusion (#20355) * Initial implementation of primitive, kernel selector, dummy kernel for RMS Norm Signed-off-by: Andrew Park * RMS ref kernel implementation with single WI Signed-off-by: Andrew Park * Add TC and reference func for ov_gpu_unit_tests Signed-off-by: Andrew Park * Add 
internal RMS norm op Signed-off-by: Andrew Park * Add transformation which fuse RMS decomposition pattern to RMS internal op Signed-off-by: Andrew Park * Fix pattern for RMS fusion transformation * Update rms ref kernel for optimization and additional planar format support * Initial impl for optimized rms kernel excluding leftovers handling and case smaller than vector size * Update the initial version to handle leftovers and case smaller than vector size * Fuse pre decomp and post comp reorders additionally * Enable dynamic impl for rms again * Revert fuse pre decomp and post comp reorders additionally * Add subgraph TC for ov_gpu_func_tests * decrease error margin for f32 data type * update description Signed-off-by: Andrew Park * update test param for input shapes * Apply comments * Fix failed TC for invalid gamma element type * Apply comments Signed-off-by: Andrew Park * Update pattern that fuse post reorder together * Apply comments --------- Signed-off-by: Andrew Park --- .../intel_gpu/include/intel_gpu/op/rms.hpp | 48 +++++ .../intel_gpu/plugin/primitives_list.hpp | 1 + .../include/intel_gpu/primitives/rms.hpp | 58 ++++++ .../src/graph/impls/ocl/register.cpp | 1 + .../src/graph/impls/ocl/register.hpp | 2 + .../intel_gpu/src/graph/impls/ocl/rms.cpp | 65 +++++++ .../intel_gpu/src/graph/include/rms_inst.h | 44 +++++ src/plugins/intel_gpu/src/graph/rms.cpp | 41 ++++ .../cl_kernels/rms_gpu_bfyx_opt.cl | 72 +++++++ .../kernel_selector/cl_kernels/rms_gpu_ref.cl | 45 +++++ .../src/kernel_selector/common_types.h | 1 + .../kernels/rms/rms_kernel_base.cpp | 94 +++++++++ .../kernels/rms/rms_kernel_base.h | 50 +++++ .../kernels/rms/rms_kernel_bfyx_opt.cpp | 120 ++++++++++++ .../kernels/rms/rms_kernel_bfyx_opt.h | 25 +++ .../kernels/rms/rms_kernel_ref.cpp | 35 ++++ .../kernels/rms/rms_kernel_ref.h | 20 ++ .../kernels/rms/rms_kernel_selector.cpp | 18 ++ .../kernels/rms/rms_kernel_selector.h | 23 +++ src/plugins/intel_gpu/src/plugin/ops/rms.cpp | 43 ++++
.../src/plugin/transformations/op/rms.cpp | 39 ++++ .../src/plugin/transformations/rms_fusion.cpp | 103 ++++++++++ .../src/plugin/transformations/rms_fusion.hpp | 19 ++ .../src/plugin/transformations_pipeline.cpp | 2 + .../dynamic/rms_norm_decomposition.cpp | 153 +++++++++++++++ .../tests/unit/test_cases/rms_gpu_test.cpp | 184 ++++++++++++++++++ .../rms_norm_decomposition_test.cpp | 144 ++++++++++++++ 27 files changed, 1450 insertions(+) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp create mode 100644 src/plugins/intel_gpu/src/graph/include/rms_inst.h create mode 100644 src/plugins/intel_gpu/src/graph/rms.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl create mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h create mode 100644 src/plugins/intel_gpu/src/plugin/ops/rms.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp create mode 100644 
src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp create mode 100644 src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp create mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp create mode 100644 src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp new file mode 100644 index 00000000000000..a6b7432ac28c51 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/rms.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { +/// \brief Operator performing Root Mean Square Normalization +/// +/// \note Performs re-scaling invariance and regularizes the summed input according to RMS statistics +class RMS : public ov::op::Op { +public: + OPENVINO_OP("RMS", "gpu_opset"); + + RMS() = default; + /// \brief Constructs an RMS operation. 
+ /// + /// \param data Input tensor with data + /// \param gamma Gamma values for weight + /// \param eps Epsilon for not dividing by zero while normalizing the value + /// \param output_type Output element type + RMS(const Output& data, + const Output& gamma, + double epsilson, + const ov::element::Type output_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + double get_epsilon() const { return m_epsilon; } + + void set_epsilon(double epsilon) { m_epsilon = epsilon; } + +private: + double m_epsilon{0}; + ov::element::Type m_output_type; +}; + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 17e62ca926397b..ceba5be5a5dd53 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -268,3 +268,4 @@ REGISTER_FACTORY(internal, NmsStaticShapeIE8); REGISTER_FACTORY(internal, MulticlassNmsIEInternal); REGISTER_FACTORY(internal, FullyConnected); REGISTER_FACTORY(internal, FullyConnectedCompressed); +REGISTER_FACTORY(internal, RMS); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp new file mode 100644 index 00000000000000..cf2ad7af502da9 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/rms.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" + +namespace cldnn { + +/// @brief Root Mean Square Normalization primitive +/// @details Performs re-scaling invariance and regularizes the summed input according to RMS statistics 
+struct rms : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(rms); + + rms() : primitive_base("", {}) {} + + /// @brief Constructs rms primitive + /// @param id This primitive id + /// @param input Input primitive id + /// @param gamma Gamma values for weight + /// @param epsilon Epsilon for not dividing by zero while normalizing + rms(const primitive_id& id, + const input_info& input, + const input_info& gamma, + const float epsilon, + const padding& output_padding = padding()) + : primitive_base(id, {input, gamma}, {output_padding}), + epsilon(epsilon) {} + + /// @brief Epsilon for not dividing by zero while normalizing + float epsilon; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, epsilon); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + + auto rhs_casted = downcast(rhs); + + return epsilon == rhs_casted.epsilon; + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << epsilon; + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> epsilon; + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 6b35b9cdfb16ce..3cc96ee00735ce 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -65,6 +65,7 @@ void register_implementations() { REGISTER_OCL(reshape); REGISTER_OCL(reverse); REGISTER_OCL(reverse_sequence); + REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index 45f4018bf90dac..d591499148e77a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -59,6 +59,7 @@ #include "intel_gpu/primitives/resample.hpp" #include "intel_gpu/primitives/reshape.hpp" #include "intel_gpu/primitives/reverse_sequence.hpp" +#include "intel_gpu/primitives/rms.hpp" #include "intel_gpu/primitives/roi_align.hpp" #include "intel_gpu/primitives/roi_pooling.hpp" #include "intel_gpu/primitives/roll.hpp" @@ -146,6 +147,7 @@ REGISTER_OCL(reorg_yolo); REGISTER_OCL(reshape); REGISTER_OCL(reverse); REGISTER_OCL(reverse_sequence); +REGISTER_OCL(rms); REGISTER_OCL(roi_align); REGISTER_OCL(roi_pooling); REGISTER_OCL(roll); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp new file mode 100644 index 00000000000000..71f44e685157b0 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rms.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "primitive_base.hpp" + +#include "rms_inst.h" +#include "rms/rms_kernel_selector.h" +#include "rms/rms_kernel_ref.h" + +namespace cldnn { +namespace ocl { + +struct rms_impl : typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + using kernel_selector_t = kernel_selector::rms_kernel_selector; + using kernel_params_t = std::pair; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::rms_impl); + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + const auto& primitive = impl_param.typed_desc(); + auto params = get_default_params(impl_param, is_shape_agnostic); + auto optional_params = get_default_optional_params(impl_param.get_program()); + + params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); + params.epsilon = primitive->epsilon; + return {params, optional_params}; + } + + void update_dispatch_data(const 
kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + } +}; + +namespace detail { + +attach_rms_impl::attach_rms_impl() { + auto types = { + data_types::f32, + data_types::f16, + data_types::i32 + }; + + auto formats = { + format::bfyx, + format::bfzyx + }; + + implementation_map::add(impl_types::ocl, + shape_types::any, + typed_primitive_impl_ocl::create, + types, + formats); +} + +} // namespace detail +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/rms_inst.h b/src/plugins/intel_gpu/src/graph/include/rms_inst.h new file mode 100644 index 00000000000000..a7800249f40421 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/rms_inst.h @@ -0,0 +1,44 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "intel_gpu/primitives/rms.hpp" +#include "primitive_inst.h" + +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input(size_t index = 0) const { return get_dependency(index); } + std::vector get_shape_infer_dependencies() const override { return {}; } +}; + +using rms_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(rms_node const& /*node*/, const kernel_impl_params& impl_params) { + return forward_input0_shape(impl_params); + } + static layout calc_output_layout(rms_node const& node, kernel_impl_params const& impl_params); + static std::string to_string(rms_node const& node); + + typed_primitive_inst(network& network, rms_node const& node); +}; + +using rms_inst = typed_primitive_inst; + +} // 
namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/rms.cpp b/src/plugins/intel_gpu/src/graph/rms.cpp new file mode 100644 index 00000000000000..5002417df57394 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/rms.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_inst.h" + +#include "primitive_type_base.h" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(rms); + +layout rms_inst::calc_output_layout(rms_node const& node, kernel_impl_params const& impl_param) { + auto desc = impl_param.typed_desc(); + auto input_layout = impl_param.get_input_layout(); + auto output_type = desc->output_data_types[0].value_or(input_layout.data_type); + auto output_format = input_layout.format; + + return layout(output_type, output_format, input_layout.get_tensor()); +} + +std::string rms_inst::to_string(rms_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + + std::stringstream primitive_description; + + json_composite rms_info; + rms_info.add("input_id", node.input(0).id()); + rms_info.add("epsilon", desc->epsilon); + + node_info->add("rms_info", rms_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +rms_inst::typed_primitive_inst(network& network, rms_node const& node) : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl new file mode 100644 index 00000000000000..77d16d06405aa5 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_bfyx_opt.cl @@ -0,0 +1,72 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/fetch_data.cl" + +KERNEL(rms_gpu_bfyx_opt)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const 
__global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const uint in_data_idx = get_global_id(0); + const uint data_idx = get_global_id(1); + const uint lws_size = LWS; + const uint items_num = VEC_SIZE; + const uint data_size = DATA_SIZE; + const uint total_items_num = lws_size * items_num; +#if !IS_DYNAMIC + const uint leftovers = LEFTOVERS; +#else + const uint leftovers = data_size % items_num; +#endif + + const uint data_offset = data_idx * data_size; + const uint in_data_offset = data_offset + in_data_idx * items_num; + const uint gamma_offset = in_data_idx * items_num; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + + __local ACCUMULATOR_TYPE slm_buf[SLM_SIZE]; + + INPUT_VEC_TYPE inputs = AS_INPUT_VEC_TYPE(VLOAD(0, input + in_data_offset)); + ACCUMULATOR_VEC_TYPE square = native_powr(TO_ACCUMULATOR_VEC_TYPE(inputs), (ACCUMULATOR_VEC_TYPE)(2)); + unroll_for (uint i = 0; i < VEC_SIZE; ++i) { + rms += square[i]; + } + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + rms += native_powr(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + + slm_buf[in_data_idx] = rms; + + barrier(CLK_LOCAL_MEM_FENCE); + if (in_data_idx == 0) { +#if !IS_DYNAMIC + unroll_for (uint i = 1; i < LWS; ++i) +#else + for (uint i = 1; i < lws_size; ++i) +#endif + rms += slm_buf[i]; + + rms = rms / data_size; + slm_buf[0] = native_powr(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + } + barrier(CLK_LOCAL_MEM_FENCE); + + rms = slm_buf[0]; + + OUTPUT_VEC_TYPE results = TO_OUTPUT_VEC_TYPE((ACCUMULATOR_VEC_TYPE)(rms) * TO_ACCUMULATOR_VEC_TYPE(inputs) * AS_ACCUMULATOR_VEC_TYPE(VLOAD(0, gamma + gamma_offset))); + VSTORE(results, 0, output + in_data_offset); + + if (in_data_idx < leftovers) { + const uint input_idx = data_offset + total_items_num + in_data_idx; + const uint output_idx = data_offset + total_items_num + in_data_idx; + const uint gamma_idx = total_items_num + in_data_idx; + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms * 
TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(gamma[gamma_idx])); + output[output_idx] = result; + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl new file mode 100644 index 00000000000000..88c5eb520d33e3 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rms_gpu_ref.cl @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/fetch_utils.cl" + +KERNEL(rms_gpu_ref)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + const __global INPUT1_TYPE* gamma, + __global OUTPUT_TYPE* output) +{ + const uint b = get_global_id(0); + const uint f = get_global_id(1); + const uint w = 0; + + ACCUMULATOR_TYPE rms = ACCUMULATOR_VAL_ZERO; + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + rms += pow(TO_ACCUMULATOR_TYPE(input[input_idx]), 2); + } + } + } + + rms /= INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z; + rms = pow(sqrt(rms + TO_ACCUMULATOR_TYPE(EPSILON)), -1); + + for (uint z = 0; z < INPUT0_SIZE_Z; z++) { + for (uint y = 0; y < INPUT0_SIZE_Y; y++) { + for (uint x = 0; x < INPUT0_SIZE_X; x++) { + const uint input_idx = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + const uint output_idx = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); +#if INPUT0_DIMS == 4 + const uint gamma_idx = y; +#elif INPUT0_DIMS == 5 + const uint gamma_idx = z; +#endif + OUTPUT_TYPE result = TO_OUTPUT_TYPE(rms) * TO_OUTPUT_TYPE(input[input_idx]) * TO_OUTPUT_TYPE(gamma[gamma_idx]); + output[output_idx] = result; + } + } + } +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h 
b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 7706da6003fe74..8c841b6001f44d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -97,6 +97,7 @@ enum class KernelType { MULTICLASS_NMS, UNIQUE_COUNT, UNIQUE_GATHER, + RMS, }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp new file mode 100644 index 00000000000000..f93cee2876de93 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_base.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { +bool RMSKernelBase::Validate(const Params& p, const optional_params& o) const { + if (!KernelBaseOpenCL::Validate(p, o)) + return false; + + const rms_params& params = static_cast(p); + auto supported_dyn_layouts = { DataLayout::bfyx, DataLayout::bfzyx }; + if (params.has_dynamic_tensors() && (!layout_is_one_of(params.inputs, supported_dyn_layouts) || !layout_is_one_of(params.outputs, supported_dyn_layouts))) + return false; + + return true; +} + +JitConstants RMSKernelBase::GetJitConstants(const rms_params& params, RMSKernelBase::DispatchData) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + jit.AddConstant(MakeJitConstant("EPSILON", params.epsilon)); + jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); + + return jit; +} + +RMSKernelBase::DispatchData RMSKernelBase::SetDefault(const rms_params& params) const { + DispatchData dispatchData; + const auto& output = params.outputs[0]; + + dispatchData.gws = {output.Batch().v, output.Feature().v, 1}; + dispatchData.lws = 
GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + return dispatchData; +} + +KernelsData RMSKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const { + assert(params.GetType() == KernelType::RMS); + + if (!Validate(params, options)) + return {}; + + const rms_params& orgParams = static_cast(params); + auto dispatchData = SetDefault(orgParams); + + KernelData kd = KernelData::Default(params); + + auto cldnn_jit = GetJitConstants(orgParams, dispatchData); + auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options); + auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params); + }; + + auto& kernel = kd.kernels[0]; + FillCLKernelData(kernel, + dispatchData, + params.engineInfo, + kernelName, + jit, + entry_point, + EXE_MODE_DEFAULT, + false, + false, + 2, + GetFusedPrimitiveInputsCount(params), + 1, + orgParams.outputs[0].is_dynamic()); + + return {kd}; +} + +Datatype RMSKernelBase::GetAccumulatorType(const rms_params& params) const { + const auto& input_dt = params.inputs[0].GetDType(); + + switch (input_dt) { + case Datatype::F32: + case Datatype::F16: + return Datatype::F32; + case Datatype::INT8: return Datatype::INT32; + case Datatype::UINT8: return Datatype::INT32; + default: return Datatype::F32; + } +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h new file mode 100644 
index 00000000000000..546c209bf03d77 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_base.h @@ -0,0 +1,50 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_params +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_params : public base_params { + rms_params() : base_params(KernelType::RMS) {} + float epsilon = 0.0f; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// rms_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct rms_optional_params : optional_params { + rms_optional_params() : optional_params(KernelType::RMS) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// RMSKernelBase +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class RMSKernelBase : public KernelBaseOpenCL { +public: + using KernelBaseOpenCL::KernelBaseOpenCL; + virtual ~RMSKernelBase() {} + + struct DispatchData : public CommonDispatchData { + size_t dataSize; + size_t dataCount; + size_t slmSize; + size_t maxSlmSize; + size_t leftovers; + + DispatchData() : dataSize(0), dataCount(0), slmSize(0), maxSlmSize(0), leftovers(0) {} + }; + +protected: + bool Validate(const Params&, const optional_params&) const override; + virtual JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const; + virtual DispatchData SetDefault(const rms_params& params) const; + KernelsData 
GetCommonKernelsData(const Params& params, const optional_params&) const; + Datatype GetAccumulatorType(const rms_params& params) const; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp new file mode 100644 index 00000000000000..ad49fd86370e0a --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_bfyx_opt.h" +#include "kernel_selector_utils.h" +#include + +namespace kernel_selector { +ParamsKey RMSKernelBfyxOpt::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::bfzyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfzyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDifferentTypes(); + k.EnableDynamicShapesSupport(); + return k; +} + +JitConstants RMSKernelBfyxOpt::GetJitConstants(const rms_params& params, DispatchData dispatchData) const { + auto jit = Parent::GetJitConstants(params, dispatchData); + + if (params.has_dynamic_tensors()) { + const auto& input = params.inputs[0]; + DimensionAccessHelper dims(input); + const std::string data_size = toVectorMulString({dims.x(), dims.y(), dims.z()}); + const std::string lws_0 = "get_local_size(0)"; + jit.AddConstants({ + MakeJitConstant("DATA_SIZE", data_size), + MakeJitConstant("LWS", lws_0), + MakeJitConstant("SLM_SIZE", dispatchData.maxSlmSize) + }); + } else { + jit.AddConstants({ + MakeJitConstant("DATA_SIZE", dispatchData.dataSize), + MakeJitConstant("LWS", dispatchData.slmSize), + 
MakeJitConstant("SLM_SIZE", dispatchData.slmSize), + MakeJitConstant("LEFTOVERS", dispatchData.leftovers) + }); + } + jit.AddConstants({ + MakeJitConstant("VEC_SIZE", 8), + MakeJitConstant("VLOAD", "CAT(vload, VEC_SIZE)"), + MakeJitConstant("VSTORE", "CAT(vstore, VEC_SIZE)"), + MakeJitConstant("INPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)"), + MakeJitConstant("ACCUMULATOR_VEC_TYPE", "MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)"), + MakeJitConstant("OUTPUT_VEC_TYPE", "MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)"), + MakeJitConstant("AS_INPUT_VEC_TYPE", "CAT(as_, INPUT_VEC_TYPE)"), + MakeJitConstant("AS_ACCUMULATOR_VEC_TYPE", "CAT(as_, ACCUMULATOR_VEC_TYPE)"), + MakeJitConstant("TO_ACCUMULATOR_VEC_TYPE", "CAT(convert_, ACCUMULATOR_VEC_TYPE)"), + MakeJitConstant("TO_OUTPUT_VEC_TYPE", "CAT(convert_, OUTPUT_VEC_TYPE)"), + }); + + return jit; +} + +RMSKernelBase::DispatchData RMSKernelBfyxOpt::SetDefault(const rms_params& params) const { + DispatchData dispatchData; + const auto& input = params.inputs[0]; + + auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType()); + auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi); + dispatchData.maxSlmSize = max_lws; + + if (!params.has_dynamic_tensors()) { + dispatchData.dataSize = input.X().v * input.Y().v * input.Z().v; + dispatchData.dataCount = input.Batch().v * input.Feature().v; + dispatchData.slmSize = dispatchData.dataSize / 8; + dispatchData.leftovers = dispatchData.dataSize % 8; + + dispatchData.gws[0] = dispatchData.slmSize; + dispatchData.gws[1] = dispatchData.dataCount; + dispatchData.gws[2] = 1; + + dispatchData.lws[0] = dispatchData.slmSize; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + } + return dispatchData; +} + +bool RMSKernelBfyxOpt::Validate(const Params& p, const optional_params& o) const { + if (!Parent::Validate(p, o)) + return false; + + const rms_params& params = static_cast(p); + const auto& gamma = 
params.inputs[1]; + + if (!gamma.is_dynamic()) { + size_t data_size = gamma.LogicalSize(); + if (data_size < 8) { + return false; + } + auto local_mem_per_wi = 2 * BytesPerElement(params.inputs[0].GetDType()); + auto max_lws = std::min(params.engineInfo.maxWorkGroupSize, params.engineInfo.maxLocalMemSize / local_mem_per_wi); + auto slm_size = data_size / 8; + if (slm_size > max_lws) { + return false; + } + } + + return true; +} + +KernelsData RMSKernelBfyxOpt::GetKernelsData(const Params& params, const optional_params& options) const { + return GetCommonKernelsData(params, options); +} + +KernelsPriority RMSKernelBfyxOpt::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { + return FORCE_PRIORITY_7; +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h new file mode 100644 index 00000000000000..a9b49c4c1cc654 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_bfyx_opt.h @@ -0,0 +1,25 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "rms_kernel_base.h" + +namespace kernel_selector { +class RMSKernelBfyxOpt : public RMSKernelBase { +public: + using Parent = RMSKernelBase; + RMSKernelBfyxOpt() : RMSKernelBase("rms_gpu_bfyx_opt") {} + virtual ~RMSKernelBfyxOpt() {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + +protected: + bool Validate(const Params&, const optional_params&) const override; + DispatchData SetDefault(const rms_params& params) const override; + JitConstants GetJitConstants(const rms_params& params, DispatchData dispatchData) const override; +}; +} // namespace 
kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp new file mode 100644 index 00000000000000..9dbdf30154aea9 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_ref.h" +#include "kernel_selector_utils.h" +#include + +namespace kernel_selector { +ParamsKey RMSKernelRef::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::bfzyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfzyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + k.EnableDifferentTypes(); + k.EnableDynamicShapesSupport(); + return k; +} + +KernelsData RMSKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { + return GetCommonKernelsData(params, options); +} + +KernelsPriority RMSKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { + return FORCE_PRIORITY_9; +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h new file mode 100644 index 00000000000000..7c2e3dd512e8f7 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_ref.h @@ -0,0 +1,20 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "rms_kernel_base.h" + +namespace kernel_selector { +class RMSKernelRef : public RMSKernelBase { +public: + using Parent = RMSKernelBase; 
+ RMSKernelRef() : RMSKernelBase("rms_gpu_ref") {} + virtual ~RMSKernelRef() {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp new file mode 100644 index 00000000000000..13cabf77011d48 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_kernel_selector.h" +#include "rms_kernel_ref.h" +#include "rms_kernel_bfyx_opt.h" + +namespace kernel_selector { +rms_kernel_selector::rms_kernel_selector() { + Attach(); + Attach(); +} + +KernelsData rms_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { + return GetNaiveBestKernel(params, options, KernelType::RMS); +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h new file mode 100644 index 00000000000000..f951264c7f5c6b --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rms/rms_kernel_selector.h @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { +class rms_kernel_selector : public kernel_selector_base { +public: + static rms_kernel_selector& Instance() { + static rms_kernel_selector instance_; + return instance_; + } + + rms_kernel_selector(); + + virtual ~rms_kernel_selector() {} + + KernelsData GetBestKernels(const Params& 
params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/plugin/ops/rms.cpp b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp new file mode 100644 index 00000000000000..01289bd5022d6d --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/rms.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/op/rms.hpp" +#include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/plugin/common_utils.hpp" +#include "intel_gpu/primitives/rms.hpp" + +namespace ov { +namespace op { +namespace internal { +using RMS = ov::intel_gpu::op::RMS; +} // namespace internal +} // namespace op +} // namespace ov + +namespace ov { +namespace intel_gpu { + +static void CreateRMSOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {2}); + auto inputs = p.GetInputInfo(op); + std::string primitive_name = layer_type_name_ID(op); + + auto get_output_data_types = [&]() { + std::vector output_data_types; + auto type = op->get_output_element_type(0); + output_data_types.push_back(cldnn::element_type_to_data_type(type)); + return output_data_types; + }; + auto rms = cldnn::rms(primitive_name, + inputs[0], + inputs[1], + op->get_epsilon()); + rms.output_data_types = get_output_data_types(); + p.add_primitive(*op, rms); +} + +REGISTER_FACTORY_IMPL(internal, RMS); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp new file mode 100644 index 00000000000000..5dcd12071d1712 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/rms.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/op/rms.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +RMS::RMS(const Output& data, + const Output& gamma, + 
double epsilson, + const ov::element::Type output_type) + : Op({data, gamma}), m_epsilon(epsilson), m_output_type(output_type) { + validate_and_infer_types(); +} + +bool RMS::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("epsilon", m_epsilon); + visitor.on_attribute("output_type", m_output_type); + return true; +} + +void RMS::validate_and_infer_types() { + auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type; + set_output_type(0, output_type, get_input_partial_shape(0)); +} + +std::shared_ptr RMS::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), + new_args.at(1), + m_epsilon, m_output_type); // forward m_output_type so a clone keeps the explicitly requested output precision +} + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp new file mode 100644 index 00000000000000..bcd192454f3d3a --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rms_fusion.hpp" + +#include "intel_gpu/op/rms.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/reduce_mean.hpp" +#include "openvino/op/sqrt.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gpu { + +static std::function)> constant_value(const float target_value) { + return [=](const ov::Output& output) -> bool { + auto node = std::dynamic_pointer_cast(output.get_node_shared_ptr()); + if (!node) { + return false; + } + float value; + if (!ov::op::util::get_single_value(node, value)) { + 
return false; + } + return value == target_value; + }; +} + +RMSFusion::RMSFusion() { + using namespace ov::pass::pattern; + + // Detect RMS decomposition pattern + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma + auto x = any_input(); + + // x^2 + auto const_power = wrap_type(constant_value(2)); + auto power = wrap_type({x, const_power}); + + // ReduceMean(x^2,axes) + auto mean_axes = wrap_type(constant_value(-1)); + auto mean = wrap_type({power, mean_axes}); + + // ReduceMean(x^2,axes)+eps + auto eps = wrap_type(); + auto add_eps = wrap_type({mean, eps}); + + // Sqrt(ReduceMean(x^2,axes)+eps) + auto sqrt = wrap_type({add_eps}); + + // 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto const_div = wrap_type(constant_value(-1)); + auto div = wrap_type({sqrt, const_div}); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto mul1 = wrap_type({x, div}); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma + auto gamma = wrap_type(type_matches(element::f32)); + auto mul2 = wrap_type({gamma, mul1}); + + // compress RMS result + auto comp = wrap_type({mul2}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto x_output = pattern_map.at(x); + + auto const_eps_node = + std::dynamic_pointer_cast(pattern_map.at(eps).get_node_shared_ptr()); + float eps_value; + if (!ov::op::util::get_single_value(const_eps_node, eps_value)) { + return false; + } + + const auto& gamma_node = pattern_map.at(gamma).get_node_shared_ptr(); + auto output_type = m.get_match_root()->get_output_element_type(0); + + auto rms = std::make_shared(x_output, + gamma_node, + eps_value, + output_type); + rms->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), rms); + ov::replace_node(m.get_match_root(), rms); + + return true; + }; + + auto m = std::make_shared(comp, "RMSFusion"); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git 
a/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp new file mode 100644 index 00000000000000..66f236f3f26c38 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/rms_fusion.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +class RMSFusion : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("RMSFusion", "0"); + RMSFusion(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 10275dae95d729..dfc24774fcd26b 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -113,6 +113,7 @@ #include "plugin/transformations/convert_matmul_to_fc.hpp" #include "plugin/transformations/move_fc_reshape_to_weights.hpp" #include "plugin/transformations/convert_fc_to_compressed.hpp" +#include "plugin/transformations/rms_fusion.hpp" #include "transformations/low_precision/mark_dequantization_subgraph.hpp" #include "low_precision/pull_reshape_through_dequantization.hpp" @@ -642,6 +643,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.run_passes(func); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp new file mode 100644 index 00000000000000..2ea4fc415b52f0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/rms_norm_decomposition.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +using namespace ngraph; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { +/* + * Input(F32) Const(F32) + * | \ / + * | Power(F32) Const(I64) + * | \ / + * | ReduceMean(F32) + * | | Const(F32) + * | | / + * | Add(F32) + * | | + * | Sqrt(F32) Const(F32) + * | | / + * | Divide(F32) + * | / + * Const(F32) Multiply(F32) + * \ | + * Multiply(F32) + * | + * Convert(F16) + */ +using RMSNormDecompositionParams = std::tuple, // input shapes + ov::test::ElementType, // input precision + std::map>; // additional config + +class RMSNormDecomposition : public testing::WithParamInterface, public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << ")_TS="; + for (const auto& shape : input_shapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "input_precision=" << input_precision << "_"; + + result << "config=("; + for (const auto& configEntry : additional_config) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + + return result.str(); + } + +protected: + std::shared_ptr init_subgraph(std::vector& input_shapes, + const ov::Shape& target_shape, + const ov::element::Type input_precision) { + ov::ParameterVector params{std::make_shared(input_precision, input_shapes[0])}; + + 
// x^2 + auto power_const = ov::opset10::Constant::create(input_precision, {}, {2.f}); + auto power = std::make_shared(params[0], power_const); + + // ReduceMean(x^2,axes) + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + + // ReduceMean(x^2,axes)+eps + auto eps = ov::opset10::Constant::create(input_precision, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + + // Sqrt(ReduceMean(x^2,axes)+eps) + auto sqrt = std::make_shared(add_eps); + + // 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto div_const = ov::opset10::Constant::create(input_precision, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) + auto mul1 = std::make_shared(params[0], div); + + // x * 1/Sqrt(ReduceMean(x^2,axes)+eps) * gamma + auto dim = *target_shape.rbegin(); + auto gamma = ngraph::builder::makeConstant(input_precision, ov::Shape{dim}, std::vector{}, true); + auto mul2 = std::make_shared(gamma, mul1); + + auto comp = std::make_shared(mul2, ov::element::f16); + + return std::make_shared(NodeVector{comp}, params, "RMSNormDecomposition"); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector input_shapes; + ElementType input_precision; + std::map additional_config; + + std::tie(input_shapes, input_precision, additional_config) = GetParam(); + + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(input_shapes); + + inType = outType = input_precision; + + function = init_subgraph(inputDynamicShapes, targetStaticShapes.front().front(), input_precision); + } +}; + +TEST_P(RMSNormDecomposition, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { + +const std::vector input_precisions = {ov::element::f32, ov::element::f16}; + +const std::vector> input_shapes_basic = { + {{{-1, -1, 96}, {{1, 4, 96}}}}, + {{{-1, -1, -1}, {{1, 2, 16}}}}, + {{{}, 
{{1, 2, 6}}}}, + {{{}, {{1, 2, 18}}}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_RMSNormDecomposition_basic, + RMSNormDecomposition, + ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::ValuesIn(input_precisions), + ::testing::Values(std::map())), + RMSNormDecomposition::getTestCaseName); +} // namespace + +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp new file mode 100644 index 00000000000000..deee8418e23fae --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp @@ -0,0 +1,184 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include +#include +#include "rms_inst.h" + +using namespace cldnn; +using namespace ::tests; + +class rms_gpu_test : public ::testing::TestWithParam {}; + +template +void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr output, float epsilon) { + auto input_layout = input->get_layout(); + auto gamma_layout = gamma->get_layout(); + + uint32_t batch_size = input_layout.batch(); + uint32_t feature_size = input_layout.feature(); + uint32_t y_size = input_layout.spatial(1); + uint32_t x_size = input_layout.spatial(0); + + cldnn::mem_lock src(input, get_test_stream()); + cldnn::mem_lock weight(gamma, get_test_stream()); + cldnn::mem_lock dst(output, get_test_stream()); + + for (uint32_t b = 0; b < batch_size; ++b) { + for (uint32_t f = 0; f < feature_size; ++f) { + float rms = 0.f; + for (uint32_t y = 0; y < y_size; ++y) { + for (uint32_t x = 0; x < x_size; ++x) { + auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0)); + size_t src_offset = input_layout.get_linear_offset(tensor_src); + rms += std::pow(static_cast(src[src_offset]), 2); + } + } + rms /= y_size * x_size; + rms += epsilon; + rms = std::pow(std::sqrt(rms), -1); + + for (uint32_t y = 0; y < y_size; ++y) { + for 
(uint32_t x = 0; x < x_size; ++x) { + auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0)); + auto tensor_weight = tensor(batch(b), feature(0), spatial(x, y, 0, 0)); + auto tensor_dst = tensor(batch(b), feature(f), spatial(x, y, 0, 0)); + size_t src_offset = input_layout.get_linear_offset(tensor_src); + size_t weight_offset = input_layout.get_linear_offset(tensor_weight); + size_t dst_offset = input_layout.get_linear_offset(tensor_dst); + float result = rms * static_cast(src[src_offset]) * static_cast(weight[weight_offset]); + dst[dst_offset] = static_cast(result); + } + } + } + } +} + +TEST(rms_gpu_test, rms_test_bfyx_ref) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 6}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i 
= 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt) { + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 16}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, + 0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, + 0.003098f, -0.006989f, -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, + 0.002258f, 0.006500f, 0.003159f, -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, + 0.018188f, 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} + +TEST(rms_gpu_test, rms_test_bfyx_opt_leftovers) { + auto& 
engine = get_test_engine(); + + auto input = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + auto gamma = engine.allocate_memory({ov::PartialShape{1, 18}, data_types::f32, format::bfyx}); + auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 18}, data_types::f32, format::bfyx}); + + set_values(input, { + 0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f, 0.006744f, + -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f, 0.003098f, -0.006989f, + -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f, 0.002258f, 0.006500f, 0.003159f, + -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f, 0.001419f, 0.001617f,-0.006622f, 0.010864f + }); + set_values(gamma, { + 0.029785f, 0.014038f, 0.003098f, 0.013123f, 0.015137f, 0.009399f, 0.008362f, 0.008179f, 0.018188f, + 0.021973f, 0.005249f, 0.004639f, 0.004272f, 0.020264f, 0.013489f, 0.008789f, 0.006653f, 0.010315f + }); + + rms_ref(input, gamma, output_ref, 1e-5f); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("gamma", gamma->get_layout())); + topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f)); + + network network(engine, topology, get_test_default_config(engine)); + + network.set_input_data("input", input); + network.set_input_data("gamma", gamma); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "rms"); + + auto output = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); + + for (unsigned int i = 0; i < output_ref->count(); ++i) { + EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3); + } +} diff --git a/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp 
b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp new file mode 100644 index 00000000000000..26d8638d2b904e --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/rms_norm_decomposition_test.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +TEST_F(TransformationTestsF, RMSNormFusionTest1) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto rms_const = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } 
+} + +TEST_F(TransformationTestsF, RMSNormFusionTest2) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest3) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 2, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + 
auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest4) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {1}); + auto div = std::make_shared(div_const, sqrt); + auto mul1 = std::make_shared(input, div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, RMSNormFusionTest5) { + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto power_const = ov::opset10::Constant::create(ov::element::f32, {}, {2.f}); + auto power = std::make_shared(input, power_const); + auto mean_axes = ov::opset10::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + auto mean = std::make_shared(power, mean_axes, true); + auto eps = ov::opset10::Constant::create(ov::element::f32, {}, {1e-5f}); + auto add_eps = std::make_shared(mean, eps); + auto sqrt = std::make_shared(add_eps); + auto div_const = ov::opset10::Constant::create(ov::element::f32, {}, {-1}); + auto div = std::make_shared(sqrt, div_const); + auto mul1 = std::make_shared(input, 
div); + auto gamma = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto mul2 = std::make_shared(gamma, mul1); + auto comp = std::make_shared(mul2, ov::element::f16); + + model = std::make_shared(ov::NodeVector{comp}, ov::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 6}); + auto rms_const = ov::opset10::Constant::create(ov::element::f32, ov::Shape{6}, {0.029f, 0.014f, 0.003f, 0.013f, 0.015f, 0.009f}); + auto rms = std::make_shared(input, rms_const, 1e-5f, ov::element::f16); + + model_ref = std::make_shared(ov::NodeVector{rms}, ov::ParameterVector{input}); + } +} From 59fe0a05a4d6edd54b5d225c2c34e2adf6a7b340 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Tue, 24 Oct 2023 09:21:30 +0400 Subject: [PATCH 010/275] [GPU] Fix for reorder fusing in fuse_constant_transposes pass (#20651) --- .../graph_optimizer/prepare_primitive_fusing.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 45739c78a1e36a..cc9d8602316fa5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1221,6 +1221,8 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { return format::find_format(new_order, fmt.block_sizes()); }; + std::vector> to_replace_nodes; + auto& proc_order = p.get_processing_order(); auto itr = proc_order.begin(); while (itr != proc_order.end()) { @@ -1285,9 +1287,7 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { auto new_reorder = std::make_shared(next_node->id() + "_reorder_fmt", new_const_node.id(), reorder_layout); auto& new_reorder_node = p.get_or_create(new_reorder); 
- p.replace(*next_node, new_reorder_node); - new_reorder_node.recalc_output_layout(false); - itr = std::find(proc_order.begin(), proc_order.end(), &new_reorder_node); + to_replace_nodes.emplace_back(std::make_pair(next_node, &new_reorder_node)); } else { layout reorder_layout = new_const_node.get_output_layout(); reorder_layout.format = format::bfyx; @@ -1299,6 +1299,11 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { } } } + + for (auto& nodes : to_replace_nodes) { + p.replace(*nodes.first, *nodes.second); + nodes.second->recalc_output_layout(false); + } } void prepare_primitive_fusing::optimize_fused_ops(program& p) { From bc82ba441930efb179c2de9e48900c8fd71bcc36 Mon Sep 17 00:00:00 2001 From: Ivan Novoselov Date: Tue, 24 Oct 2023 06:23:10 +0100 Subject: [PATCH 011/275] [Snippets] Dynamic pipeline reorganization (#18563) --- .../snippets/include/snippets/emitter.hpp | 1 - .../snippets/include/snippets/generator.hpp | 63 ++- .../include/snippets/lowered/expression.hpp | 1 - .../snippets/lowered/expression_factory.hpp | 7 + .../include/snippets/lowered/linear_ir.hpp | 1 + .../lowered/pass/insert_broadcastmove.hpp | 28 ++ .../lowered/pass/softmax_decomposition.hpp | 2 +- .../snippets/lowered/pass/validate_shapes.hpp | 31 ++ .../snippets/op/rank_normalization.hpp | 54 +++ .../snippets/include/snippets/op/subgraph.hpp | 57 ++- .../snippets/pass/align_element_types.hpp | 34 ++ .../snippets/pass/canonicalization.hpp | 39 ++ .../include/snippets/snippets_isa.hpp | 1 + .../include/snippets/snippets_isa_tbl.hpp | 1 + .../include/snippets/target_machine.hpp | 11 +- .../snippets/include/snippets/utils.hpp | 1 + src/common/snippets/src/generator.cpp | 21 +- src/common/snippets/src/lowered/linear_ir.cpp | 68 ++- .../src/lowered/pass/assign_registers.cpp | 17 +- .../src/lowered/pass/insert_broadcastmove.cpp | 90 ++++ .../src/lowered/pass/insert_buffers.cpp | 21 +- .../src/lowered/pass/insert_load_store.cpp | 30 +- 
.../snippets/src/lowered/pass/mark_loops.cpp | 3 +- .../src/lowered/pass/optimize_domain.cpp | 29 +- .../src/lowered/pass/propagate_layout.cpp | 12 +- .../lowered/pass/softmax_decomposition.cpp | 13 +- .../src/lowered/pass/validate_shapes.cpp | 48 ++ .../snippets/src/lowered/port_descriptor.cpp | 16 +- src/common/snippets/src/op/buffer.cpp | 6 +- .../snippets/src/op/rank_normalization.cpp | 57 +++ src/common/snippets/src/op/scalar.cpp | 13 +- src/common/snippets/src/op/subgraph.cpp | 413 +++++------------- .../snippets/src/pass/align_element_types.cpp | 106 +++++ .../src/pass/broadcast_to_movebroadcast.cpp | 20 +- .../snippets/src/pass/canonicalization.cpp | 84 ++++ .../snippets/src/pass/convert_constants.cpp | 3 +- .../snippets/src/pass/set_softmax_ports.cpp | 8 +- .../src/shape_inference/shape_inference.cpp | 1 + src/common/snippets/src/utils.cpp | 7 +- .../snippets/tests/include/lowering_utils.hpp | 15 +- .../tests/include/pass/canonicalization.hpp | 31 +- .../snippets/tests/src/lowering_utils.cpp | 6 +- .../tests/src/pass/canonicalization.cpp | 143 +++--- .../src/emitters/x64/cpu_generator.cpp | 205 +++++---- .../src/emitters/x64/cpu_generator.hpp | 16 +- .../emitters/x64/jit_snippets_emitters.cpp | 8 +- src/plugins/intel_cpu/src/extension.cpp | 1 + src/plugins/intel_cpu/src/nodes/subgraph.cpp | 250 +++++------ src/plugins/intel_cpu/src/nodes/subgraph.h | 32 +- .../snippets/x64/op/fused_mul_add.cpp | 5 + .../snippets/x64/op/fused_mul_add.hpp | 1 + .../shared_tests_instances/snippets/add.cpp | 3 + .../mul_add_to_fma.cpp | 11 +- 53 files changed, 1318 insertions(+), 827 deletions(-) create mode 100644 src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp create mode 100644 src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp create mode 100644 src/common/snippets/include/snippets/op/rank_normalization.hpp create mode 100644 src/common/snippets/include/snippets/pass/align_element_types.hpp create mode 100644 
src/common/snippets/include/snippets/pass/canonicalization.hpp create mode 100644 src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp create mode 100644 src/common/snippets/src/lowered/pass/validate_shapes.cpp create mode 100644 src/common/snippets/src/op/rank_normalization.cpp create mode 100644 src/common/snippets/src/pass/align_element_types.cpp create mode 100644 src/common/snippets/src/pass/canonicalization.cpp diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp index e1ff08abbf7da2..a2aa4923c2eef4 100644 --- a/src/common/snippets/include/snippets/emitter.hpp +++ b/src/common/snippets/include/snippets/emitter.hpp @@ -12,7 +12,6 @@ namespace ov { namespace snippets { -using code = const uint8_t *; using RegInfo = std::pair, std::vector>; /** diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index b0d30f602a5a88..32b44b9e6abc81 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -11,12 +11,32 @@ #include "snippets_isa.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/lowered/pass/pass.hpp" #include "snippets/shape_types.hpp" +#include "target_machine.hpp" namespace ov { namespace snippets { + +class Generator; +/** + * @interface LoweringResult + * @brief Holds all relevant information produced during lowering + * @param compiled_snippet pointer to interface class that encapsulates compiled binary code + * @param buffer_scratchpad_size the amount of additional memory required by the binary code to execute. + * Must be allocated and freed by the backend. + */ +class LoweringResult { + friend class Generator; + // Some emitters rely on other precompiled kernels. + // We need to keep the pointers to such emitters alive, so the kernels would still be accessible at runtime. 
+ std::vector> m_saved_emitters{}; + +public: + std::shared_ptr compiled_snippet = nullptr; + size_t buffer_scratchpad_size = 0; +}; + /** * @interface Schedule * @brief Return scheduling information and pointer to generated kernel code @@ -26,20 +46,21 @@ class Schedule { public: Schedule() = default; /** - * @brief Default to create schedule out of specific parameters - * @param wd work domain for kernel execution - * @param p pointer to generated code + * @brief Create schedule out of specific parameters + * @param domain work domain for kernel execution + * @param lr lowering result produced during code generation */ - Schedule(const VectorDims& wd, code p) : parallel_exec_domain(wd), ptr(p) {} + Schedule(std::vector&& domain, LoweringResult&& lr) : parallel_exec_domain(domain), lowering_result(lr) {} + Schedule(std::vector domain, LoweringResult&& lr) : parallel_exec_domain(std::move(domain)), lowering_result(lr) {} /** * @brief Returns callable instanse of code pointer */ template K get_callable() const { - return reinterpret_cast(const_cast(ptr)); + return reinterpret_cast(const_cast(lowering_result.compiled_snippet->get_code())); } VectorDims parallel_exec_domain {}; - code ptr {nullptr}; + LoweringResult lowering_result {}; }; /** @@ -52,7 +73,7 @@ class Generator { /** * @brief Default constructor */ - Generator(const std::shared_ptr& t) : target(t), lowered_saved{} {} + Generator(const std::shared_ptr& t) : target(t) {} /** * @brief Default destructor */ @@ -62,17 +83,13 @@ class Generator { * @brief Allows to tweak the lowering process. 
*/ /** - * @brief virtual method any specific implementation should implement - * @param m model in canonical for for table-based code generation - * @param config config with transformation and optimization parameters - * @param compile_params parameters for generated code - * @return pointer to generated code + * @brief generates executable code + * @param linear_ir lowered IR for code generation + * @param result variable to hande the result, only compiled_snippet and m_saved_emitters field will be modified + * @param compile_params compile-time parameters used for code generation + * @return void */ - struct LoweringResult { - LoweringResult(code c) : binary_code(c) {} - code binary_code = nullptr; - }; - LoweringResult generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params = nullptr); + void generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params = nullptr) const; /** * @brief gets target machine @@ -96,17 +113,21 @@ class Generator { */ opRegType get_op_reg_type(const std::shared_ptr& op) const; + virtual std::shared_ptr clone() const = 0; + protected: /** * @brief gets register type by specific plugin op type * @return register type */ virtual opRegType get_specific_op_reg_type(const std::shared_ptr& op) const; + /** + * @brief returns true if an emitter can use precompiled kernel. + * @return bool + */ + virtual bool uses_precompiled_kernel(const std::shared_ptr& emitter) const { return false; } std::shared_ptr target; - // todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then). - // This is temporary solution, remove this when kernel caching is implemented. Don't forget to make generate const method. 
- lowered::LinearIR lowered_saved; }; } // namespace snippets diff --git a/src/common/snippets/include/snippets/lowered/expression.hpp b/src/common/snippets/include/snippets/lowered/expression.hpp index c5a1b2b8cb6f5e..289e52e0f59a73 100644 --- a/src/common/snippets/include/snippets/lowered/expression.hpp +++ b/src/common/snippets/include/snippets/lowered/expression.hpp @@ -74,7 +74,6 @@ class Expression : public std::enable_shared_from_this { std::vector m_loop_ids{}; std::shared_ptr m_shapeInference{nullptr}; }; -using ExpressionPtr = std::shared_ptr; class IOExpression : public Expression { friend class LinearIR; diff --git a/src/common/snippets/include/snippets/lowered/expression_factory.hpp b/src/common/snippets/include/snippets/lowered/expression_factory.hpp index bb238356dfa9d2..f179abf746c313 100644 --- a/src/common/snippets/include/snippets/lowered/expression_factory.hpp +++ b/src/common/snippets/include/snippets/lowered/expression_factory.hpp @@ -27,6 +27,13 @@ class LinearIR::ExpressionFactory { } return create(n, params...); } + template::value, bool>::type = true> + static ExpressionPtr shallow_copy(const std::shared_ptr& expr) { + if (const auto& io_expr = std::dynamic_pointer_cast(expr)) + return std::make_shared(*io_expr); + else + return std::make_shared(*expr); + } private: /* -- Default Builders - initialize input port connectors from parents and create new output port connectors themselves */ diff --git a/src/common/snippets/include/snippets/lowered/linear_ir.hpp b/src/common/snippets/include/snippets/lowered/linear_ir.hpp index 8b6a320e18cad7..6d4a357914da39 100644 --- a/src/common/snippets/include/snippets/lowered/linear_ir.hpp +++ b/src/common/snippets/include/snippets/lowered/linear_ir.hpp @@ -116,6 +116,7 @@ class LinearIR { IShapeInferSnippets::Result shape_infer(const std::vector& input_shapes); const std::shared_ptr& get_shape_infer_instance() const {return m_shape_infer; } VectorDims get_master_shape() const; + LinearIR deep_copy() 
const; private: std::shared_ptr m_shape_infer = nullptr; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp new file mode 100644 index 00000000000000..fe4f9956d81c66 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/insert_broadcastmove.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface InsertMovebroadcast + * @brief Injects explicit Movebroadcast operations when the most varying dim is broadcasted + * @ingroup snippets + */ +class InsertBroadcastMove : public Pass { +public: + OPENVINO_RTTI("InsertBroadcastMove", "Pass") + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp index 8b5634ebb29fa4..795dc0d3725f1c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp @@ -18,8 +18,8 @@ namespace pass { */ class SoftmaxDecomposition : public Pass { public: - explicit SoftmaxDecomposition(size_t vector_size); OPENVINO_RTTI("SoftmaxDecomposition", "Pass") + explicit SoftmaxDecomposition(size_t vector_size); bool run(LinearIR& linear_ir) override; private: diff --git a/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp new file mode 100644 index 00000000000000..08243c96beedf5 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/validate_shapes.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 
2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "pass.hpp" + +#include "snippets/lowered/loop_manager.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +/** + * @interface ValidateShapes + * @brief The pass checks that there are no dynamic shapes in the IR + * @ingroup snippets + */ +class ValidateShapes : public Pass { +public: + OPENVINO_RTTI("ValidateShapes", "Pass") + ValidateShapes() = default; + bool run(LinearIR& linear_ir) override; +}; + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/rank_normalization.hpp b/src/common/snippets/include/snippets/op/rank_normalization.hpp new file mode 100644 index 00000000000000..c1ed530ce05832 --- /dev/null +++ b/src/common/snippets/include/snippets/op/rank_normalization.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "snippets/shape_inference/shape_inference.hpp" + +namespace ov { +namespace snippets { +namespace op { + +/** + * @interface RankNormalization + * @brief Generated by Canonicalization for rank normalization purposes. It can prepend input shapes with seve1s only first or last dimensions. + * @arg num_prepend - num `1`s that will be inserted at the beginning of the input shape. Any value is allowed. + * @arg num_append - num `1`s that will be inserted at the end of the input shape. Could be either 0 (default) or 1; + * @ingroup snippets + */ + // Note that technically the same goal could be achieved using op::Unsqueeze operation, + // but RankNormalization has a much narrower semantics, and hence allows for an easier control and a more efficient shape infer. 
+ // +class RankNormalization : public ov::op::Op { +public: + OPENVINO_OP("RankNormalization", "SnippetsOpset"); + + RankNormalization() = default; + RankNormalization(const Output& data, size_t num_prepend, size_t num_append); + + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + size_t get_num_append() const { return m_num_append; } + size_t get_num_prepend() const { return m_num_prepend; } + + class ShapeInfer : public IShapeInferSnippets { + size_t m_num_prepend = 0; + size_t m_num_append = 0; + public: + explicit ShapeInfer(const std::shared_ptr& n); + IShapeInferSnippets::Result + infer(const std::vector& input_shapes) override; + }; + +private: + size_t m_num_prepend = 0; + size_t m_num_append = 0; +}; + +} // namespace op +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index a9321e957e273c..b17031e2a67d1c 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -12,6 +12,7 @@ #include "openvino/core/rt_info.hpp" #include "snippets/pass_manager.hpp" #include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/lowered/pass/pass.hpp" #include "snippets/generator.hpp" @@ -68,7 +69,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // // D = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4> // E = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4> - using BlockedShape = std::tuple; + using Layout = std::vector; + using BlockedShape = std::pair; using BlockedShapeVector = std::vector; Subgraph() = default; @@ -94,43 +96,36 @@ class Subgraph : public ov::op::util::SubGraphOp { const std::shared_ptr& get_generator() const { return m_generator; } std::shared_ptr& get_generator() { return m_generator; } - size_t get_buffer_scratchpad_size() const { 
return m_buffer_scratchpad; } size_t get_virtual_port_count() const { return m_virtual_port_count; } bool is_quantized() const { return config.m_is_quantized; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - snippets::Schedule generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, - const void* compile_params = nullptr); - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory = nullptr, + + snippets::Schedule generate(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& data_flow_passes = {}, + const lowered::pass::PassPipeline& control_flow_passes_pre_common = {}, + const lowered::pass::PassPipeline& control_flow_passes_post_common = {}, + const std::shared_ptr& factory = nullptr, const void* compile_params = nullptr); - snippets::Schedule generate(const void* compile_params = nullptr); - ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); - ov::PartialShape canonicalized_body_shape_infer(const BlockedShapeVector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); - std::vector reshape_body(const std::vector& input_shapes); + snippets::Schedule generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common = 
{}, + const lowered::pass::PassPipeline& backend_passes_post_common = {}, + const void* compile_params = nullptr) const; IShapeInferSnippets::Result shape_infer(const std::vector& input_shapes); // plugin sets generator for a snippet to some specific generator. // it's going to be replaced with Jitters table later void set_generator(std::shared_ptr generator); void set_tile_rank(size_t newRank) {tileRank = newRank;} - void set_virtual_port_count(const size_t count); - void set_min_jit_work_amount(const size_t jit_work_amount); - void set_min_parallel_work_amount(const size_t parallel_work_amount); + void set_virtual_port_count(size_t count); + void set_min_jit_work_amount(size_t jit_work_amount); + void set_min_parallel_work_amount(size_t parallel_work_amount); void print() const; void serialize() const; - void set_master_shape(ov::PartialShape new_shape) {master_shape = std::move(new_shape);} + VectorDims infer_master_shape(); static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; static void fill_empty_output_names(const Output& target_output_node, const Output& replacement_output_node); @@ -143,28 +138,30 @@ class Subgraph : public ov::op::util::SubGraphOp { // Return estimated unique buffer count (upper bound). 
It's needed for tokenization static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t; static auto is_domain_sensitive_op(const std::shared_ptr& op) -> bool; + + void data_flow_transformations(const BlockedShapeVector& blocked_input_shapes = {}, + const std::vector& input_precisions = {}, + const std::vector& output_precisions = {}, + const std::vector& = {}); std::shared_ptr - convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()) const; + convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory = std::make_shared()); + std::shared_ptr clone() const; private: - void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); - void data_flow_transformations(const std::vector& backend_passes); void control_flow_transformations(lowered::LinearIR& linear_ir, + LoweringResult& lowering_result, const lowered::pass::PassPipeline& backend_passes_pre_common, - const lowered::pass::PassPipeline& backend_passes_post_common); + const lowered::pass::PassPipeline& backend_passes_post_common) const; void init_config(); // Count of Subgraph virtual ports: // - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition) // NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()), // we should MANUALLY calculate it where it needed. 
size_t m_virtual_port_count = 0; - size_t m_buffer_scratchpad = 0lu; Shape exec_domain = {}; std::shared_ptr m_generator = nullptr; - ov::PartialShape master_shape; size_t tileRank = 0; // set by plugin to specify the number of dimensions processed in a single kernel call - size_t maxInputRank = 0; std::vector appendOnesForCanonical; std::shared_ptr m_linear_ir = nullptr; diff --git a/src/common/snippets/include/snippets/pass/align_element_types.hpp b/src/common/snippets/include/snippets/pass/align_element_types.hpp new file mode 100644 index 00000000000000..9a8a5ff880aeab --- /dev/null +++ b/src/common/snippets/include/snippets/pass/align_element_types.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/pass.hpp" +#include "transformations_visibility.hpp" +#include "snippets/op/subgraph.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface AlignElementTypes + * @brief Align body precision with expected input/output precision. Insert op::ConvertSaturation if necessary. 
+ * @ingroup snippets + */ +class AlignElementTypes: public ov::pass::ModelPass { +public: + OPENVINO_RTTI("AlignElementTypes"); + AlignElementTypes(std::vector input_precisions, + std::vector output_precisions); + bool run_on_model(const std::shared_ptr& m) override; + +private: + std::vector m_input_precisions; + std::vector m_output_precisions; +}; + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/canonicalization.hpp b/src/common/snippets/include/snippets/pass/canonicalization.hpp new file mode 100644 index 00000000000000..f57218328ca57c --- /dev/null +++ b/src/common/snippets/include/snippets/pass/canonicalization.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/pass.hpp" +#include "transformations_visibility.hpp" +#include "snippets/op/subgraph.hpp" +#include "snippets/shape_types.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface Canonicalization + * @brief Canonicalization inserts RankNormalization (ov::op::Unsqueeze analogue) operations to account for: + * - input ranks mismatch, then inputs with smaller ranks are prepended with 1 + * - layouts mismatch (only planar + blocked is supported), planar shapes are appended with 1 + * @ingroup snippets + */ +class Canonicalization: public ov::pass::ModelPass { +public: + OPENVINO_RTTI("Canonicalization"); + using BlockedShapeVector = op::Subgraph::BlockedShapeVector; + using Layout = std::vector; + explicit Canonicalization(const BlockedShapeVector& blocked_input_shapes); + bool run_on_model(const std::shared_ptr& m) override; + +private: + std::vector m_in_shapes; + std::vector m_in_layouts; + bool m_has_dynamic_inputs = false; +}; + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/snippets_isa.hpp 
b/src/common/snippets/include/snippets/snippets_isa.hpp index 87579feebb1796..ba85ae68eeb634 100644 --- a/src/common/snippets/include/snippets/snippets_isa.hpp +++ b/src/common/snippets/include/snippets/snippets_isa.hpp @@ -24,6 +24,7 @@ #include "op/loop.hpp" #include "op/brgemm.hpp" #include "op/vector_buffer.hpp" +#include "op/rank_normalization.hpp" namespace ov { namespace snippets { diff --git a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp index b0a87a8a82a1f9..351770bdab746f 100644 --- a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp +++ b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp @@ -22,6 +22,7 @@ OV_OP(Store, ov::snippets::op) OV_OP(BroadcastMove, ov::snippets::op) OV_OP(Scalar, ov::snippets::op) OV_OP(Nop, ov::snippets::op) +OV_OP(RankNormalization, ov::snippets::op) // Layout-oblivious from opset1 diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp index a4d15463f2972a..d42779bcd7153c 100644 --- a/src/common/snippets/include/snippets/target_machine.hpp +++ b/src/common/snippets/include/snippets/target_machine.hpp @@ -13,6 +13,15 @@ namespace ov { namespace snippets { + +struct CompiledSnippet { + virtual const uint8_t* get_code() const = 0; + virtual size_t get_code_size() const = 0; + virtual bool empty() const = 0; + virtual ~CompiledSnippet() = default; +}; +using CompiledSnippetPtr = std::shared_ptr; + typedef std::pair(const lowered::ExpressionPtr&)>, std::function(const std::shared_ptr&)>> jitters_value; @@ -33,7 +42,7 @@ class TargetMachine { * @brief finalizes code generation * @return generated kernel binary */ - virtual code get_snippet() const = 0; + virtual CompiledSnippetPtr get_snippet() = 0; /** * @brief gets number of lanes supported by target's vector ISA diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp 
index 525de3e03b2118..d10930125e0ed0 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -58,6 +58,7 @@ constexpr inline bool implication(bool cause, bool cond) { VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& layout); VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc); VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port); +bool is_dynamic_vdims(const VectorDims& shape); } // namespace utils } // namespace snippets diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 1d1d733277f99b..cede4c4a6e532c 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -15,7 +15,7 @@ namespace ov { namespace snippets { -Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params) { +void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, const void* compile_params) const { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator::generate") OV_ITT_TASK_CHAIN(GENERATE, ov::pass::itt::domains::SnippetsTransform, "Snippets::Generator", "::Transformations") if (!target->is_supported()) @@ -28,7 +28,6 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons lowered_pipeline.register_pass(reg_type_mapper); lowered_pipeline.register_pass(); lowered_pipeline.run(linear_ir); - linear_ir.init_emitters(target); OV_ITT_TASK_NEXT(GENERATE, "::EmitCode") @@ -45,12 +44,15 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons } OV_ITT_TASK_NEXT(GENERATE, "::GetSnippet") - // todo: we save lowered to access compiled brgemm kernels on execution time (normally lowered is destructed by then) - // remove this when kernel caching is implemented. Don't forget to make generate const method. 
- if (config.m_save_expressions) - lowered_saved = linear_ir; - - return { target->get_snippet() }; + // Note: some emitters use precompiled kernels. They need to be saved, so the kernels are accessible at runtime. + if (linear_ir.get_config().m_save_expressions) { + for (const auto& expr : linear_ir) { + const auto& emitter = expr->get_emitter(); + if (uses_precompiled_kernel(emitter)) + result.m_saved_emitters.emplace_back(emitter); + } + } + result.compiled_snippet = target->get_snippet(); } std::shared_ptr Generator::get_target_machine() const { @@ -63,7 +65,8 @@ Generator::opRegType Generator::get_op_reg_type(const std::shared_ptr& op) std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) + std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op)) return gpr2gpr; else if (std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op)) diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index cc0ace467dd6e3..adf3894f71b8b7 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -122,6 +122,59 @@ LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterato return result; } +LinearIR LinearIR::deep_copy() const { + // todo: implement the same functionality using standard copy constructor + auto clone_ports_descriptors = [](std::vector& ports) { + std::for_each(ports.begin(), ports.end(), [](PortDescriptorPtr& pd) { pd = pd->clone(); }); + }; + const auto& original_lir = *this; + LinearIR new_lir; + new_lir.m_config = original_lir.m_config; + new_lir.m_shape_infer = original_lir.m_shape_infer; + NodeVector original_nodes; + original_nodes.reserve(original_lir.m_expressions.size()); + std::unordered_map connectors_map; + for (const auto& orig_expr : original_lir) { + original_nodes.push_back(orig_expr->get_node()); + const auto& copy_expr = 
ExpressionFactory::shallow_copy(orig_expr); + clone_ports_descriptors(copy_expr->m_input_port_descriptors); + clone_ports_descriptors(copy_expr->m_output_port_descriptors); + + for (auto& orig_con : copy_expr->m_output_port_connectors) { + const auto& copy_source = copy_expr->get_output_port(orig_con->get_source().get_index()); + const auto& copy_con = std::make_shared(copy_source); + connectors_map[orig_con] = copy_con; + orig_con = copy_con; + } + for (size_t i = 0; i < copy_expr->get_input_count(); i++) { + const auto& copy_connector = connectors_map[copy_expr->get_input_port_connector(i)]; + const auto& copy_consumer = copy_expr->get_input_port(i); + copy_connector->add_consumer(copy_consumer); + copy_expr->replace_input(i, copy_connector); + } + + if (auto io_expr = std::dynamic_pointer_cast(copy_expr)) + new_lir.m_io_expressions.push_back(io_expr); + new_lir.m_expressions.push_back(copy_expr); + } + // node_map and expr_map map original node pointer (expression) to a new pointer (expression) + ngraph::NodeMap node_map; + OPENVINO_SUPPRESS_DEPRECATED_START + ngraph::clone_nodes(original_nodes, node_map); + OPENVINO_SUPPRESS_DEPRECATED_END + new_lir.m_node2expression_map.clear(); + for (const auto& copy_expr : new_lir.m_expressions) { + copy_expr->m_source_node = node_map[copy_expr->m_source_node.get()]; + new_lir.m_node2expression_map[copy_expr->m_source_node] = copy_expr; + } + new_lir.m_loop_manager = std::make_shared(); + // It's Ok to share shapeInfer factory, since LIR doesn't change it + new_lir.m_shape_infer_factory = m_shape_infer_factory; + // Note: shapeInfer stores expression pointers. we re-create it, so shape inference is performed on cloned exprs. 
+ new_lir.m_shape_infer = std::make_shared(new_lir.m_expressions, new_lir.m_io_expressions); + return new_lir; +} + void LinearIR::debug_print(bool tds_as_pointers) const { auto print_rinfo = [](const RegInfo& rinfo) { std::cerr << " : {"; @@ -320,7 +373,7 @@ VectorDims LinearIR::get_master_shape() const { for (const auto& oe : out_exprs) { const auto& port_desc = oe->get_input_port_descriptor(0); OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, port_desc->get_shape()), - "Failed to merge input shapes in OptimizeDomain pass"); + "Failed to merge input shapes in infer_master_shape"); } } return master_shape; @@ -339,6 +392,19 @@ LinearIR::LIRShapeInfer::LIRShapeInfer(container& body_exprs, io_container& io_e OPENVINO_THROW("Invalid io expression type detected"); } } + // Note that if all output shapes are static, as in the case when the first shape infer was performed on nGraph, + // we can treat them as the last result + std::vector outputDims; + outputDims.reserve(m_output_exprs.size()); + for (const auto& expr : m_output_exprs) { + const auto &shape = expr->get_input_port_descriptor(0)->get_shape(); + if (utils::is_dynamic_vdims(shape)) { + outputDims.clear(); + break; + } + outputDims.push_back(shape); + } + m_last_result = {outputDims, ShapeInferStatus::success}; } IShapeInferSnippets::Result LinearIR::LIRShapeInfer::infer(const std::vector& input_shapes) { diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index 638845ec6929ad..7755cfebe7cc38 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -46,12 +46,21 @@ bool AssignRegisters::run(LinearIR& linear_ir) { for (const auto& expr : expressions) { auto op = expr->get_node(); if (const auto io_expr = std::dynamic_pointer_cast(expr)) { - if (io_expr->get_type() == IOExpression::io_type::INPUT) - 
manually_assigned_gprs[expr->get_output_port_connector(0)] = io_expr->get_index(); - else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) + if (io_expr->get_type() == IOExpression::io_type::INPUT) { + const auto& out_connector = expr->get_output_port_connector(0); + manually_assigned_gprs[out_connector] = io_expr->get_index(); + const auto& consumer_inputs = out_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // TODO [96434]: Support RankNormalization (Reshape) in arbitrary place in pipeline, not just after inputs + if (ov::is_type(first_consumer->get_node())) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + manually_assigned_gprs[first_consumer->get_output_port_connector(0)] = io_expr->get_index(); + } + } else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) { manually_assigned_gprs[expr->get_input_port_connector(0)] = num_parameters + io_expr->get_index(); - else + } else { OPENVINO_THROW("Unsupported io_type detected"); + } } else if (const auto& buffer = ov::as_type_ptr(op)) { const auto buffer_id = buffer->get_id(); // All buffers have one common data pointer diff --git a/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp new file mode 100644 index 00000000000000..a70698580a61e3 --- /dev/null +++ b/src/common/snippets/src/lowered/pass/insert_broadcastmove.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/insert_broadcastmove.hpp" +#include "snippets/utils.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool InsertBroadcastMove::run(LinearIR& linear_ir) { + 
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertBroadcastMove") + bool modified = false; + const auto& loop_manager = linear_ir.get_loop_manager(); + + auto supports_broadcasting = [](const std::shared_ptr& n) { + return ov::op::util::supports_auto_broadcast(n) || + n->get_autob().m_type == ov::op::AutoBroadcastType::NUMPY || + is_type(n); + }; + auto dont_need_broadcasting = [](const ov::Output& v){ + // We don't need to insert BroadcastMove after the following operations: + // - Scalar has emitter with explicit broadcasting + // - VectorBuffer has scalar output shape to avoid broadcast conflicts and manually shape insertion. + // - Fill can be inserted only after VectorBuffer, and should be ignored as well. + return utils::is_scalar_constant(v.get_node_shared_ptr()) || + ov::is_type(v.get_node_shared_ptr()) || + ov::is_type(v.get_node_shared_ptr()); + }; + for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { + const auto& expr = *expr_it; + const auto& node = expr->get_node(); + const auto& descriptors = expr->get_input_port_descriptors(); + if (!supports_broadcasting(node) || descriptors.size() < 2) + continue; + const auto& connectors = expr->get_input_port_connectors(); + OPENVINO_ASSERT(connectors.size() == descriptors.size(), + "Invalid expression configuration: connectors and descriptors size mismatch"); + + std::vector last_dims(descriptors.size()); + std::transform(descriptors.begin(), descriptors.end(), last_dims.begin(), + [](const std::shared_ptr& d){ + return d->get_shape().back(); + }); + const auto broadcasted_dim = *std::max_element(last_dims.begin(), last_dims.end()); + for (size_t i = 0; i < last_dims.size(); i++) { + const auto& parent_port = connectors[i]->get_source(); + if (last_dims[i] != broadcasted_dim && + !dont_need_broadcasting(parent_port.get_expr()->get_node())) { + OPENVINO_ASSERT(last_dims[i] == 1, + "Attempt to broadcast non-1 dimension. 
Target dim: ", broadcasted_dim, + " This dim: ", last_dims[i]); + auto input_shape = descriptors[i]->get_shape(); + // Note that input_shape could be empty (aka ngraph scalar), so we can't just replace the last dim + if (input_shape.empty()) + input_shape.resize(1); + input_shape.back() = last_dims[i]; + const auto broadcast = std::make_shared(node->get_input_source_output(i), utils::vdims_to_pshape(input_shape)); + + PortDescriptorUtils::set_port_descriptor_ptr(broadcast->output(0), connectors[i]->get_source().get_descriptor_ptr()->clone()); + const auto broadcast_expr = linear_ir.create_expression(broadcast, {connectors[i]}); + linear_ir.insert(expr_it, broadcast_expr); + linear_ir.replace_input(expr->get_input_port(i), broadcast_expr->get_output_port_connector(0)); + // Note that BroadcastMove modified the next expr input shape, so we need to set update + // expr's input port descriptor to reflect the changes + expr->get_input_port_descriptor(i)->set_shape(broadcast_expr->get_output_port_descriptor(0)->get_shape()); + + // Copy Loop identifies + const auto& loop_ids = expr->get_loop_ids(); + broadcast_expr->set_loop_ids(loop_ids); + loop_manager->update_loops_port(loop_ids, expr->get_input_port(0), {broadcast_expr->get_input_port(0)}, true); + + modified = true; + } + } + } + return modified; +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov + diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 91cbe55ef98b34..da5ffc11c3169d 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -35,10 +35,9 @@ std::vector get_buffer_loop_ids(const std::vector& lhs, const st ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& buffer_loop_ids, const std::vector& parent_loop_ids, - const ov::Output& parent_output, + const 
ExpressionPort& expr_port, const int allocation_rank) { - const auto& port = lowered::PortDescriptorUtils::get_port_descriptor_ptr(parent_output); - const auto planar_shape = utils::get_planar_vdims(port); + const auto& planar_shape = utils::get_planar_vdims(expr_port); const size_t rank = allocation_rank >= 0 ? std::min(static_cast(allocation_rank), planar_shape.size()) : planar_shape.size(); ov::Shape allocation_shape(rank); @@ -123,9 +122,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& for (const auto& entry_point : loop_entries) { const auto& entry_port = entry_point.expr_port; const auto& expr = entry_port->get_expr(); - const auto port = entry_port->get_index(); + const auto port_idx = entry_port->get_index(); const auto node = expr->get_node(); - const auto& input_connector = expr->get_input_port_connector(port); + const auto& input_connector = expr->get_input_port_connector(port_idx); const auto& parent_expr_output = input_connector->get_source(); const auto& parent_expr = parent_expr_output.get_expr(); const auto parent_port = parent_expr_output.get_index(); @@ -140,7 +139,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& const auto parent_ma = ov::as_type_ptr(parent); const auto node_ma = ov::as_type_ptr(node); bool is_buffer_needed = (parent_ma && parent_ma->is_memory_access_output_port(parent_port)) || - (node_ma && node_ma->is_memory_access_input_port(port)); + (node_ma && node_ma->is_memory_access_input_port(port_idx)); const auto current_loops = expr->get_loop_ids(); const auto parent_loops = parent_expr->get_loop_ids(); const auto buffer_loop_ids = get_buffer_loop_ids(current_loops, parent_loops, is_buffer_needed); @@ -154,7 +153,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& const auto allocation_shape = compute_allocation_shape(loop_manager, buffer_loop_ids, parent_loops, - parent->output(parent_port), + parent_expr_output, 
m_buffer_allocation_rank); const auto buffer = std::make_shared(parent->output(parent_port), allocation_shape); PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone()); @@ -169,7 +168,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& for (const auto& exit_point : loop_exits) { const auto& exit_port = exit_point.expr_port; const auto& expr = exit_port->get_expr(); - const auto port = exit_port->get_index(); + const auto port_idx = exit_port->get_index(); const auto node = expr->get_node(); const auto output_connector = exit_port->get_port_connector_ptr(); const auto child_exprs_inputs = output_connector->get_consumers(); @@ -200,7 +199,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& const auto child_ma = ov::as_type_ptr(child); const auto node_ma = ov::as_type_ptr(node); bool is_buffer_needed = (child_ma && child_ma->is_memory_access_input_port(child_port)) || - (node_ma && node_ma->is_memory_access_output_port(port)); + (node_ma && node_ma->is_memory_access_output_port(port_idx)); const auto local_buffer_loop_ids = get_buffer_loop_ids(current_loops, child_expr->get_loop_ids(), is_buffer_needed); if (is_buffer_needed) { @@ -247,9 +246,9 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::constExprIt& const auto allocation_shape = compute_allocation_shape(loop_manager, buffer_loop_ids, current_loops, - node->output(port), + *exit_port, m_buffer_allocation_rank); - auto buffer = std::make_shared(node->output(port), allocation_shape); + auto buffer = std::make_shared(node->output(port_idx), allocation_shape); PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_port->get_descriptor_ptr()->clone()); // We cannot insert Node output connector on Buffer output because not all consumers of Node needs Buffer // Example: diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp 
b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index 47fa93f699354b..ff75a5be0e6c5c 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -3,7 +3,7 @@ // #include "snippets/lowered/pass/insert_load_store.hpp" - +#include "snippets/op/rank_normalization.hpp" #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" @@ -30,14 +30,18 @@ size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const { } bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { + std::shared_ptr data_expr = *data_expr_it; + auto consumer_inputs = data_expr->get_output_port_connector(0)->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + if (is_type(first_consumer->get_node())) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + data_expr = first_consumer; + } const auto& loop_manager = linear_ir.get_loop_manager(); - const auto& data_expr = *data_expr_it; - const auto& data_node = data_expr->get_node(); + const auto& data_ngraph_output = data_expr->get_node()->output(0); const auto& output_connector = data_expr->get_output_port_connector(0); - const auto consumer_inputs = output_connector->get_consumers(); - bool was_inserted = false; - for (const auto& consumer_input : consumer_inputs) { + for (const auto& consumer_input : output_connector->get_consumers()) { const auto& consumer_expr = consumer_input.get_expr(); const auto port = consumer_input.get_index(); const auto& consumer = consumer_expr->get_node(); @@ -46,7 +50,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr return false; const auto loop_ids = consumer_expr->get_loop_ids(); - const auto load = std::make_shared(data_node->output(0), 
get_count(data_expr->get_output_port_descriptor(0))); + const auto load = std::make_shared(data_ngraph_output, get_count(data_expr->get_output_port_descriptor(0))); PortDescriptorUtils::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone()); const auto load_expr = linear_ir.create_expression(load, {output_connector}); linear_ir.insert(linear_ir.find_after(data_expr_it, consumer_expr), load_expr); @@ -55,7 +59,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr load_expr->set_loop_ids(loop_ids); // Need to update all the corresponding Loops with the same Entry Point - const auto prev_entry_point = consumer_input; + const auto& prev_entry_point = consumer_input; const auto new_entry_point = load_expr->get_input_port(0); loop_manager->update_loops_port(loop_ids, prev_entry_point, {new_entry_point}, true); was_inserted = true; @@ -116,20 +120,14 @@ bool InsertLoadStore::run(LinearIR& linear_ir) { const auto& node = expr->get_node(); if (ov::is_type(node)) { modified |= insert_load(linear_ir, expr_it); - continue; - } - if (ov::is_type(node)) { + } else if (ov::is_type(node)) { modified |= insert_store(linear_ir, expr_it); - continue; - } - if (auto buffer = ov::as_type_ptr(node)) { + } else if (auto buffer = ov::as_type_ptr(node)) { modified |= insert_load(linear_ir, expr_it); if (buffer->is_intermediate_memory()) modified |= insert_store(linear_ir, expr_it); - continue; } } - return modified; } diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 86246ce61f1be6..05d38e111927c4 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -29,7 +29,8 @@ bool MarkLoops::run(LinearIR& linear_ir) { auto is_not_start_point = [](const std::shared_ptr& node) { return ov::is_type(node) || ov::is_type(node) || - ov::is_type(node); + ov::is_type(node) || + ov::is_type(node); }; 
auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) { diff --git a/src/common/snippets/src/lowered/pass/optimize_domain.cpp b/src/common/snippets/src/lowered/pass/optimize_domain.cpp index 09061e63250464..f2d2fd43baf96c 100644 --- a/src/common/snippets/src/lowered/pass/optimize_domain.cpp +++ b/src/common/snippets/src/lowered/pass/optimize_domain.cpp @@ -8,6 +8,7 @@ #include "snippets/lowered/linear_ir.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/shape_inference/shape_inference.hpp" +#include "snippets/utils.hpp" namespace ov { @@ -79,18 +80,28 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) { return false; } OPENVINO_ASSERT(config.m_min_parallel_work_amount != 0, "OptimizeDomain: Min parallel work amount can't equal to zero"); - std::vector> input_exprs; std::vector input_shapes; VectorDims master_shape = linear_ir.get_master_shape(); - for (const auto& expr : linear_ir.get_IO_ops()) { - if (expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) { - input_exprs.push_back(expr); - const auto& shape = expr->get_output_port_descriptor(0)->get_shape(); + bool blocked_input_shapes = false; + for (const auto& io_expr : linear_ir.get_IO_ops()) { + if (io_expr->get_type() == snippets::lowered::IOExpression::io_type::INPUT) { + auto consumer_inputs = io_expr->get_output_port_connector(0)->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + if (auto rank_norm = as_type_ptr(first_consumer->get_node())) { + // If RankNormalization appends dims, then the appended dims will be broadcasted + // so collapsing is not allowed. We may increment tile rank though. 
+ if (rank_norm->get_num_append() != 0) + blocked_input_shapes = true; + // If RankNormalization prepends dims, then the dims should be ignored during domain optimization + // to avoid passing already incremented shapes to linear_ir.shape_infer() + } + const ExpressionPtr& shape_producing_expr = blocked_input_shapes ? + first_consumer : + io_expr; + const auto& shape = utils::get_planar_vdims(shape_producing_expr->get_output_port_descriptor(0)); OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(), [](size_t d) {return d == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION; }), "OptimizeDomain pass does not support dynamic shapes"); - OPENVINO_ASSERT(ov::snippets::broadcast_merge_into(master_shape, shape), - "Failed to merge input shapes in OptimizeDomain pass"); input_shapes.emplace_back(shape); } } @@ -98,7 +109,9 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) { master_shape.end(), (size_t)1, std::multiplies()); - const auto num_dims_collapsed = optimize(input_shapes, + const auto num_dims_collapsed = blocked_input_shapes ? + 0 : + optimize(input_shapes, master_shape, total_work_amount, config.m_min_parallel_work_amount, diff --git a/src/common/snippets/src/lowered/pass/propagate_layout.cpp b/src/common/snippets/src/lowered/pass/propagate_layout.cpp index 7b69c82777d90e..aea3cf99858622 100644 --- a/src/common/snippets/src/lowered/pass/propagate_layout.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_layout.cpp @@ -19,23 +19,25 @@ bool PropagateLayout::run(LinearIR& linear_ir) { if (linear_ir.empty()) return false; - for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) { - const auto& expr = *expr_it; + for (const auto& expr : linear_ir) { const auto io_expr = std::dynamic_pointer_cast(expr); if (!io_expr) continue; const bool is_input = io_expr->get_type() == IOExpression::io_type::INPUT; const auto& connectors = is_input ? 
expr->get_output_port_connectors() : expr->get_input_port_connectors(); - if (connectors.size() != 1) - OPENVINO_THROW("Parameter/Results should have exactly one output/input"); + OPENVINO_ASSERT(connectors.size() == 1, "Parameter/Results should have exactly one output/input"); // If input - we should be looking downstream, if output - upstream const auto& target_connector = connectors.front(); if (is_input) { - const auto consumer_inputs = target_connector->get_consumers(); // Note that here we consider only the first child (which is usually load), // but often there is another child - LoopEnd + auto consumer_inputs = target_connector->get_consumers(); + const auto& first_consumer = consumer_inputs.begin()->get_expr(); + // If there is a RankNormalization op after a parameter - we should skip it + if (is_type(first_consumer->get_node())) + consumer_inputs = first_consumer->get_output_port_connector(0)->get_consumers(); std::set> child_layouts; for (const auto& child_input : consumer_inputs) { const auto& child = child_input.get_expr(); diff --git a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp index e868d75e5dd5ea..b434e0f974beb3 100644 --- a/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/softmax_decomposition.cpp @@ -44,13 +44,15 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Float constant values in byte representation const auto float_min_constant = uint32_t(0xff7fffff); const auto zero_constant = uint32_t(0x00000000); - + const bool is_dynamic = softmax->is_dynamic(); // We need an iterator to the inserted element - auto push_node = [&linear_ir, &expr_it](const std::shared_ptr& n) { + auto push_node = [&linear_ir, &expr_it, is_dynamic](const std::shared_ptr& n) { const auto expr = linear_ir.insert(expr_it, n); + if (is_dynamic) + expr->get()->updateShapes(); return std::make_pair(expr, n); }; - + const 
ov::PartialShape broadcasted_shape(softmax_expr->get_input_port_descriptor(0)->get_shape()); // Note: VectorBuffer is a special case, since it should go before the initial Load. So we handle it separately const auto& vector_buffer_max = push_node(std::make_shared()); // Init value of vector buffer for ReduceMax is -FLOAT_MIN. @@ -65,9 +67,8 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { std::vector{(*max.first)->get_input_port(0), (*max.first)->get_input_port(1)}, std::vector{(*max.first)->get_output_port(0)}); - const auto broadcast_horizon_max = push_node( - std::make_shared(horizon_max.second, horizon_max.second->get_input_partial_shape(0))); + std::make_shared(horizon_max.second, broadcasted_shape)); const auto vector_buffer_sum = push_node(std::make_shared()); // Init value of vector buffer for ReduceSum is zero. const auto fill_sum = push_node(std::make_shared(vector_buffer_sum.second, 0, zero_constant)); @@ -89,7 +90,7 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) { // Divide is expensive operation, so we decompose it into 1 / x * y, where 1 / x is executed outside loop const auto pow = push_node(std::make_shared(horizon_sum.second, -1.f)); - const auto broadcast_pow = push_node(std::make_shared(pow.second, horizon_sum.second->get_input_partial_shape(0))); + const auto broadcast_pow = push_node(std::make_shared(pow.second, broadcasted_shape)); // Mul (pseudo-Divide loop) const auto mul = push_node(std::make_shared(exp.second, broadcast_pow.second)); diff --git a/src/common/snippets/src/lowered/pass/validate_shapes.cpp b/src/common/snippets/src/lowered/pass/validate_shapes.cpp new file mode 100644 index 00000000000000..8d12004313e0bf --- /dev/null +++ b/src/common/snippets/src/lowered/pass/validate_shapes.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/validate_shapes.hpp" + +#include "snippets/lowered/linear_ir.hpp" +#include 
"snippets/shape_inference/shape_inference.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { +namespace lowered { +namespace pass { + +bool ValidateShapes::run(LinearIR& linear_ir) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ValidateShapes") + + for (const auto& expr : linear_ir) { + const auto num_inputs = expr->get_input_count(); + const auto& port_connectors = expr->get_input_port_connectors(); + const auto& port_descriptors = expr->get_input_port_descriptors(); + OPENVINO_ASSERT(port_connectors.size() == num_inputs, "Invalid number of port connectors detected"); + OPENVINO_ASSERT(port_descriptors.size() == num_inputs, "Invalid number of port descriptors detected"); + for (size_t i = 0; i < num_inputs; i++) { + const auto& descr = port_descriptors[i]; + const auto& layout = descr->get_layout(); + const auto& shape = descr->get_shape(); + const auto& n = expr->get_node(); + OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(), + [](size_t d) {return d == IShapeInferSnippets::DYNAMIC_DIMENSION;}), + "Dynamic dimensions are not allowed at this point of pipeline. ", + "Check the expr for node ", n->get_friendly_name()); + OPENVINO_ASSERT(layout.size() == shape.size(), "Layout and shape sizes must match. ", + "Check the expr for node ", n->get_friendly_name()); + const auto& parent_desc = port_connectors[i]->get_source().get_descriptor_ptr(); + const auto& parent_shape = parent_desc->get_shape(); + OPENVINO_ASSERT(parent_shape == shape, "Parent shape must be equal to the expression shape. 
", + "Check the expr for node ", n->get_friendly_name()); + } + } + return false; +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 96e8c718cc972b..e8c4bdd0626b47 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -3,6 +3,7 @@ // #include "snippets/lowered/port_descriptor.hpp" +#include namespace ov { namespace snippets { @@ -12,13 +13,15 @@ size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(in.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Input& in, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(in.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Output(out.get_node(), out.get_index()), std::move(subtensor_shape), std::move(layout)) {} -PortDescriptor::PortDescriptor(const ov::Output& out, VectorDims subtensor_shape, std::vector layout) - : PortDescriptor(out.get_shape(), std::move(subtensor_shape), std::move(layout)) {} + +PortDescriptor::PortDescriptor(const ov::Output& out, std::vector subtensor_shape, std::vector layout) + : PortDescriptor(utils::pshape_to_vdims(out.get_partial_shape()), std::move(subtensor_shape), std::move(layout)) {} PortDescriptor::PortDescriptor(VectorDims shape, VectorDims subtensor_shape, std::vector layout) : 
m_tensor_shape(std::move(shape)), m_layout(std::move(layout)), m_subtensor_shape(std::move(subtensor_shape)) { @@ -30,13 +33,12 @@ void PortDescriptor::validate_arguments() { m_layout.resize(m_tensor_shape.size()); // NCHW layout by default std::iota(m_layout.begin(), m_layout.end(), 0); - } else if (m_layout.size() != m_tensor_shape.size()) { - OPENVINO_THROW("Snippets tensor descriptor: Layout size must be equal to the shape size"); } + OPENVINO_ASSERT(m_layout.size() == m_tensor_shape.size(), "Snippets tensor descriptor: Layout size must be equal to the shape size"); } PortDescriptorPtr PortDescriptor::clone() const { - const auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); + auto desc = std::make_shared(m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); return desc; } diff --git a/src/common/snippets/src/op/buffer.cpp b/src/common/snippets/src/op/buffer.cpp index 8b703fa0c29a16..615979ec5e3281 100644 --- a/src/common/snippets/src/op/buffer.cpp +++ b/src/common/snippets/src/op/buffer.cpp @@ -46,15 +46,13 @@ bool Buffer::visit_attributes(AttributeVisitor& visitor) { void Buffer::validate_and_infer_types() { INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types); - ov::Shape output_shape; + ov::PartialShape output_shape; if (m_type == Type::NewMemory) { OPENVINO_ASSERT(get_input_size() == 0, "Buffer with new allocated memory must to not have arguments!"); output_shape = m_shape; } else if (m_type == Type::IntermediateMemory) { - const auto& input_shape = get_input_partial_shape(0); - OPENVINO_ASSERT(input_shape.is_static(), "Buffer supports only static input shape"); m_element_type = get_input_element_type(0); - output_shape = input_shape.get_shape(); + output_shape = get_input_partial_shape(0); } else { OPENVINO_THROW("Buffer supports only the following types: NewMemory and IntermediateMemory"); } diff --git a/src/common/snippets/src/op/rank_normalization.cpp b/src/common/snippets/src/op/rank_normalization.cpp new file mode 
100644 index 00000000000000..5dfd46492a1946 --- /dev/null +++ b/src/common/snippets/src/op/rank_normalization.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/rank_normalization.hpp" +#include "snippets/utils.hpp" + +namespace ov { +namespace snippets { +namespace op { + +RankNormalization::RankNormalization(const Output& data, size_t num_prepend, size_t num_append) : + Op({data}), m_num_prepend(num_prepend), m_num_append(num_append) { + constructor_validate_and_infer_types(); +} + + +std::shared_ptr RankNormalization::clone_with_new_inputs(const OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args[0], m_num_prepend, m_num_append); +} + +void RankNormalization::validate_and_infer_types() { + auto new_shape = get_input_partial_shape(0); + // Note: other values are not allowed, only planar + blocked layout combination can be normalized. + NODE_VALIDATION_CHECK(this, utils::one_of(m_num_append, 0lu, 1lu), + "num_append could be only 0 or 1, other values are not allowed."); + new_shape.insert(new_shape.begin(), m_num_prepend, Dimension(1)); + new_shape.insert(new_shape.end(), m_num_append, Dimension(1)); + set_output_type(0, get_input_element_type(0), new_shape); +} + +bool RankNormalization::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("num_prepend", m_num_prepend); + visitor.on_attribute("num_append", m_num_append); + return true; +} + +RankNormalization::ShapeInfer::ShapeInfer(const std::shared_ptr& n) { + const auto& rank_norm = as_type_ptr(n); + OPENVINO_ASSERT(rank_norm, "Invalid operation passed to RankNormalization::ShapeInfer: ", n->get_type_info().name); + m_num_append = rank_norm->m_num_append; + m_num_prepend = rank_norm->m_num_prepend; +} + +IShapeInferSnippets::Result +RankNormalization::ShapeInfer::infer(const std::vector& input_shapes) { + OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid 
number of input shapes passed to RankNormalization::ShapeInfer::infer"); + VectorDims out_shape = input_shapes[0].get(); + out_shape.insert(out_shape.begin(), m_num_prepend, 1); + out_shape.insert(out_shape.end(), m_num_append, 1); + return {{out_shape}, ShapeInferStatus::success}; +} + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/scalar.cpp b/src/common/snippets/src/op/scalar.cpp index 029a2e613f28d2..4efd1716a6fb94 100644 --- a/src/common/snippets/src/op/scalar.cpp +++ b/src/common/snippets/src/op/scalar.cpp @@ -4,14 +4,17 @@ #include "snippets/op/scalar.hpp" +namespace ov { +namespace snippets { +namespace op { -std::shared_ptr ov::snippets::op::Scalar::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Scalar::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); return std::make_shared(*this); } // Scalar currently supports only one-element constants, this could be changed in the future -void ov::snippets::op::Scalar::validate_and_infer_types() { +void Scalar::validate_and_infer_types() { Constant::validate_and_infer_types(); auto out_pshape = get_output_partial_shape(0); NODE_VALIDATION_CHECK(this, out_pshape.is_static(), "Scalar supports only static input shapes"); @@ -20,7 +23,7 @@ void ov::snippets::op::Scalar::validate_and_infer_types() { " shape"); } -bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { +bool Scalar::visit_attributes(AttributeVisitor& visitor) { auto shape = get_output_shape(0); auto type = get_output_element_type(0); auto value = cast_vector(); @@ -29,3 +32,7 @@ bool ov::snippets::op::Scalar::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("value", value); return true; } + +} // namespace op +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/subgraph.cpp 
b/src/common/snippets/src/op/subgraph.cpp index dc13bb3e8bb716..fccecfa8ab5f32 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -6,9 +6,7 @@ #include "snippets/remarks.hpp" #include "snippets/op/subgraph.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/convert_constants.hpp" @@ -17,6 +15,9 @@ #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/set_softmax_ports.hpp" +#include "snippets/pass/canonicalization.hpp" +#include "snippets/pass/align_element_types.hpp" +#include "snippets/lowered/pass/validate_shapes.hpp" #include "snippets/utils.hpp" @@ -29,6 +30,7 @@ #include "snippets/lowered/pass/init_loops.hpp" #include "snippets/lowered/pass/insert_buffers.hpp" #include "snippets/lowered/pass/insert_load_store.hpp" +#include "snippets/lowered/pass/insert_broadcastmove.hpp" #include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" #include "snippets/lowered/pass/allocate_buffers.hpp" #include "snippets/lowered/pass/propagate_layout.hpp" @@ -61,7 +63,7 @@ namespace snippets { namespace op { void Subgraph::set_generator(std::shared_ptr generator) { - m_generator = generator; + m_generator = std::move(generator); } void Subgraph::set_virtual_port_count(const size_t count) { @@ -171,36 +173,6 @@ std::shared_ptr Subgraph::clone_with_new_inputs(const OutputVector& inputs return make_shared(inputs, body().clone()); } -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - 
body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& res : body_ptr()->get_results()) { - output_shapes.emplace_back(res->get_input_partial_shape(0)); - } - return output_shapes; -} - -std::vector Subgraph::reshape_body(const std::vector& input_shapes) { - auto& params = body_ptr()->get_parameters(); - OPENVINO_ASSERT(params.size() == input_shapes.size(), "Got invalid number of input shapes to reshape subgraph body"); - for (size_t i = 0; i < params.size(); ++i) { - params[i]->set_partial_shape(input_shapes[i]); - } - body_ptr()->validate_nodes_and_infer_types(); - std::vector output_shapes; - for (const auto& res : body_ptr()->get_results()) { - auto pshape = res->get_input_partial_shape(0); - OPENVINO_ASSERT(pshape.is_static(), "Subgraph inferred dynamic output shape during reshape with static inputs"); - output_shapes.emplace_back(res->get_input_partial_shape(0).get_shape()); - } - return output_shapes; -} - void Subgraph::validate_and_infer_types() { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::validate_and_infer_types") @@ -311,166 +283,6 @@ auto Subgraph::constant_input_should_be_inside_body(const std::shared_ptr(node); } -/// -/// \brief Canonization transforms original subgraph and to canonical form suitable for code generation. In particular, -/// it handles supported layout conversions, broadcasts inputs and outputs to a single rank and layout. Canonicalization -/// returns master-shape (max rank + max dimensions over all outputs) that can be used for scheduling. -/// Canonicalization currently supports only the following layout conversions: -/// * None: all inputs have the same layout -/// * Planar + blocked: some inputs have blocked, and some have planar layouts, e.g. 
+ -/// Also there is precision aligning inside body of subgraph during canonicalization -ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - INTERNAL_OP_SCOPE(Subgraph); - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::canonicalize") - NODE_VALIDATION_CHECK(this, inputShapes.size() == body_ptr()->get_parameters().size(), - "Number of parameters for snippet doesn't match passed to generate method: ", - inputShapes.size(), " vs ", body_ptr()->get_parameters().size(), "."); - - NODE_VALIDATION_CHECK(this, outputShapes.size() == body_ptr()->get_results().size(), - "number of results for snippet doesn't match passed to generate method: ", - outputShapes.size(), " vs ", body_ptr()->get_results().size(), "."); - - auto getMaxRankBlockedShape = [](const BlockedShapeVector& blockedShapes) -> const BlockedShape& { - return *std::max_element(blockedShapes.begin(), blockedShapes.end(), - [&](const BlockedShape& lhs, const BlockedShape& rhs) { - return std::get<0>(lhs).size() < std::get<0>(rhs).size(); - }); - }; - PartialShape baseShape; - AxisVector baseOrder; - std::tie(baseShape, baseOrder, std::ignore) = getMaxRankBlockedShape(inputShapes); - maxInputRank = baseShape.size(); - appendOnesForCanonical.resize(inputShapes.size(), 0); - const bool baseIsBlocked = baseOrder.size() != std::set(baseOrder.begin(), baseOrder.end()).size(); - for (size_t i = 0; i < inputShapes.size(); i++) { - const auto& blockedShape = inputShapes[i]; - PartialShape inShape; - AxisVector inOrder; - element::Type inType; - std::tie(inShape, inOrder, inType) = blockedShape; - const auto inRank = inShape.size(); - NODE_VALIDATION_CHECK(this, inRank <= maxInputRank, "Input rank can't be larger than output rank in snippets."); - if (inRank < maxInputRank) { - appendOnesForCanonical[i] = maxInputRank - inRank; - PartialShape newShape(ov::Shape(maxInputRank, 1)); - // todo: more complicated 
logics is needed if we want to merge smth else than blocked and planar - if (baseIsBlocked) { - const bool inIsNotBlocked = inOrder.size() == std::set(inOrder.begin(), inOrder.end()).size(); - NODE_VALIDATION_CHECK(this, inIsNotBlocked, "Snippets don't support conversion between blocked layouts of different ranks"); - inShape.insert(inShape.end(), ov::Dimension(1)); - appendOnesForCanonical[i]--; - } - NODE_VALIDATION_CHECK(this, PartialShape::broadcast_merge_into(newShape, inShape, ov::op::AutoBroadcastType::NUMPY), - "Failed to broadcast_merge inputs in snippets canonicalization"); - inShape = std::move(newShape); - } else { - // todo: 4d blocked + 5d planar layouts are not supported: + - NODE_VALIDATION_CHECK(this, - equal(baseOrder.begin(), baseOrder.end(), inOrder.begin()), - "Snippets canonicalization got input shapes of equal ranks but different layouts, which is not supported"); - } - ov::PartialShape tmpPShape(baseShape); - // todo: we need to generalize canonicalization for domain-sensitive ops. E.g. 
MatMul inputs can't be broadcasted one to another - if (!config.m_has_domain_sensitive_ops) - NODE_VALIDATION_CHECK(this, - PartialShape::broadcast_merge_into(tmpPShape, inShape, ::ov::op::AutoBroadcastType::NUMPY), - "Failed to create broadcastable shapes in snippets canonicalization"); - const auto paramShape = body_ptr()->get_parameters()[i]->get_partial_shape(); - const auto paramType = body_ptr()->get_parameters()[i]->get_element_type(); - if (paramShape.size() != inShape.size() || !equal(paramShape.begin(), paramShape.end(), inShape.begin())) - body_ptr()->replace_parameter(i, std::make_shared(paramType, inShape)); - } - body_ptr()->validate_nodes_and_infer_types(); - - auto skipStartEndOnes = [](const PartialShape& shape) { - auto begin = shape.begin(); - auto end = shape.end(); - while (begin != end && *begin == 1) - begin++; - while (begin != end && *(end - 1) == 1) - end--; - - PartialShape trimmedShape(std::vector(end - begin, 1)); - std::copy(begin, end, trimmedShape.begin()); - return trimmedShape; - }; - - // Check that output shapes are broadcastable => can be scheduled - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - // todo: we need a slightly more general approach for backward ROI propagation - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - auto outputShape_i = std::get<0>(outputShapes[i]); - // Check that the produced output shape corresponds to the passed shape - // Some produced shapes may have been changed to be broadcastable (e.g. 
blocked + planar outputs), - // so we need to remove leading and trailing "1" before the comparison - PartialShape pShape_i(skipStartEndOnes(shape_i)); - bool compatibleWithPassedShape = PartialShape::broadcast_merge_into(pShape_i, - skipStartEndOnes(outputShape_i), - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithPassedShape, - "Inferred and passed results shapes are incompatible for snippet "); - // Check that output shapes are broadcastable to each other => can be scheduled - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - - // We should insert Converts after Parameters and Constant and before Results - // to align precision inside Subgraph body that is supported by Plugin - align_element_types(outputShapes, inputShapes); - - master_shape = outPShape; - return master_shape; -} - -ov::PartialShape snippets::op::Subgraph::canonicalized_body_shape_infer(const BlockedShapeVector& inputShapes) { - std::vector normInputShapes; - for (size_t i = 0; i < inputShapes.size(); i++) { - PartialShape inShape = std::get<0>(inputShapes[i]); - const auto inRank = inShape.size(); - if (inRank < maxInputRank) { - PartialShape newShape(ov::Shape(maxInputRank, 1)); - for (size_t ir = 0; ir < inRank; ir++) { - newShape[appendOnesForCanonical[i] + ir] = inShape[ir]; - } - normInputShapes.push_back(newShape.get_shape()); - } else { - normInputShapes.push_back(inShape.get_shape()); - } - } - reshape_body(normInputShapes); - - const auto& body_results = body_ptr()->get_results(); - PartialShape outPShape = body_results[0]->get_input_partial_shape(0); - const auto& result_parent = body_results[0]->get_input_node_shared_ptr(0); - if (body_results.size() == 1 && - ov::is_type(result_parent) && - ov::is_type(result_parent->get_input_node_shared_ptr(0))) { - 
outPShape = result_parent->get_input_partial_shape(0); - } else { - for (size_t i = 0; i < body_results.size(); i++) { - auto shape_i = body_results[i]->get_input_partial_shape(0); - bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, - ::ov::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithOtherOutputs, - "Snippets output shapes must be numpy broadcastable"); - } - } - master_shape = outPShape; - return master_shape; -} - bool Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { const auto elementwise = std::dynamic_pointer_cast(node); return @@ -503,8 +315,40 @@ IShapeInferSnippets::Result Subgraph::OVShapeInfer::infer(const std::vector output_dims; + if (is_dynamic()) { + // Note that in case of dynamic implementation shapeInfer() is called before PrepareParams, + // so there must be last_result available + // In principle, we can instantiate shape_infer here, but it's not an intended pipeline behavior. + OPENVINO_ASSERT(m_shape_infer, "Can't calculate master_shape when shapeInfer is not initialized"); + output_dims = m_shape_infer->get_last_result().dims; + OPENVINO_ASSERT(!output_dims.empty(), "Can't calculate master_shape before the first shape inference"); + } else { + for (const auto& res : body_ptr()->get_results()) { + const auto& res_input = res->input(0); + OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result have dynamic shape in static pipeline"); + // We need to account to the shape's layout stored in Output rt_info + const auto& planar_shape = utils::get_planar_pshape(res_input.get_source_output()); + output_dims.emplace_back(planar_shape.get_shape()); + } + } + + if (output_dims.size() == 1) + return output_dims.front(); + + const auto& default_broadcasting = std::make_shared(); + // Note: we have to convert vector to vector> + // because of shape inference interface + std::vector> inputs; + inputs.reserve(output_dims.size()); + for (const auto& d : 
output_dims) + inputs.emplace_back(d); + return default_broadcasting->infer(inputs).dims.front(); +} + std::shared_ptr -Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) const { +Subgraph::convert_body_to_linear_ir(const std::shared_ptr& shape_infer_factory) { lowered::Config lowering_config; lowering_config.m_save_expressions = config.m_has_domain_sensitive_ops; lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; @@ -513,89 +357,44 @@ Subgraph::convert_body_to_linear_ir(const std::shared_ptr(body_ptr(), shape_infer_factory, lowering_config); + m_linear_ir = std::make_shared(body_ptr(), shape_infer_factory, lowering_config); + m_shape_infer = m_linear_ir->get_shape_infer_instance(); + return m_linear_ir; } -void Subgraph::align_element_types(const BlockedShapeVector& outputShapes, - const BlockedShapeVector& inputShapes) { - // We should insert Convert before Results to set original output element type if needed - const auto& body_results = body_ptr()->get_results(); - for (size_t i = 0; i < outputShapes.size(); i++) { - const auto needed_out_type = std::get<2>(outputShapes[i]); - if (body_results[i]->get_input_element_type(0) != needed_out_type) { - auto parent_output = body_results[i]->get_input_source_output(0); - std::shared_ptr consumer = body_results[i]; - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert before Transpose (if there is) on Subgraph outputs - const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); - if (transpose) { - OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, - "If Result has Transpose on input, this Result must be single consumer of the Transpose"); - parent_output = transpose->get_input_source_output(0); - consumer = transpose; - } - - const auto convert = std::make_shared(parent_output, needed_out_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - 
consumer->set_argument(0, convert); - consumer->validate_and_infer_types(); - if (consumer != body_results[i]) - body_results[i]->validate_and_infer_types(); - } - } - - // We should change existing element type to original for Parameters if needed - const auto& parameters = body_ptr()->get_parameters(); - for (size_t i = 0; i < inputShapes.size(); ++i) { - const auto needed_in_type = std::get<2>(inputShapes[i]); - const auto& parameter = parameters[i]; - const auto original_type = parameter->get_element_type(); - if (original_type != needed_in_type) { - parameter->set_element_type(needed_in_type); - parameter->validate_and_infer_types(); - - auto parent_output = parameter->output(0); - auto consumer_inputs = parent_output.get_target_inputs(); - - // Snippets supports Transpose only after Parameter or before Result nodes - // So we have to insert Convert after Transpose (if there is) on Subgraph inputs - if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), - [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { - OPENVINO_ASSERT(consumer_inputs.size() == 1, - "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); - const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); - transpose->validate_and_infer_types(); - - parent_output = transpose; - consumer_inputs = parent_output.get_target_inputs(); - } - - const auto convert = std::make_shared(parent_output, original_type); - ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); - - for (const auto input : consumer_inputs) { - const auto& input_node = input.get_node(); - if (input_node == convert.get()) { - continue; - } - input_node->set_argument(input.get_index(), convert->output(0)); - } - } +std::shared_ptr Subgraph::clone() const { + ov::OutputVector subgraph_node_inputs; + for (const auto &input : input_values()) { + auto new_input = std::make_shared(input.get_element_type(), 
input.get_partial_shape()); + subgraph_node_inputs.push_back(new_input); } -} - -void Subgraph::data_flow_transformations(const std::vector& backend_passes) { + std::shared_ptr new_body = body_ptr()->clone(); + auto result = std::make_shared(subgraph_node_inputs, new_body); + // Note: ov::copy_runtime_info accepts only shared_ptr as "from" but never modifies it, + // so we have to cast away constness to copy runtime info + ov::copy_runtime_info(const_pointer_cast(shared_from_this()), result); + result->set_friendly_name(get_friendly_name()); + if (m_linear_ir) + result->m_linear_ir = std::make_shared(m_linear_ir->deep_copy()); + // Note: we don't update shapeInfer here, since it's initialized in the constructor + if (m_generator) + result->m_generator = m_generator->clone(); + return result; +} + +void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& backend_passes) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations") - const auto& params = body_ptr()->get_parameters(); - bool inputs_has_dynamic_last_dims = std::any_of(params.begin(), params.end(), - [](const shared_ptr& p) { - return p->get_partial_shape().rbegin()->is_dynamic(); - }); - snippets::pass::Manager manager; + ov::snippets::pass::Manager manager; + if (!blocked_input_shapes.empty()) + manager.register_pass(blocked_input_shapes); + if (!input_precisions.empty() && !output_precisions.empty()) + manager.register_pass(input_precisions, output_precisions); + if (config.m_has_domain_sensitive_ops) { manager.register_pass(); manager.register_pass(); @@ -605,14 +404,6 @@ void Subgraph::data_flow_transformations(const std::vector(); manager.register_pass(); manager.register_pass(); - // todo: presently dynamic pipeline is activated even if the last two dimension are static - // In general, we can 
use static kernels in this case, but several parameters (src and dst memory pointers for example) - // should be passed as run-time args, so it's a mixed mode: kernel is shape-aware, but some additional runtime args are required - // Presently Broadcasting is organized in the following way: - // * ALL last dims are static => broadcasting is handled via MoveBroadcast and pointer arithmetics (even for dynamic upper dims) - if (!inputs_has_dynamic_last_dims) { - manager.register_pass(); - } manager.register_pass(m_generator->get_target_machine()); manager.register_pass(); @@ -623,8 +414,9 @@ void Subgraph::data_flow_transformations(const std::vector(buffer_allocation_rank); common_pipeline.register_pass(vector_size); common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); + + common_pipeline.register_pass(); + common_pipeline.register_pass(); common_pipeline.register_pass(); common_pipeline.register_pass(); @@ -669,57 +465,44 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir, final_pipeline.register_pass(); final_pipeline.run(linear_ir); - m_buffer_scratchpad = buffer_allocation_pass->get_scratchpad_size(); + lowering_result.buffer_scratchpad_size = buffer_allocation_pass->get_scratchpad_size(); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, +snippets::Schedule Subgraph::generate(const BlockedShapeVector& blocked_input_shapes, + const std::vector& input_precisions, + const std::vector& output_precisions, + const std::vector& data_flow_backend_passes, + const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const std::shared_ptr& factory, const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(compile_params); + data_flow_transformations(blocked_input_shapes, input_precisions, output_precisions, 
data_flow_backend_passes); + convert_body_to_linear_ir(factory); + return generate_from_linear_ir(backend_passes_pre_common, backend_passes_post_common, compile_params); } -snippets::Schedule Subgraph::generate(const BlockedShapeVector& output_shapes, - const BlockedShapeVector& input_shapes, - const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { - canonicalize(output_shapes, input_shapes); - return generate(data_flow_passes, control_flow_passes_pre_common, control_flow_passes_post_common, - shape_infer_factory, compile_params); -} - -snippets::Schedule Subgraph::generate(const void* compile_params) { - return generate({}, {}, {}, nullptr, compile_params); -} - -snippets::Schedule Subgraph::generate(const std::vector& data_flow_passes, - const lowered::pass::PassPipeline& control_flow_passes_pre_common, - const lowered::pass::PassPipeline& control_flow_passes_post_common, - const std::shared_ptr& shape_infer_factory, - const void* compile_params) { +snippets::Schedule Subgraph::generate_from_linear_ir(const lowered::pass::PassPipeline& backend_passes_pre_common, + const lowered::pass::PassPipeline& backend_passes_post_common, + const void* compile_params) const { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") OPENVINO_ASSERT(m_generator != nullptr, "generate is called while generator is not set"); - data_flow_transformations(data_flow_passes); - - lowered::LinearIR linear_ir = *convert_body_to_linear_ir(shape_infer_factory); - control_flow_transformations(linear_ir, control_flow_passes_pre_common, control_flow_passes_post_common); - // actual code emission - const auto& lowering_result = m_generator->generate(linear_ir, linear_ir.get_config(), compile_params); - const auto ptr = 
lowering_result.binary_code; - + // Note: some transformations performed in the generator, e.g. tail insertion, can break shape propagation + // until we fix this behavior, we have to make a copy of LIR before giving it to the generator. + OPENVINO_ASSERT(m_linear_ir, "Attempt to call generate, when linear IR was not initialized"); + auto linear_ir = m_linear_ir->deep_copy(); + LoweringResult lowering_result; + control_flow_transformations(linear_ir, lowering_result, backend_passes_pre_common, backend_passes_post_common); + m_generator->generate(linear_ir, lowering_result, compile_params); VectorDims parallel_exec_domain = linear_ir.get_master_shape(); const size_t loop_depth = linear_ir.get_config().m_loop_depth; for (size_t i = 0; i < loop_depth; i++) parallel_exec_domain[parallel_exec_domain.size() - 1 - i] = 1; - return {parallel_exec_domain, ptr}; + return {parallel_exec_domain, std::move(lowering_result)}; } void Subgraph::print() const { diff --git a/src/common/snippets/src/pass/align_element_types.cpp b/src/common/snippets/src/pass/align_element_types.cpp new file mode 100644 index 00000000000000..da1ab1cb2c038f --- /dev/null +++ b/src/common/snippets/src/pass/align_element_types.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/align_element_types.hpp" +#include "snippets/itt.hpp" + +namespace ov { +namespace snippets { + +pass::AlignElementTypes::AlignElementTypes(std::vector input_precisions, + std::vector output_precisions) : + m_input_precisions(std::move(input_precisions)), + m_output_precisions(std::move(output_precisions)) { +} + +bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(AlignElementTypes); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::AlignElementTypes") + bool is_modified = false; + const auto& results = m->get_results(); + const auto& params = m->get_parameters(); + 
OPENVINO_ASSERT(m_input_precisions.size() == params.size() && m_output_precisions.size() == results.size(), + "Number of parameters for snippet doesn't match passed to the Canonicalization pass. "); + + // We should insert Convert before Results to set original output element type if needed + for (size_t i = 0; i < m_output_precisions.size(); i++) { + const auto needed_out_type = m_output_precisions[i]; + if (results[i]->get_input_element_type(0) != needed_out_type) { + std::shared_ptr consumer = results[i]; + auto parent_output = consumer->get_input_source_output(0); + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert before Transpose (if there is) on Subgraph outputs + const auto transpose = ov::as_type_ptr(parent_output.get_node_shared_ptr()); + if (transpose) { + OPENVINO_ASSERT(parent_output.get_target_inputs().size() == 1, + "If Result has Transpose on input, this Result must be single consumer of the Transpose"); + parent_output = transpose->get_input_source_output(0); + consumer = transpose; + } + + const auto convert = std::make_shared(parent_output, needed_out_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + consumer->set_argument(0, convert); + consumer->validate_and_infer_types(); + if (transpose) + results[i]->validate_and_infer_types(); + is_modified = true; + } + } + + // We should change existing element type to original for Parameters if needed + for (size_t i = 0; i < m_input_precisions.size(); ++i) { + const auto needed_in_type = m_input_precisions[i]; + const auto& parameter = params[i]; + const auto original_type = parameter->get_element_type(); + if (original_type != needed_in_type) { + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + + auto parent_output = parameter->output(0); + auto consumer_inputs = parent_output.get_target_inputs(); + + const auto& first_child = 
consumer_inputs.begin()->get_node()->shared_from_this(); + // Note: RankNormalization of is designed for shape-inference purposes only. + // It does not process any data (nor does it emit any code), so it doesn't require Convert operations + if (is_type(first_child)) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); + parent_output = first_child->output(0); + consumer_inputs = parent_output.get_target_inputs(); + } + + // Snippets supports Transpose only after Parameter or before Result nodes + // So we have to insert Convert after Transpose (if there is) on Subgraph inputs + if (std::any_of(consumer_inputs.cbegin(), consumer_inputs.cend(), + [](const ov::Input& input) { return ov::is_type(input.get_node()); })) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, + "If Parameter has Transpose on output, this Transpose must be single consumer of the Parameter"); + const auto transpose = consumer_inputs.begin()->get_node()->shared_from_this(); + transpose->validate_and_infer_types(); + + parent_output = transpose; + consumer_inputs = parent_output.get_target_inputs(); + } + + const auto& convert = std::make_shared(parent_output, original_type); + ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + + for (const auto input : consumer_inputs) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } + + is_modified = true; + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp index 65fbbc162a8ada..cd803b163b5bbf 100644 --- a/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp +++ b/src/common/snippets/src/pass/broadcast_to_movebroadcast.cpp @@ -5,7 +5,7 @@ #include "snippets/itt.hpp" #include 
"snippets/pass/broadcast_to_movebroadcast.hpp" -#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/op/broadcastmove.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/opsets/opset1.hpp" @@ -30,15 +30,19 @@ ov::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() { const auto target_shape = root->get_output_partial_shape(0); const auto value_shape = root->get_input_partial_shape(0); - if (target_shape.is_dynamic() || value_shape.is_dynamic()) { - return false; + OPENVINO_ASSERT(target_shape.is_static() && value_shape.rank().is_static(), "Broadcast with dynamic target shape is not supported in Snippets"); + // Insert BroadcastMove only if the last dimension needs to be broadcasted. Higher-level dims broadcasting + // will be handled by pointer arithmetics. Note that this behavior should be changed in case of full op::Boradcast support. + Output in_value = root->input_value(0); + if (*target_shape.rbegin() != *value_shape.rbegin()) { + auto broadcasted_shape = value_shape; + *broadcasted_shape.rbegin() = *target_shape.rbegin(); + const auto& broadcast_node = std::make_shared(in_value, broadcasted_shape); + in_value = broadcast_node->output(0); } - const auto broadcast_node = ov::snippets::pass::InsertMoveBroadcast::BroadcastNodeLastDim(root->input_value(0), - target_shape.get_shape(), - value_shape.get_shape()); - replace_output_update_name(root->output(0), broadcast_node); - ov::copy_runtime_info(root, broadcast_node.get_node_shared_ptr()); + replace_output_update_name(root->output(0), in_value); + ov::copy_runtime_info(root, in_value.get_node_shared_ptr()); return true; }; diff --git a/src/common/snippets/src/pass/canonicalization.cpp b/src/common/snippets/src/pass/canonicalization.cpp new file mode 100644 index 00000000000000..23414d2925bf36 --- /dev/null +++ b/src/common/snippets/src/pass/canonicalization.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include "snippets/itt.hpp" +#include "snippets/utils.hpp" +#include "snippets/lowered/port_descriptor.hpp" + +namespace ov { +namespace snippets { + +pass::Canonicalization::Canonicalization(const BlockedShapeVector& blocked_input_shapes) { + m_in_shapes.reserve(blocked_input_shapes.size()); + m_in_layouts.reserve(blocked_input_shapes.size()); + for (const auto& bs : blocked_input_shapes) { + m_has_dynamic_inputs |= utils::is_dynamic_vdims(bs.first); + m_in_shapes.emplace_back(bs.first); + m_in_layouts.emplace_back(bs.second); + // Note: Blocking (if any) must be accounted for in input shapes + OPENVINO_ASSERT(m_in_shapes.back().size() == m_in_layouts.back().size(), "Input shapes and layouts must have the same rank"); + } +} + +bool pass::Canonicalization::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(Canonicalization); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::Canonicalization") + bool is_modified = false; + const ParameterVector& params = m->get_parameters(); + OPENVINO_ASSERT(m_in_shapes.size() == params.size(), + "Number of parameters for snippet doesn't match passed to the Canonicalization pass. 
", + "Expected: ", m_in_shapes.size(), " Got: ", params.size(), "."); + + // Note that shape rank also incorporates layout, so NCHW16c would have shape rank 5 + auto is_blocked_layout = [](const Layout& l) { + return l.size() != std::set(l.begin(), l.end()).size(); + }; + auto compare_ranks = [](const Layout& l, const Layout& r) { + return l.size() < r.size(); + }; + // Layout with the max rank + const auto& max_rank_it = std::max_element(m_in_layouts.begin(), m_in_layouts.end(), compare_ranks); + Layout base_layout = *max_rank_it; + size_t max_rank = base_layout.size(); + const bool base_is_blocked = is_blocked_layout(base_layout); + + for (size_t i = 0; i < m_in_layouts.size(); i++) { + const auto& i_layout = m_in_layouts[i]; + const auto& i_shape = m_in_shapes[i]; + const auto i_rank = i_layout.size(); + const bool i_is_blocked = is_blocked_layout(i_layout); + // Canonicalization logic briefly: + // * If this input is blocked => Reshape corresponding input parameter, so the following transformations + // will work with a shape of a larger rank. In dynamic case, this shape will be updated during shapeInfer() + // call, but the important thing is that the shape rank won't change. + // * If some of the input shapes is blocked (=> base_is_blocked), but this input is planar, + // then insert RankNormalization op after this input. This is needed, so all shapes inside the body have + // similar ranks. 
+ if (i_is_blocked) { + OPENVINO_ASSERT(base_is_blocked && i_rank == max_rank, "If this shape is blocked, base must also be blocked"); + params[i]->set_partial_shape(snippets::utils::vdims_to_pshape(i_shape)); + is_modified = true; + } else if (i_rank < max_rank) { + size_t num_append = base_is_blocked; + OPENVINO_ASSERT(max_rank >= i_rank + num_append, "Unsupported blocked shapes combination in canonicalization"); + size_t num_prepend = max_rank - i_rank - num_append; + const auto& out = params[i]->output(0); + const auto& target_inputs = out.get_target_inputs(); + auto rank_norm = std::make_shared(out, num_prepend, num_append); + for (auto& in : target_inputs) + in.replace_source_output(rank_norm); + is_modified = true; + } else { + // todo: 4d blocked + 5d planar layouts are not supported: + + OPENVINO_ASSERT(equal(base_layout.begin(), base_layout.end(), i_layout.begin()), + "Canonicalization got input shapes of equal ranks but different layouts, which is not supported"); + } + } + return is_modified; +} + +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/convert_constants.cpp b/src/common/snippets/src/pass/convert_constants.cpp index b5fb81b77dd98a..c374ee010d3446 100644 --- a/src/common/snippets/src/pass/convert_constants.cpp +++ b/src/common/snippets/src/pass/convert_constants.cpp @@ -24,8 +24,7 @@ ov::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() { // Note that all Constants {1,1,1,1} are converted to Scalar {1} here // This is needed to simplify shape inference, otherwise {1,1,1,1} Constants can increase output rank // Also some operations support only scalar shapes, so we need separate scalars and shape [1] - const auto shape = constant->get_output_shape(0).size() == 0 ? 
ov::Shape{} : ov::Shape{1}; - auto scalar = std::make_shared(ov::op::v0::Constant(*constant, shape)); + auto scalar = std::make_shared(ov::op::v0::Constant(*constant, ov::Shape{1})); scalar->set_friendly_name(constant->get_friendly_name()); ov::copy_runtime_info(constant, scalar); ov::replace_node(constant, scalar); diff --git a/src/common/snippets/src/pass/set_softmax_ports.cpp b/src/common/snippets/src/pass/set_softmax_ports.cpp index 1651a6d6217495..f8d38d51ec59b5 100644 --- a/src/common/snippets/src/pass/set_softmax_ports.cpp +++ b/src/common/snippets/src/pass/set_softmax_ports.cpp @@ -25,11 +25,9 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { auto root = m.get_match_root(); const auto& pshape = root->get_input_partial_shape(0); - if (pshape.is_dynamic()) - return false; - const auto shape = pshape.get_shape(); - const auto rank = shape.size(); + OPENVINO_ASSERT(!pshape.rank().is_dynamic(), "SetSoftmaxPorts doesn't support dynamic ranks"); + const auto rank = pshape.rank().get_length(); int64_t axis; if (const auto softmax_v8 = ov::as_type_ptr(root)) { @@ -44,7 +42,7 @@ ov::snippets::pass::SetSoftmaxPorts::SetSoftmaxPorts() { OPENVINO_ASSERT(axis < static_cast(rank), "Softmax has incorrect axis"); std::vector subtensor(rank, 1); - for (size_t i = axis; i < rank; ++i) + for (auto i = axis; i < rank; ++i) subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; lowered::PortDescriptorUtils::set_port_descriptor_ptr(root->input(0), std::make_shared(root->input(0), subtensor)); diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index 22470a13d3443f..0b9117d05d0477 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -63,6 +63,7 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry SHAPE_INFER_PREDEFINED(ov::op::v0::Result, EmptyShapeInfer), 
// SHAPE_INFER_OP_SPECIFIC(op::LoadReshape), + SHAPE_INFER_OP_SPECIFIC(op::RankNormalization), SHAPE_INFER_OP_SPECIFIC(op::BroadcastLoad), SHAPE_INFER_OP_SPECIFIC(op::BroadcastMove), }; diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index df894604d11693..242391b908dc03 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -92,7 +92,8 @@ VectorDims pshape_to_vdims(const PartialShape& pshape) { result.reserve(pshape.size()); for (const auto& d : pshape) result.push_back(d.is_dynamic() ? IShapeInferSnippets::DYNAMIC_DIMENSION : d.get_length()); - return result; + // Note: PartialShape could be empty which designates scalar value. However, Scalars are represented as {1} in Snippets + return result.empty() ? VectorDims {1} : result; } ov::PartialShape vdims_to_pshape(const VectorDims& vdims) { @@ -132,6 +133,10 @@ VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port) return get_planar_vdims(expr_port.get_descriptor_ptr()); } +bool is_dynamic_vdims(const VectorDims& shape) { + return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return v == IShapeInferSnippets::DYNAMIC_DIMENSION; }); +} + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index a419e6575a5de5..f2c872f725b7d6 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -6,6 +6,8 @@ #include #include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/pass_manager.hpp" +#include "snippets/shape_inference/shape_inference.hpp" namespace ov { namespace test { @@ -23,11 +25,17 @@ class DummyEmitter : public ov::snippets::Emitter { void emit_data() const override {} }; +struct DummyCompiledSnippet : public ov::snippets::CompiledSnippet { + const uint8_t* get_code() const override { 
return nullptr; } + size_t get_code_size() const override { return 0; } + bool empty() const override { return true; } +}; + class DummyTargetMachine : public ov::snippets::TargetMachine { public: DummyTargetMachine(const std::vector& custom_opset = {}); bool is_supported() const override { return true; } - ov::snippets::code get_snippet() const override { return nullptr; } + ov::snippets::CompiledSnippetPtr get_snippet() override { return std::make_shared(); } size_t get_lanes() const override { return 10; } }; @@ -35,6 +43,7 @@ class DummyGenerator : public ov::snippets::Generator { public: DummyGenerator() : ov::snippets::Generator(std::make_shared()) {} DummyGenerator(const std::shared_ptr& t) : ov::snippets::Generator(t) {} + std::shared_ptr clone() const override { return std::make_shared(target); } protected: opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override { return vec2vec; }; @@ -48,13 +57,15 @@ class LoweringTests : public TransformationTestsF { void TearDown() override; static std::shared_ptr getSubgraph(const std::shared_ptr& f); + using IShapeInferSnippetsFactory = ov::snippets::IShapeInferSnippetsFactory; static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, const std::vector& backend_passes = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common = {}, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common = {}, - const std::shared_ptr& generator = nullptr); + const std::shared_ptr& generator = nullptr, + const std::shared_ptr& factory = std::make_shared()); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); protected: diff --git a/src/common/snippets/tests/include/pass/canonicalization.hpp b/src/common/snippets/tests/include/pass/canonicalization.hpp index 88f6ebc0336a80..bcb27d08cb1395 100644 --- a/src/common/snippets/tests/include/pass/canonicalization.hpp +++ 
b/src/common/snippets/tests/include/pass/canonicalization.hpp @@ -5,36 +5,25 @@ #pragma once #include "lowering_utils.hpp" +#include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" +#include "snippets/shape_types.hpp" +#include "snippets/pass/canonicalization.hpp" namespace ov { namespace test { namespace snippets { -using BlockedShape = ov::snippets::op::Subgraph::BlockedShape; -using BlockedShapeVector = ov::snippets::op::Subgraph::BlockedShapeVector; - -// todo: implement tests with 3 inputs and two outputs (aka SnippetsCanonicalizationParams3Inputs) -// Note that the expected output shape isn't necessary equal to one of the output blocked_shapes. -// For example, consider the following graph: (1, 2, 2, 1, 8) + (1, 2, 1, 1, 8) + (1, 2, 1, 5, 8) => (1, 2, 2, 1, 8) + (1, 2, 1, 5, 8). -typedef std::tuple< - std::tuple, // Shape & BlockedShape for input 0 - std::tuple, // Shape & BlockedShape for input 0 - BlockedShape, // BlockedShape output shape passed to canonicalize() - Shape // expected output Shape -> canonicalizationParams; - - -class CanonicalizationTests : public LoweringTests, public testing::WithParamInterface { +class CanonicalizationTests : public TransformationTestsF { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + using VectorDims = ov::snippets::VectorDims; + using Layout = std::vector; + virtual void run(); protected: - void SetUp() override; - std::shared_ptr snippets_model; - Shape expected_output_shape; - BlockedShapeVector input_blocked_shapes; - BlockedShapeVector output_blocked_shapes; + std::vector m_input_shapes; + std::vector m_input_layouts; + void prepare_functions(const std::vector& shapes); }; } // namespace snippets diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index 83207244ac031f..5d49d38a6af2e7 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -106,13 
+106,13 @@ std::shared_ptr const std::vector& backend_passes, const ov::snippets::lowered::pass::PassPipeline& lowered_pre_common, const ov::snippets::lowered::pass::PassPipeline& lowered_post_common, - const std::shared_ptr& generator) { + const std::shared_ptr& generator, + const std::shared_ptr& factory) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? std::make_shared() : generator); - subgraph->set_master_shape(master_shape); subgraph->set_tile_rank(2); // Note: lowered_pipeline would have no effect on subgraph body, since it's applied on linear IR - subgraph->generate(backend_passes, lowered_pre_common, lowered_post_common); + subgraph->generate({}, {}, {}, backend_passes, lowered_pre_common, lowered_post_common, factory); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/canonicalization.cpp b/src/common/snippets/tests/src/pass/canonicalization.cpp index 4981b4f8d8e139..a9311b67598263 100644 --- a/src/common/snippets/tests/src/pass/canonicalization.cpp +++ b/src/common/snippets/tests/src/pass/canonicalization.cpp @@ -5,101 +5,84 @@ #include #include "pass/canonicalization.hpp" #include "common_test_utils/common_utils.hpp" -#include +#include "snippets/pass/canonicalization.hpp" +#include "snippets/op/rank_normalization.hpp" +#include namespace ov { namespace test { namespace snippets { -using ov::snippets::op::Subgraph; - -class SKIP_CanonicalizationTests : public CanonicalizationTests { -public: - void SetUp() override { - GTEST_SKIP(); - } - void TearDown() override{}; -}; - -std::string CanonicalizationTests::getTestCaseName(testing::TestParamInfo obj) { - std::vector> inputs(2); - Subgraph::BlockedShape output; - Shape expectedOutput; - std::tie(inputs[0], inputs[1], output, expectedOutput) = obj.param; - std::ostringstream result; - for (size_t i = 0; i < inputs.size(); i++) { - const auto& blockedshape = std::get<1>(inputs[i]); - // input shape - result << "IS[" << i << "]=" << 
ov::test::utils::vec2str(std::get<0>(inputs[i])) << "_"; - // input blocked shape - result << "IBS[" << i << "]=" << ov::test::utils::partialShape2str({std::get<0>(blockedshape)}) << "_"; - // input blocked order - result << "IBO[" << i << "]=" << ov::test::utils::vec2str(std::get<1>(blockedshape)) << "_"; - } - // output blocked shape - result << "OBS[0]=" << ov::test::utils::partialShape2str({std::get<0>(output)}) << "_"; - // output blocked order - result << "OBO[0]=" << ov::test::utils::vec2str(std::get<1>(output)) << "_"; - result << "ExpOS[0]=" << ov::test::utils::vec2str(expectedOutput) << "_"; - return result.str(); +namespace { +void normalizeParameter(const std::shared_ptr& par, size_t num_prepend, size_t num_append) { + auto target_inputs = par->get_output_target_inputs(0); + auto rank_norm = std::make_shared(par, + num_prepend, + num_append); + for (auto& t : target_inputs) + t.replace_source_output(rank_norm); } +} // namespace -void CanonicalizationTests::SetUp() { - TransformationTestsF::SetUp(); - std::vector> inputs(2); - output_blocked_shapes.resize(1); - std::tie(inputs[0], inputs[1], output_blocked_shapes[0], expected_output_shape) = this->GetParam(); +void CanonicalizationTests::prepare_functions(const std::vector& shapes) { + std::vector pshapes; + pshapes.reserve(shapes.size()); + for (const auto& v : shapes ) + pshapes.emplace_back(v); + const auto &f = AddFunction(pshapes); + model = f.getOriginal(); + model_ref = model->clone(); +} - input_blocked_shapes = {std::get<1>(inputs[0]), std::get<1>(inputs[1])}; - snippets_model = std::make_shared(std::vector{std::get<0>(inputs[0]), std::get<0>(inputs[1])}); +void CanonicalizationTests::run() { + ASSERT_TRUE(model); + ASSERT_EQ(m_input_shapes.size(), m_input_layouts.size()); + BlockedShapeVector blocked_input_shapes; + blocked_input_shapes.reserve(m_input_shapes.size()); + for (size_t i = 0; i < m_input_shapes.size(); i++) + blocked_input_shapes.emplace_back(m_input_shapes[i], 
m_input_layouts[i]); + manager.register_pass(blocked_input_shapes); + disable_rt_info_check(); } -TEST_P(CanonicalizationTests, Add) { - model = snippets_model->getOriginal(); - model_ref = snippets_model->getReference(); - auto subgraph = getTokenizedSubgraph(model); - subgraph->set_generator(std::make_shared()); - auto canonical_output_shape = subgraph->canonicalize(output_blocked_shapes, input_blocked_shapes); - ASSERT_TRUE(canonical_output_shape.is_static()); - ASSERT_DIMS_EQ(canonical_output_shape.get_shape(), expected_output_shape); +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_0) { + m_input_shapes = {{2, 3, 10, 64}, {2, 3, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, {0, 1, 2, 3}}; + prepare_functions(m_input_shapes); + run(); } namespace CanonicalizationTestsInstantiation { -using ov::snippets::op::Subgraph; -std::vector input_shapes; -Shape expected_output_shape; - -using ov::Shape; -ov::element::Type_t prec = ov::element::f32; -std::tuple blockedInput0{{1, 64, 2, 5}, - {{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}}; -Subgraph::BlockedShape output{{1, 4, 2, 5, 16}, {0, 1, 2, 3, 1}, prec}; -Shape canonical_shape{1, 4, 2, 5, 16}; - -std::vector> blockedInput1{{{1, 1, 2, 5}, {{1, 1, 2, 5, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 1, 2, 1}, {{1, 1, 2, 1, 1}, {0, 1, 2, 3, 1}, prec}}, - {{1, 64, 1, 1}, {{1, 4, 1, 1, 16}, {0, 1, 2, 3, 1}, prec}}}; +TEST_F(CanonicalizationTests, smoke_Snippets_Canonicalization_1) { + m_input_shapes = {{2, 3, 10, 64}, + {10, 64}}; + m_input_layouts = {{0, 1, 2, 3}, + {0, 1}}; + prepare_functions(m_input_shapes); + normalizeParameter(model_ref->get_parameters()[1], 2, 0); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastBlocked, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(blockedInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); +TEST_F(CanonicalizationTests, 
smoke_Snippets_Canonicalization_2) { + m_input_shapes = {{2, 3, 10, 64, 16}, + {1, 10, 64}}; + m_input_layouts = {{0, 1, 2, 3, 1}, + {0, 1, 2}}; + prepare_functions({{2, 48, 10, 64}, + {1, 10, 64}}); + const auto& params = model_ref->get_parameters(); + // Note: We can't create functions with mismatching input shapes, + // so we have to set Parameter shapes after the functions were created + // This reproduces Snippets pipeline well, since blocked shapes are set after the tokenization + params[0]->set_partial_shape(PartialShape(m_input_shapes[0])); + model->get_parameters()[0]->set_partial_shape(PartialShape(m_input_shapes[0])); -std::vector> planarInput1{{{1, 1, 2, 5}, {{1, 2, 5}, {0, 1, 2}, prec}}, - {{1, 1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{1, 2, 5}, {{2, 5}, {0, 1}, prec}}, - {{2, 5}, {{2, 5}, {0, 1}, prec}}, - {{5}, {{5}, {0}, prec}}}; + normalizeParameter(params[1], 1, 1); + // need to trigger validate..(...) manually to propagate new blocked shapes, + // this is correct since RankNormalization ops re-enables shape propagation for blocked shapes + model_ref->validate_nodes_and_infer_types(); + run(); +} -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastPlanar, - SKIP_CanonicalizationTests /* CVS-114607 */, - ::testing::Combine(::testing::Values(blockedInput0), - ::testing::ValuesIn(planarInput1), - ::testing::Values(output), - ::testing::Values(canonical_shape)), - CanonicalizationTests::getTestCaseName); } // namespace CanonicalizationTestsInstantiation } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp index b23e068244512a..da6e2e39442777 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp @@ -25,25 +25,25 @@ #include -using namespace std; +namespace ov { #define CREATE_SNIPPETS_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) 
-> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr); \ }, \ [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} #define CREATE_CPU_EMITTER(e_type) { \ - [this](const ov::snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ + [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return std::make_shared(h.get(), isa, expr->get_node()); \ }, \ - [](const std::shared_ptr& n) -> std::set> { \ + [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ } \ -}; +} class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { public: @@ -58,94 +58,95 @@ class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { } }; -ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) +intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa) : TargetMachine(), h(new jit_snippet()), isa(host_isa) { // data movement - jitters[ov::op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - jitters[ov::op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::Buffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); jitters[snippets::op::VectorBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); - // jitters[ov::op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + jitters[snippets::op::RankNormalization::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); + // jitters[op::v1::Constant::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported 
jitters[snippets::op::Load::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); jitters[snippets::op::LoadReshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadEmitter); jitters[snippets::op::BroadcastLoad::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastLoadEmitter); - jitters[ov::intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); - jitters[ov::intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); + jitters[intel_cpu::LoadConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoadConvertEmitter); jitters[snippets::op::Store::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreEmitter); - jitters[ov::intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); - jitters[ov::intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertSaturation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); + jitters[intel_cpu::StoreConvertTruncation::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(StoreConvertEmitter); jitters[snippets::op::Scalar::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(ScalarEmitter); jitters[snippets::op::BroadcastMove::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BroadcastMoveEmitter); // jitters[snippets::op::Nop::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(NopEmitter); // Not supported - // jitters[ov::op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported + // jitters[op::v1::Broadcast::get_type_info_static()] = CREATE_CPU_EMITTER(); // Not supported - jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_truncation_emitter); - 
jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_convert_saturation_emitter); - // jitters[ov::op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[snippets::op::ConvertTruncation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_truncation_emitter); + jitters[snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_convert_saturation_emitter); + // jitters[op::v1::FakeQuantize::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported // ternary - jitters[ov::op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_select_emitter); - jitters[ov::intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mul_add_emitter); + jitters[op::v1::Select::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_select_emitter); + jitters[intel_cpu::FusedMulAdd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mul_add_emitter); // binary - jitters[ov::op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_add_emitter); - jitters[ov::op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_divide_emitter); - jitters[ov::op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_equal_emitter); - jitters[ov::op::v1::FloorMod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_mod_emitter); - jitters[ov::op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_emitter); - jitters[ov::op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_greater_equal_emitter); - jitters[ov::op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_emitter); - jitters[ov::op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_less_equal_emitter); - 
jitters[ov::op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_and_emitter); - jitters[ov::op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_or_emitter); - jitters[ov::op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); - jitters[ov::op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_maximum_emitter); - jitters[ov::op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_minimum_emitter); - jitters[ov::op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_mod_emitter); - jitters[ov::op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_multiply_emitter); - jitters[ov::op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_not_equal_emitter); - jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_static_emitter); - jitters[ov::op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_power_dynamic_emitter); - jitters[ov::op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_prelu_emitter); - jitters[ov::op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_squared_difference_emitter); - jitters[ov::op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_subtract_emitter); - jitters[ov::op::v0::Xor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_add_emitter); + jitters[op::v1::Divide::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_divide_emitter); + jitters[op::v1::Equal::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_equal_emitter); + jitters[op::v1::FloorMod::get_type_info_static()] = 
CREATE_CPU_EMITTER(intel_cpu::jit_floor_mod_emitter); + jitters[op::v1::Greater::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_emitter); + jitters[op::v1::GreaterEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_greater_equal_emitter); + jitters[op::v1::Less::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_emitter); + jitters[op::v1::LessEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_less_equal_emitter); + jitters[op::v1::LogicalAnd::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_and_emitter); + jitters[op::v1::LogicalOr::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_or_emitter); + jitters[op::v1::LogicalXor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); + jitters[op::v1::Maximum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_maximum_emitter); + jitters[op::v1::Minimum::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_minimum_emitter); + jitters[op::v1::Mod::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_mod_emitter); + jitters[op::v1::Multiply::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_multiply_emitter); + jitters[op::v1::NotEqual::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_not_equal_emitter); + jitters[snippets::op::PowerStatic::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_static_emitter); + jitters[op::v1::Power::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_power_dynamic_emitter); + jitters[op::v0::PRelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_prelu_emitter); + jitters[op::v0::SquaredDifference::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_squared_difference_emitter); + jitters[op::v1::Subtract::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_subtract_emitter); + jitters[op::v0::Xor::get_type_info_static()] = 
CREATE_CPU_EMITTER(intel_cpu::jit_logical_xor_emitter); // unary - jitters[ov::op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_abs_emitter); - // jitters[ov::op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_ceiling_emitter); - jitters[ov::op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_clamp_emitter); - // jitters[ov::op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_elu_emitter); - jitters[ov::op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_erf_emitter); - jitters[ov::op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_exp_emitter); - jitters[ov::op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_floor_emitter); - jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_round_emitter); - // jitters[ov::op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_logical_not_emitter); - jitters[ov::op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_negative_emitter); - jitters[ov::op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_relu_emitter); - // jitters[ov::op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sigmoid::get_type_info_static()] = 
CREATE_CPU_EMITTER(ov::intel_cpu::jit_sigmoid_emitter); - // jitters[ov::op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_sqrt_emitter); - // jitters[ov::op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_tanh_emitter); - - jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_swish_emitter); - jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_hswish_emitter); - // jitters[ov::op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - // jitters[ov::op::v1::Selu::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported - jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v0_emitter); - jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_gelu_v7_emitter); + jitters[op::v0::Abs::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_abs_emitter); + // jitters[op::v1::Acos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Asin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Atan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Ceiling::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_ceiling_emitter); + jitters[op::v0::Clamp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_clamp_emitter); + // jitters[op::v1::Cos::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Cosh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + 
jitters[op::v0::Elu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_elu_emitter); + jitters[op::v0::Erf::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_erf_emitter); + jitters[op::v0::Exp::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_exp_emitter); + jitters[op::v0::Floor::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_floor_emitter); + jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_round_emitter); + // jitters[op::v1::Log::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v1::LogicalNot::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_logical_not_emitter); + jitters[op::v0::Negative::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_negative_emitter); + jitters[op::v0::Relu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_relu_emitter); + // jitters[op::v1::Sign::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sigmoid_emitter); + // jitters[op::v1::Sin::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Sinh::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Sqrt::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_sqrt_emitter); + // jitters[op::v1::Tan::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + jitters[op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_tanh_emitter); + + jitters[intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_swish_emitter); + jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_hswish_emitter); + // jitters[op::v1::HardSigmoid::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + // jitters[op::v1::Selu::get_type_info_static()] = CREATE_CPU_EMITTER(); // not supported + 
jitters[ngraph::op::v0::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v0_emitter); + jitters[ngraph::op::v7::Gelu::get_type_info_static()] = CREATE_CPU_EMITTER(intel_cpu::jit_gelu_v7_emitter); jitters[snippets::op::Fill::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(FillEmitter); jitters[snippets::op::HorizonMax::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(HorizonEmitter); @@ -154,11 +155,11 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_ jitters[snippets::op::Kernel::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(KernelEmitter); jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopBeginEmitter); jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(LoopEndEmitter); - jitters[ov::intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter); - jitters[ov::intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter); + jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmEmitter); + jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(BrgemmCopyBEmitter); } -size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const { +size_t intel_cpu::CPUTargetMachine::get_lanes() const { switch (isa) { case dnnl::impl::cpu::x64::avx2 : return dnnl::impl::cpu::x64::cpu_isa_traits::vlen / sizeof(float); case dnnl::impl::cpu::x64::sse41 : return dnnl::impl::cpu::x64::cpu_isa_traits::vlen / sizeof(float); @@ -167,28 +168,62 @@ size_t ov::intel_cpu::CPUTargetMachine::get_lanes() const { } } -bool ov::intel_cpu::CPUTargetMachine::is_supported() const { +dnnl::impl::cpu::x64::cpu_isa_t intel_cpu::CPUTargetMachine::get_isa() const { + return isa; +} + +bool intel_cpu::CPUTargetMachine::is_supported() const { return dnnl::impl::cpu::x64::mayiuse(isa); } -ov::snippets::code ov::intel_cpu::CPUTargetMachine::get_snippet() const { 
+snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() { if (h->create_kernel() != dnnl::impl::status::success) { IE_THROW() << "Failed to create jit_kernel in get_snippet()"; } - return h->jit_ker(); + const auto& result = std::make_shared(std::unique_ptr(h.release())); + // Note that we reset all the generated code, since it was copied into CompiledSnippetCPU + h.reset(new jit_snippet()); + return result; +} + +intel_cpu::CompiledSnippetCPU::CompiledSnippetCPU(std::unique_ptr h) : h_compiled(std::move(h)) { + OPENVINO_ASSERT(h_compiled && h_compiled->jit_ker(), "Got invalid jit generator or kernel was nopt compiled"); } -ov::intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared(isa_)) { +const uint8_t* intel_cpu::CompiledSnippetCPU::get_code() const { + return h_compiled->jit_ker(); } -ov::snippets::Generator::opRegType ov::intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr& op) const { - if (std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) +size_t intel_cpu::CompiledSnippetCPU::get_code_size() const { + return h_compiled->getSize(); +} + +bool intel_cpu::CompiledSnippetCPU::empty() const { + return get_code_size() == 0; +} + +intel_cpu::CPUGenerator::CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa_) : Generator(std::make_shared(isa_)) { +} + +std::shared_ptr intel_cpu::CPUGenerator::clone() const { + const auto& cpu_target_machine = std::dynamic_pointer_cast(target); + OPENVINO_ASSERT(cpu_target_machine, "Failed to clone CPUGenerator: the instance contains incompatible TargetMachine type"); + return std::make_shared(cpu_target_machine->get_isa()); +} + +snippets::Generator::opRegType intel_cpu::CPUGenerator::get_specific_op_reg_type(const std::shared_ptr& op) const { + if (std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op)) return gpr2gpr; else if ( - std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) + 
std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op)) return vec2vec; else OPENVINO_THROW("Register type of the operation " + std::string(op->get_type_name()) + " isn't determined!"); } +bool intel_cpu::CPUGenerator::uses_precompiled_kernel(const std::shared_ptr& e) const { + return std::dynamic_pointer_cast(e) || + std::dynamic_pointer_cast(e); +} +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp index 96ccbb4b0db97f..fa3528df6c9e6d 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp @@ -13,13 +13,23 @@ namespace ov { namespace intel_cpu { +class CompiledSnippetCPU : public snippets::CompiledSnippet { + const std::unique_ptr h_compiled; +public: + const uint8_t* get_code() const override; + size_t get_code_size() const override; + bool empty() const override; + explicit CompiledSnippetCPU(std::unique_ptr h); +}; + class CPUTargetMachine : public snippets::TargetMachine { public: - CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); + explicit CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t host_isa); bool is_supported() const override; - snippets::code get_snippet() const override; + snippets::CompiledSnippetPtr get_snippet() override; size_t get_lanes() const override; + dnnl::impl::cpu::x64::cpu_isa_t get_isa() const; private: std::unique_ptr h; @@ -29,8 +39,10 @@ class CPUTargetMachine : public snippets::TargetMachine { class CPUGenerator : public snippets::Generator { public: CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa); + std::shared_ptr clone() const override; protected: + bool uses_precompiled_kernel(const std::shared_ptr& emitter) const override; opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override; }; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp 
b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 24493334f1d675..072c3f7edcf60b 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -11,6 +11,7 @@ #include "snippets/lowered/port_connector.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op//brgemm_cpu.hpp" +#include "snippets/op/rank_normalization.hpp" using namespace InferenceEngine; using namespace Xbyak; @@ -121,7 +122,12 @@ KernelEmitter::KernelEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt element::Type etype; switch (expr->get_type()) { case snippets::lowered::IOExpression::io_type::INPUT: { - desc = expr->get_output_port_descriptor(0); + const auto first_consumer = expr->get_output_port_connector(0)->get_consumers().begin()->get_expr(); + if (ov::is_type(first_consumer->get_node())) { + desc = first_consumer->get_output_port_descriptor(0); + } else { + desc = expr->get_output_port_descriptor(0); + } etype = expr->get_node()->get_output_element_type(0); num_inputs++; break; diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 71c84045e12848..1da8e866f7c4a1 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -157,6 +157,7 @@ std::map Extension::getOpSets() { NGRAPH_OP(Store, ov::snippets::op) NGRAPH_OP(Subgraph, ov::snippets::op) NGRAPH_OP(VectorBuffer, ov::snippets::op) + NGRAPH_OP(RankNormalization, ov::snippets::op) NGRAPH_OP_X64(LoadConvertSaturation, ov::intel_cpu) NGRAPH_OP_X64(LoadConvertTruncation, ov::intel_cpu) NGRAPH_OP_X64(StoreConvertSaturation, ov::intel_cpu) diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index c20ecbea76cdca..58c6c4f595a95f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ 
-13,7 +13,6 @@ #include #include -#include #include #include @@ -119,67 +118,36 @@ bool SnippetKey::operator==(const SnippetKey& rhs) const { return true; } -snippets::op::Subgraph::BlockedShapeVector getBlockedShapes(const std::vector>& memBlockedDims, - const std::vector>& memOrders, const std::vector& memPrecs) { - size_t numShapes = memBlockedDims.size(); - if (memOrders.size() != numShapes || memPrecs.size() != numShapes) - IE_THROW(Unexpected) << "Number of shapes is mismacthed for dimensions, orders and precisions"; - snippets::op::Subgraph::BlockedShapeVector blockedShapes(numShapes); - for (size_t i = 0; i < numShapes; i++) { - size_t dimSize = memBlockedDims[i].size(); - std::vector dims(dimSize); - for (size_t j = 0; j < dimSize; j++) { - dims[j] = memBlockedDims[i][j]; - } - ov::PartialShape shape(dims); - ov::AxisVector order(memOrders[i]); - ov::element::Type precision = InferenceEngine::details::convertPrecision(memPrecs[i]); - - blockedShapes[i] = snippets::op::Subgraph::BlockedShape{shape, order, precision}; - } - - return blockedShapes; -} } // namespace Snippet::Snippet(const std::shared_ptr& op, const GraphContext::CPtr& context) : Node(op, context, SnippetShapeInferFactory(op)) { host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ? 
dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2; - original_snippet = ov::as_type_ptr(op); - if (!original_snippet) { - IE_THROW(NotImplemented) << "Node is not an instance of snippets::op::Subgraph"; - } - init_body_hash(); - is_dynamic = isDynamicNgraphNode(op); -} + const auto& tmp_snippet = ov::as_type_ptr(op); + OPENVINO_ASSERT(tmp_snippet, "Attempt to create Snippet node from an invalid op type"); + snippetAttrs.snippet = tmp_snippet->clone(); + snippetAttrs.bodyHash = get_body_hash(tmp_snippet); -void Snippet::copy_snippet() const { - ov::OutputVector subgraph_node_inputs; - for (const auto &input : original_snippet->input_values()) { - auto new_input = std::make_shared(input.get_element_type(), input.get_partial_shape()); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = original_snippet->body_ptr()->clone(); - snippetAttrs.snippet = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(original_snippet, snippetAttrs.snippet); - snippetAttrs.snippet->set_friendly_name(original_snippet->get_friendly_name()); #if defined(OPENVINO_ARCH_X86_64) snippetAttrs.snippet->set_generator(std::make_shared(host_isa)); #else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; + OPENVINO_THROW("CPU plugin: Snippets code-generator is not supported on non-x64 platforms"); #endif // OPENVINO_ARCH_X86_64 + + // Note: we have to update shapeInfer, so it uses the per-thread op::Subgraph copy + shapeInference = SnippetShapeInferFactory(snippetAttrs.snippet).makeShapeInfer(); + is_dynamic = isDynamicNgraphNode(op); } -void Snippet::init_body_hash() { +uint64_t Snippet::get_body_hash(const std::shared_ptr& snippet) { uint64_t seed = 0; ov::snippets::pass::Hash hash_function(seed); - hash_function.run_on_model(original_snippet->body_ptr()); - snippetAttrs.bodyHash = seed; + hash_function.run_on_model(snippet->body_ptr()); + return seed; } void 
Snippet::initSupportedPrimitiveDescriptors() { - copy_snippet(); if (!supportedPrimitiveDescriptors.empty()) return; @@ -315,16 +283,29 @@ void Snippet::selectOptimalPrimitiveDescriptor() { } void Snippet::initOptimalPrimitiveDescriptor() { + const auto isPlanar = [](const VectorDims& order ) { + for (size_t i = 0; i < order.size(); ++i) + if (order[i] != i) + return false; + return true; + }; Node::initOptimalPrimitiveDescriptor(); // memory order and precision is determined now, there is no need to prepare for each dynamic shapes. const auto config = getSelectedPrimitiveDescriptor()->getConfig(); inputNum = config.inConfs.size(); + snippets::op::Subgraph::BlockedShapeVector in_blocked_shapes; snippetAttrs.inMemPrecs.resize(inputNum); snippetAttrs.inMemOrders.resize(inputNum); + in_blocked_shapes.reserve(inputNum); + snippetAttrs.has_non_planar_inputs = false; for (size_t i = 0; i < inputNum; i++) { const auto& memDesc = config.inConfs[i].getMemDesc(); snippetAttrs.inMemPrecs[i] = memDesc->getPrecision(); - snippetAttrs.inMemOrders[i] = memDesc->as()->getOrder(); + const auto& blockedDesc = memDesc->as(); + const auto& order = blockedDesc->getOrder(); + snippetAttrs.inMemOrders[i] = order; + snippetAttrs.has_non_planar_inputs |= !isPlanar(order); + in_blocked_shapes.emplace_back(blockedDesc->getBlockDims(), order); } outputNum = config.outConfs.size(); snippetAttrs.outMemPrecs.resize(outputNum); @@ -338,6 +319,52 @@ void Snippet::initOptimalPrimitiveDescriptor() { snippetAttrs.outMemBlockedDims.resize(outputNum); srcMemPtrs.resize(inputNum); dstMemPtrs.resize(outputNum); + + // here we should perform all shape-agnostic snippets passes + // * canonicalization (RankNormalization insert) + // * precision propagation & align element types + // * data flow optimizations + // The result of these transformations will be reused by all shapes + using Manager = snippets::pass::Manager; + std::vector backend_passes; +#if defined(OPENVINO_ARCH_X86_64) + using PassPosition = 
snippets::pass::Manager::PassPosition; + using Place = snippets::pass::Manager::PassPosition::Place; +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) \ + backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) +#else +# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) +#endif // OPENVINO_ARCH_X86_64 + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); + if (context->getConfig().inferencePrecision == ov::element::bf16 && snippetAttrs.snippet->has_domain_sensitive_ops()) { + // enforce BF16 precisions to supported operations + // MatMul has to be decomposed to Brgemm operations before enforcement + // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); + SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); + } + + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); + SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); + + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); + SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); + +#undef SNIPPETS_REGISTER_PASS + + std::vector input_precisions; + std::vector output_precisions; + input_precisions.reserve(inputNum); + for (const auto& p : snippetAttrs.inMemPrecs) { + input_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + } + output_precisions.reserve(outputNum); + for (const auto& p : snippetAttrs.outMemPrecs) + output_precisions.push_back(InferenceEngine::details::convertPrecision(p)); + + snippetAttrs.snippet->data_flow_transformations(in_blocked_shapes, input_precisions, output_precisions, backend_passes); + 
snippetAttrs.snippet->convert_body_to_linear_ir(std::make_shared()); } InferenceEngine::Precision Snippet::getRuntimePrecision() const { @@ -361,9 +388,8 @@ void Snippet::prepareParams() { SnippetKey key = {snippetAttrs}; auto builder = [this](const SnippetKey& key) -> std::shared_ptr { - std::shared_ptr executor = std::make_shared(key.attrs, is_canonicalized, - is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); - is_canonicalized = true; + std::shared_ptr executor = + std::make_shared(key.attrs, is_dynamic, context->getConfig().inferencePrecision == ov::element::bf16); return executor; }; @@ -426,15 +452,17 @@ void Snippet::executeDynamicImpl(dnnl::stream strm) { } void Snippet::SnippetJitExecutor::exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { - if (schedule.ptr == nullptr) { + if (schedule.lowering_result.compiled_snippet->empty()) { IE_THROW() << "Snippet can't use Optimized implementation and can't fallback to reference"; } auto initStartMemoryOffsets = [this, &inMemPtrs, &outMemPtrs]() { for (size_t i = 0; i < numInput; i++) { - start_offset_in[i] = inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]; + start_offset_in[i] = + static_cast(inMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i]); } for (size_t i = 0; i < numOutput; i++) { - start_offset_out[i] = outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]; + start_offset_out[i] = + static_cast(outMemPtrs[i]->getDescWithType()->getOffsetPadding() * dataSize[i + numInput]); } }; // initialize start offsets to src and dst memory @@ -465,13 +493,13 @@ void Snippet::SnippetJitExecutor::update_ptrs(jit_snippets_call_args& call_args, void Snippet::SnippetJitExecutor::schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs) { const auto& dom = parallel_exec_domain; // < N, C, H, W > < 1, 1, N, C*H*W> + const auto& callable = schedule.get_callable(); parallel_for5d(dom[0], dom[1], dom[2], dom[3], 
dom[4], [&](int64_t d0, int64_t d1, int64_t d2, int64_t d3, int64_t d4) { int64_t indexes[] = {d0, d1, d2, d3, d4}; jit_snippets_call_args call_args; update_ptrs(call_args, inMemPtrs, outMemPtrs); - - schedule.get_callable()(indexes, &call_args); + callable(indexes, &call_args); }); } @@ -487,8 +515,8 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe std::vector indexes(work_size.size() - 1, 0); for (size_t iwork = start; iwork < end; ++iwork) { size_t tmp = iwork; - for (ptrdiff_t j = work_size.size() - 2; j >= 0; j--) { - indexes[j] = tmp % work_size[j]; + for (ptrdiff_t j = static_cast(work_size.size()) - 2; j >= 0; j--) { + indexes[j] = static_cast(tmp % work_size[j]); tmp /= work_size[j]; } @@ -497,49 +525,25 @@ void Snippet::SnippetJitExecutor::schedule_nt(const std::vector& inMe }); } -Snippet::SnippetExecutor::SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) - : snippetAttrs(attrs), is_canonicalized(is_canonicalized), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} +Snippet::SnippetExecutor::SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) + : snippetAttrs(std::move(attrs)), is_dynamic(is_dynamic), enforceBF16(enforceBF16) {} -Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16) : - SnippetExecutor(attrs, is_canonicalized, is_dynamic, enforceBF16) { +Snippet::SnippetJitExecutor::SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16) : + SnippetExecutor(std::move(attrs), is_dynamic, enforceBF16) { numInput = snippetAttrs.inMemBlockedDims.size(); numOutput = snippetAttrs.outMemBlockedDims.size(); start_offset_in.resize(numInput); start_offset_out.resize(numOutput); - auto local_copy = [this]() { - ov::OutputVector subgraph_node_inputs; - for (size_t i = 0; i < numInput; i++) { - const auto paramShape = 
snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_shape(); - const auto paramType = snippetAttrs.snippet->body_ptr()->get_parameters()[i]->get_element_type(); - auto new_input = std::make_shared(paramType, paramShape); - subgraph_node_inputs.push_back(new_input); - } - std::shared_ptr new_body = snippetAttrs.snippet->body_ptr()->clone(); - - snippet_for_generation = std::make_shared(subgraph_node_inputs, new_body); - ov::copy_runtime_info(snippetAttrs.snippet, snippet_for_generation); - snippet_for_generation->set_friendly_name(snippetAttrs.snippet->get_friendly_name()); -#if defined(OPENVINO_ARCH_X86_64) - auto host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) - ? dnnl::impl::cpu::x64::avx512_core - : dnnl::impl::cpu::x64::avx2; - snippet_for_generation->set_generator(std::make_shared(host_isa)); -#else - IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms"; -#endif // OPENVINO_ARCH_X86_64 - }; - // is_canonicalized is ture means just reshape canonicalized graph with new input shapes, and get updated master shape, - // false means canonicalization, determine master_shape on snippetAttrs.snippet. - ov::PartialShape canonicalShape = canonicalizeBody(is_canonicalized); - - if (is_dynamic) { - // we need a local snippets for generation, which will be adjusted based on input shapes possibily. - // The adjustment may be not compatible with new input shape in dynamic node, such as broadcastMove inserted. - local_copy(); - } else { - snippet_for_generation = snippetAttrs.snippet; + // todo: snippets don't support backend-provided blocking, so we need to reshape body + // using blocked shapes first. 
This can be removed after [121670] + if (snippetAttrs.has_non_planar_inputs) { + std::vector in_shapes; + for (const auto& s : snippetAttrs.inMemBlockedDims) + in_shapes.emplace_back(s); + snippetAttrs.snippet->shape_infer(in_shapes); } + const VectorDims& canonicalShape = snippetAttrs.snippet->infer_master_shape(); // initialize by maximum output dimension. Dimensions of outputs should be broadcastable tensorRank = std::max(static_cast(rank6D), canonicalShape.size()); @@ -552,85 +556,39 @@ Snippet::SnippetJitExecutor::SnippetJitExecutor(const SnippetAttrs& attrs, bool }; initDataSizes(); - if (canonicalShape.is_dynamic()) + if (std::any_of(canonicalShape.begin(), canonicalShape.end(), + [](size_t x){return x == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION;})) IE_THROW() << "Snippets: Canonicalization returned dynamic shape in static pipeline"; - snippet_for_generation->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); + snippetAttrs.snippet->set_min_parallel_work_amount(static_cast(parallel_get_max_threads())); // Note: minimal JIT work amount is a predefined value that describes the number of kernel iterations (work amount) // needed to cover kernel call overhead. It is used for balancing between parallel and JIT work amounts in domain optimization. 
- snippet_for_generation->set_min_jit_work_amount(256); + snippetAttrs.snippet->set_min_jit_work_amount(256); // generate jit_snippets_compile_args jcp; jcp.parallel_executor_ndims = tensorRank; generate(&jcp); - buffer_scratchpad_size = snippet_for_generation->get_buffer_scratchpad_size(); + buffer_scratchpad_size = schedule.lowering_result.buffer_scratchpad_size; buffer_scratchpad.resize(buffer_scratchpad_size * parallel_get_max_threads(), 0); parallel_exec_domain = schedule.parallel_exec_domain; harnessWorkAmount = std::accumulate(parallel_exec_domain.begin(), parallel_exec_domain.end(), 1, std::multiplies()); parallel_exec_domain = getNormalizedDimsBySize(parallel_exec_domain, tensorRank); } -ov::PartialShape Snippet::SnippetJitExecutor::canonicalizeBody(bool reshape) { - ov::snippets::op::Subgraph::BlockedShapeVector input_blocked_shapes = getBlockedShapes( - snippetAttrs.inMemBlockedDims, snippetAttrs.inMemOrders, snippetAttrs.inMemPrecs); - if (reshape) { - const auto& canonicalShape = snippetAttrs.snippet->canonicalized_body_shape_infer(input_blocked_shapes); - return canonicalShape; - } else { - ov::snippets::op::Subgraph::BlockedShapeVector output_blocked_shapes = getBlockedShapes( - snippetAttrs.outMemBlockedDims, snippetAttrs.outMemOrders, snippetAttrs.outMemPrecs); - - const auto& canonicalShape = snippetAttrs.snippet->canonicalize(output_blocked_shapes, input_blocked_shapes); - return canonicalShape; - } -} - void Snippet::SnippetJitExecutor::generate(const jit_snippets_compile_args* jcp) { - using Manager = snippets::pass::Manager; - std::vector backend_passes; -#if defined(OPENVINO_ARCH_X86_64) - using PassPosition = snippets::pass::Manager::PassPosition; - using Place = snippets::pass::Manager::PassPosition::Place; -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) \ - backend_passes.emplace_back(PASS_POS, std::make_shared(__VA_ARGS__)) -#else -# define SNIPPETS_REGISTER_PASS(PASS_POS, PASS, ...) 
-#endif // OPENVINO_ARCH_X86_64 - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ConvertToSwishCPU); - if (enforceBF16 && snippet_for_generation->has_domain_sensitive_ops()) { - // enforce BF16 precisions to supported operations - // MatMul has to be decomposed to Brgemm operations before enforcement - // Note, MatMul decomposition will be run later again for case if BF16 enforcement is not happened - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineStart), ov::snippets::pass::MatMulToBrgemm); - SNIPPETS_REGISTER_PASS(PassPosition(Place::After, "MatMulToBrgemm"), pass::EnforcePrecision, element::f32, element::bf16); - } - - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::BrgemmToBrgemmCPU); - SNIPPETS_REGISTER_PASS(PassPosition(Place::Before, "PropagatePrecision"), ov::intel_cpu::pass::SetBrgemmCPUBlockingParams); - - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::RemoveConverts); - SNIPPETS_REGISTER_PASS(PassPosition(Place::PipelineEnd), ov::intel_cpu::pass::MulAddToFMA); - -#undef SNIPPETS_REGISTER_PASS - ov::snippets::lowered::pass::PassPipeline control_flow_markup_pipeline; CPU_REGISTER_PASS_X64(control_flow_markup_pipeline, ov::intel_cpu::pass::BrgemmBlocking) ov::snippets::lowered::pass::PassPipeline control_flow_pipeline; CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert) CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape); - // Note: we need to pass valid shapeInfer factory to generate, so it can be used in OptimizeDomain pass - // in all other cases nGraph shape inference will be used until ticket # 113209 (PR 18563) is merged - schedule = snippet_for_generation->generate(backend_passes, - control_flow_markup_pipeline, - control_flow_pipeline, - std::make_shared(), - reinterpret_cast(jcp)); + schedule = snippetAttrs.snippet->generate_from_linear_ir(control_flow_markup_pipeline, + 
control_flow_pipeline, + reinterpret_cast(jcp)); } bool Snippet::SnippetJitExecutor::schedule_created() { - return schedule.ptr != nullptr; + return !schedule.lowering_result.compiled_snippet->empty(); } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h index 086e84e15ba631..0979aeee807d8f 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.h +++ b/src/plugins/intel_cpu/src/nodes/subgraph.h @@ -48,31 +48,24 @@ class Snippet : public Node { // Local copy of subgraph node for canonization & code generation std::shared_ptr snippet; uint64_t bodyHash; - std::vector> inMemBlockedDims; - std::vector> inMemOrders; + std::vector inMemBlockedDims; + std::vector inMemOrders; std::vector inMemPrecs; - std::vector> outMemBlockedDims; - std::vector> outMemOrders; + std::vector outMemBlockedDims; + std::vector outMemOrders; std::vector outMemPrecs; + // todo: used flag if we need extra shape infer, can be removed after [121670] + bool has_non_planar_inputs; }; private: - static const size_t rank6D {6}; - typedef void (*kernel)(const void *, const void *); - // Create a deep local copy of the input snippet to perform canonicalization & code generation - // TODO: Probably better to implement a proper copy constructor - void copy_snippet() const; - void init_body_hash(); + static uint64_t get_body_hash(const std::shared_ptr& snippet); size_t inputNum = 0; size_t outputNum = 0; - // Original subgraph node - std::shared_ptr original_snippet; - mutable std::shared_ptr local_snippet; - // Holds ISA version used is codeGeneration target dnnl::impl::cpu::x64::cpu_isa_t host_isa; @@ -80,18 +73,17 @@ class Snippet : public Node { std::vector dstMemPtrs = {}; mutable SnippetAttrs snippetAttrs; - mutable bool is_canonicalized = false; bool is_dynamic = false; class SnippetExecutor { public: - SnippetExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + 
SnippetExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); virtual void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) = 0; virtual ~SnippetExecutor() = default; + std::shared_ptr shapeInference = nullptr; protected: SnippetAttrs snippetAttrs; - bool is_canonicalized = false; bool is_dynamic = false; bool enforceBF16 = false; }; @@ -100,7 +92,7 @@ class Snippet : public Node { class SnippetJitExecutor : public SnippetExecutor { public: - SnippetJitExecutor(const SnippetAttrs& attrs, bool is_canonicalized, bool is_dynamic, bool enforceBF16); + SnippetJitExecutor(SnippetAttrs attrs, bool is_dynamic, bool enforceBF16); void exec(const std::vector& inMemPtrs, const std::vector& outMemPtrs) override; bool schedule_created(); @@ -113,16 +105,12 @@ class Snippet : public Node { size_t numInput = 0; size_t numOutput = 0; - ov::PartialShape canonicalizeBody(bool reshape); - void generate(const jit_snippets_compile_args*); inline void update_ptrs(jit_snippets_call_args&, const std::vector& inMemPtrs, const std::vector& outMemPtrs); // Evaluates generated snippet using parallel backend void schedule_6d(const std::vector& inMemPtrs, const std::vector& outMemPtrs); void schedule_nt(const std::vector& inMemPtrs, const std::vector& outMemPtrs); - std::shared_ptr snippet_for_generation; - // Holds generated snippet with information about how to schedule it snippets::Schedule schedule; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp index 64db193d7773f1..bae9b2b1dd43cc 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.cpp @@ -43,3 +43,8 @@ void FusedMulAdd::validate_and_infer_types() { } set_output_type(0, element_type, pshape); } + +const ov::op::AutoBroadcastSpec& FusedMulAdd::get_autob() const { + static 
ov::op::AutoBroadcastSpec autob_spec(ov::op::AutoBroadcastType::NUMPY); + return autob_spec; +} diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp index e55741986a2473..68a730c95aabd2 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/op/fused_mul_add.hpp @@ -24,6 +24,7 @@ class FusedMulAdd : public ngraph::op::Op { bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; + const ov::op::AutoBroadcastSpec& get_autob() const override; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp index 45aaf9f4eeafdf..bd2f1aad832550 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/add.cpp @@ -68,6 +68,9 @@ std::vector> inShapesAddPair { {{{}, {{1, 128, 9, 30}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 1}}}, {{}, {{1, 128, 1, 30}}}}, {{{}, {{1, 128, 9, 16}}}, {{}, {{1, 128, 9, 1}}}}, + // Test Canonicalization and Dimension collapsing + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 3, 4}}}}, + {{{}, {{2, 17, 3, 4}}}, {{}, {{1, 4}}}}, // DS {{{1, -1, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 9, 1}, {1, 128, 1, 1}}}, {{{1, 1}, {128, 128}, {1, 10}, {1, 33}}, {{1, 128, 1, 1}, {1, 128, 1, 9}, {1, 128, 1, 17}, {1, 128, 1, 29}, {1, 128, 1, 30}, {1, 128, 1, 1}}}}, diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp index 
7a6ef6e67c1d21..3a760050d0159a 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include "snippets/op/scalar.hpp" #include "lowering_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include "snippets/pass_manager.hpp" namespace ov { @@ -61,7 +63,7 @@ class EltwiseWithMulAddFunction : public SnippetsFunctionBase { ParameterVector parameters{data0, data1}; std::shared_ptr data2; if (scalar_input) { - data2 = std::make_shared(precision, Shape{}, 2.f); + data2 = std::make_shared(precision, Shape{1}, 2.f); } else { auto parameter = std::make_shared(precision, input_shapes[2]); parameters.push_back(parameter); @@ -110,8 +112,8 @@ class MulAddToFMATests : public LoweringTests, public testing::WithParamInterfac std::ostringstream result; for (size_t i = 0; i < inputShapes.size(); i++) - result << "IS[" << i << "]=" << inputShapes[i] << "_"; - result << "MS=" << master_shape << "_"; + result << "IS[" << i << "]=" << ov::test::utils::partialShape2str({inputShapes[i]}) << "_"; + result << "MS=" << ov::test::utils::partialShape2str({master_shape}) << "_"; result << "add_input_idx=" << add_input_idx; return result.str(); } @@ -146,7 +148,8 @@ TEST_P(MulAddToFMATests, MulAddToFMATests) { backend_passes, {}, {}, - generator); + generator, + std::make_shared()); model = subgraph->body_ptr(); model_ref = snippets_model->getLowered(); } From d490ab68d1b98d46cda93e33df3376943eeade9c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 24 Oct 2023 13:31:03 +0800 Subject: [PATCH 012/275] [CPU][MLAS] FC uses L2 cache for thread partition (#20436) --- src/plugins/intel_cpu/thirdparty/mlas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/thirdparty/mlas b/src/plugins/intel_cpu/thirdparty/mlas index c7c8a631315000..f6425b13943348 160000 --- 
a/src/plugins/intel_cpu/thirdparty/mlas +++ b/src/plugins/intel_cpu/thirdparty/mlas @@ -1 +1 @@ -Subproject commit c7c8a631315000f17c650af34431009d2f22129c +Subproject commit f6425b1394334822390fcd9da12788c9cd0d11da From afda7ad70f5f61c83a70488aeea6ff0bbe713176 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 24 Oct 2023 07:44:26 +0200 Subject: [PATCH 013/275] [Transformations] FuseU4WeightsAndZeroPoint transformation (#20503) --- ...onvert_u4_weights_zero_point_to_scalar.hpp | 26 +++ ...onvert_u4_weights_zero_point_to_scalar.cpp | 80 +++++++ .../moc_transformations.cpp | 2 + .../src/transformations/utils/utils.cpp | 4 + ...onvert_u4_weights_zero_point_to_scalar.cpp | 208 ++++++++++++++++++ src/core/src/graph_util.cpp | 3 +- 6 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp create mode 100644 src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp create mode 100644 src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp new file mode 100644 index 00000000000000..0b8d31b404090e --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertU4WeightsZeroPointToScalar; + +} // namespace pass +} // namespace ov + +/** + * @ingroup 
ie_transformation_common_api + * @brief Converts U4 weights zero point to scalar if all values are equal + */ +class ov::pass::ConvertU4WeightsZeroPointToScalar : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertU4WeightsZeroPointToScalar", "0"); + ConvertU4WeightsZeroPointToScalar(); +}; diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..6313db127ac406 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/reference/autobroadcast_binop.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertU4WeightsZeroPointToScalar::ConvertU4WeightsZeroPointToScalar() { + MATCHER_SCOPE(ConvertU4WeightsZeroPointToScalar); + auto weights_m = pattern::wrap_type(pattern::type_matches(ov::element::u4)); + auto convert_m = pattern::wrap_type({weights_m}, pattern::consumers_count(1)); + + auto float_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches_any({ov::element::f32, ov::element::f16})(output) && + pattern::consumers_count(1)(output); + }; + auto float_zero_point_m = pattern::wrap_type(float_zp_predicate); + + auto u4_zp_predicate = [](ov::Output output) -> bool { + return pattern::type_matches(ov::element::u4)(output) && pattern::consumers_count(1)(output); + }; + auto 
u4_zero_point_m = pattern::wrap_type(u4_zp_predicate); + auto zero_point_convert_m = pattern::wrap_type({u4_zero_point_m}, float_zp_predicate); + + auto zero_point_m = std::make_shared(OutputVector{float_zero_point_m, zero_point_convert_m}); + auto subtract_m = pattern::wrap_type({convert_m, zero_point_m}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + auto& pattern_map = m.get_pattern_value_map(); + auto weights = ov::as_type_ptr(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr zero_point; + if (pattern_map.count(float_zero_point_m)) { + const auto& float_zp = pattern_map.at(float_zero_point_m); + zero_point = ov::as_type_ptr(float_zp.get_node_shared_ptr()); + } else { + const auto& u4_zp = pattern_map.at(u4_zero_point_m); + zero_point = ov::as_type_ptr(u4_zp.get_node_shared_ptr()); + } + if (!weights || !zero_point) + return false; + // Due to the matcher specific and Subtract branches similarity, + // weights and zero_point might be mixed up with each other + if (ov::shape_size(weights->get_shape()) < ov::shape_size(zero_point->get_shape())) + std::swap(zero_point, weights); + + auto zero_point_shape = zero_point->get_shape(); + if (ov::shape_size(zero_point_shape) == 1) + return false; + + const auto& weights_shape = weights->get_shape(); + const size_t weights_rank = weights_shape.size(); + const size_t zero_point_rank = zero_point_shape.size(); + // Zero point constant can be converted into scalar only if this does not affect Subtract output shape + if (weights_rank < zero_point_rank) + return false; + + zero_point_shape.insert(zero_point_shape.begin(), weights_rank - zero_point_rank, 1); + for (size_t i = 0; i < weights_rank; ++i) { + if (zero_point_shape[i] > weights_shape[i]) + return false; + } + + float zp_value; + if (!ov::op::util::get_single_value(zero_point, zp_value)) + return false; + const auto new_zp = ov::op::v0::Constant::create(zero_point->get_element_type(), {}, {zp_value}); + return 
ov::replace_node_update_name(zero_point, new_zp); + }; + + auto m = std::make_shared(subtract_m, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 068e1f27a291e9..86746f176cad7f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -21,6 +21,7 @@ #include "transformations/common_optimizations/conv_to_binary_conv.hpp" #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" #include "transformations/common_optimizations/convolution_to_group_convolution_fusion.hpp" #include "transformations/common_optimizations/depth_to_space_fusion.hpp" #include "transformations/common_optimizations/dilated_convolution_converter.hpp" @@ -212,6 +213,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ShuffleChannelsFusion, !m_use_shapes) ADD_MATCHER(common_fusions, NonZeroHorizontalFusion) ADD_MATCHER(common_fusions, AdaptivePoolToReduce) + ADD_MATCHER(common_fusions, ConvertU4WeightsZeroPointToScalar) common_fusions->set_name("ov::pass::CommonFusions"); REGISTER_PASS(manager, BinarizeWeights) diff --git a/src/common/transformations/src/transformations/utils/utils.cpp b/src/common/transformations/src/transformations/utils/utils.cpp index 62b1765e7ba275..b7cde395a66eb5 100644 --- a/src/common/transformations/src/transformations/utils/utils.cpp +++ b/src/common/transformations/src/transformations/utils/utils.cpp @@ -31,6 +31,8 @@ bool get_single_value(const std::shared_ptr& 
const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::f64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::i4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::i8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i16: @@ -39,6 +41,8 @@ bool get_single_value(const std::shared_ptr& const_node, float return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::i64: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); + case element::Type_t::u4: + return util::normalize_single_value(const_node->cast_vector(), value, check_value_range); case element::Type_t::u8: return util::normalize_single_value(const_node->get_vector(), value, check_value_range); case element::Type_t::u16: diff --git a/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp new file mode 100644 index 00000000000000..8fc896065e9001 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -0,0 +1,208 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/convert_u4_weights_zero_point_to_scalar.hpp" + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/pass/manager.hpp" + +using namespace testing; +using namespace ov; + +TEST_F(TransformationTestsF, 
ConvertU4WeightsFloatZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8.1f}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalar) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto 
zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, scalar_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalarWeightsWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{64}; + { + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); + } + { + 
ov::Shape scalar_shape{}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, scalar_shape, {8}); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model_ref = std::make_shared(NodeVector{multiply}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::ACCURACY); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotScalarLikeZP) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + std::vector zero_point_values(ov::shape_size(decompression_shape), 8); + zero_point_values.back() = 6; + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, zero_point_values); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotU4Weights) { + auto weights_precision = ov::element::u8; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = 
ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsFloatZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto subtract = std::make_shared(convert, zero_point); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = 
ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_consumer = std::make_shared(zero_point); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarAdditionalZPConvertConsumer) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertU4WeightsU4ZeroPointToScalarZPWithBiggerRank) { + auto weights_precision = ov::element::u4; + auto decompression_precision = ov::element::f32; + ov::Shape weights_shape{32, 128, 64}; + ov::Shape decompression_shape{1, 32, 1, 64}; + auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, {4}); + auto convert = std::make_shared(weights, 
decompression_precision); + auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, {8}); + auto zero_point_convert = std::make_shared(zero_point, decompression_precision); + auto zero_point_convert_consumer = std::make_shared(zero_point_convert); + auto subtract = std::make_shared(convert, zero_point_convert); + auto scale = ov::op::v0::Constant::create(decompression_precision, decompression_shape, {3.f}); + auto multiply = std::make_shared(subtract, scale); + model = std::make_shared(NodeVector{multiply, zero_point_convert_consumer}, ParameterVector{}); + manager.register_pass(); +} diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 8001678dab2601..4c6a4d0f33e516 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -319,7 +319,8 @@ bool replace_output_update_name(Output output, const Output& replace bool replace_node_update_name(const std::shared_ptr& target, const std::shared_ptr& replacement) { for (auto& output : target->output(0).get_target_inputs()) { - if (ov::as_type(replacement->input_value(0).get_node()) && + if (replacement->get_input_size() > 0 && + ov::as_type(replacement->input_value(0).get_node()) && ov::as_type(output.get_node())) { return false; } From 84a0994ec598aec8dc4c1622d40666a6e7af8c0d Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Tue, 24 Oct 2023 13:59:08 +0800 Subject: [PATCH 014/275] [core] fix memory leak issue imported by #18868 (#19832) * try to fix memory leak issue cpustreamer is released, but there are still thread id in t_stream_count_map * fix threadlocal affect all threads Signed-off-by: HU Yuan2 * add comment for local() function to avoid mistaken modification in the future Signed-off-by: HU Yuan2 * use custom stread id Signed-off-by: HU Yuan2 * fix review comments Signed-off-by: HU Yuan2 * fix format issue Signed-off-by: HU Yuan2 * create shared_ptr before assert Signed-off-by: HU Yuan2 --------- Signed-off-by: HU Yuan2 --- 
.../dev/threading/cpu_streams_executor.cpp | 63 ++++++++++++++++--- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index e61893e132dfeb..691a3951615460 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -4,6 +4,7 @@ #include "openvino/runtime/threading/cpu_streams_executor.hpp" +#include #include #include #include @@ -22,8 +23,6 @@ namespace ov { namespace threading { -// maybe there are two CPUStreamsExecutors in the same thread. -thread_local std::map> t_stream_count_map; struct CPUStreamsExecutor::Impl { struct Stream { #if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO @@ -335,12 +334,58 @@ struct CPUStreamsExecutor::Impl { // will be counted by thread_local t_stream_count_map. // when the customer's thread is destoryed, the stream's count will became 1, // Call local() will reuse one of them, and release others. 
+ // it's only a workaround for ticket CVS-111490, please be carefully when need to modify + // CustomeThreadLocal::local(), especially like operations that will affect the count of + // CustomThreadLocal::ThreadId class CustomThreadLocal : public ThreadLocal> { + class ThreadTracker { + public: + explicit ThreadTracker(const std::thread::id& id) + : _id(id), + _count_ptr(std::make_shared(1)) {} + ~ThreadTracker() { + _count_ptr->fetch_sub(1); + } + std::shared_ptr fetch() { + auto new_ptr = std::shared_ptr(new ThreadTracker(*this)); + auto pre_valule = new_ptr.get()->_count_ptr->fetch_add(1); + OPENVINO_ASSERT(pre_valule == 1, "this value must be 1, please check code CustomThreadLocal::local()"); + return new_ptr; + } + const std::thread::id& get_id() const { + return _id; + } + int count() const { + return *(_count_ptr.get()); + } + + private: + // disable all copy and move semantics, user only can use fetch() + // to create a new instance with a shared count num; + ThreadTracker(ThreadTracker const&) = default; + ThreadTracker(ThreadTracker&&) = delete; + ThreadTracker& operator=(ThreadTracker const&) = delete; + ThreadTracker& operator=(ThreadTracker&&) = delete; + std::thread::id _id; + std::shared_ptr _count_ptr; + }; + public: CustomThreadLocal(std::function()> callback_construct, Impl* impl) : ThreadLocal>(callback_construct), _impl(impl) {} std::shared_ptr local() { + // maybe there are two CPUStreamsExecutors in the same thread. 
+ static thread_local std::map> t_stream_count_map; + // fix the memory leak issue that CPUStreamsExecutor is already released, + // but still exists CustomThreadLocal::ThreadTracker in t_stream_count_map + for (auto it = t_stream_count_map.begin(); it != t_stream_count_map.end();) { + if (this != it->first && it->second->count() == 1) { + t_stream_count_map.erase(it++); + } else { + it++; + } + } auto id = std::this_thread::get_id(); auto search = _thread_ids.find(id); if (search != _thread_ids.end()) { @@ -348,14 +393,13 @@ struct CPUStreamsExecutor::Impl { } std::lock_guard guard(_stream_map_mutex); for (auto& item : _stream_map) { - if (*(item.first.get()) == id) { - t_stream_count_map[(void*)this] = item.first; + if (item.first->get_id() == id) { return item.second; } } std::shared_ptr stream = nullptr; for (auto it = _stream_map.begin(); it != _stream_map.end();) { - if (it->first.use_count() == 1) { + if (it->first->count() == 1) { if (stream == nullptr) { stream = it->second; } @@ -367,9 +411,10 @@ struct CPUStreamsExecutor::Impl { if (stream == nullptr) { stream = std::make_shared(_impl); } - auto id_ptr = std::make_shared(id); - t_stream_count_map[(void*)this] = id_ptr; - _stream_map[id_ptr] = stream; + auto tracker_ptr = std::make_shared(id); + t_stream_count_map[(void*)this] = tracker_ptr; + auto new_tracker_ptr = tracker_ptr->fetch(); + _stream_map[new_tracker_ptr] = stream; return stream; } @@ -382,7 +427,7 @@ struct CPUStreamsExecutor::Impl { private: std::set _thread_ids; Impl* _impl; - std::map, std::shared_ptr> _stream_map; + std::map, std::shared_ptr> _stream_map; std::mutex _stream_map_mutex; }; From 7ceff55b71910f021bff5d2b58ae8a32a2e7d60f Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 24 Oct 2023 10:13:23 +0400 Subject: [PATCH 015/275] Add AlignedBuffer to OpenVINO developer API (#20532) * Add AlignedBuffer to OpenVINO developer API * Fixed build * Fixed code style and remove opset deprecation * Fixed Windows build * Fixed GNA * Fixed 
comment --- .../python/src/pyopenvino/core/common.cpp | 5 +- src/common/snippets/src/pass/hash.cpp | 12 +++ .../openvino/runtime/aligned_buffer.hpp | 75 +++++++++++++++++++ .../openvino/runtime/shared_buffer.hpp | 31 ++++++++ .../include/ngraph/op/util/op_annotations.hpp | 1 - .../include/ngraph/op/util/slice_plan.hpp | 1 + src/core/include/ngraph/opsets/opset.hpp | 31 ++++---- src/core/include/openvino/op/constant.hpp | 43 +++++------ src/core/reference/src/op/strided_slice.cpp | 8 +- src/core/src/op/constant.cpp | 36 ++++++--- src/core/src/pass/serialize.cpp | 14 ++++ src/core/src/runtime/aligned_buffer.cpp | 51 +++++++++++++ src/core/tests/aligned_buffer.cpp | 15 ++-- src/core/tests/bfloat16.cpp | 30 ++------ src/core/tests/constant.cpp | 14 ++-- src/core/tests/visitors/visitors.hpp | 19 ++--- src/frontends/ir/src/frontend.cpp | 23 +++--- src/frontends/ir/src/input_model.cpp | 14 ++-- src/frontends/ir/src/input_model.hpp | 6 +- src/frontends/ir/src/ir_deserializer.cpp | 32 +++----- src/frontends/ir/src/ir_deserializer.hpp | 18 ++--- .../onnx/frontend/src/core/tensor.hpp | 5 +- .../src/utils/tensor_external_data.cpp | 14 ++-- .../src/utils/tensor_external_data.hpp | 9 +-- .../tensorflow/src/op/var_handle.cpp | 10 +-- src/inference/src/ie_network_reader.cpp | 5 +- src/inference/src/model_reader.cpp | 10 ++- .../src/convert_function_to_cnn_network.cpp | 6 ++ .../common_test_utils/graph_comparator.hpp | 5 +- .../src/graph_comparator.cpp | 12 +-- 30 files changed, 351 insertions(+), 204 deletions(-) create mode 100644 src/core/dev_api/openvino/runtime/aligned_buffer.hpp create mode 100644 src/core/dev_api/openvino/runtime/shared_buffer.hpp diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index b78d3ea4c37bff..7b473929a63396 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -8,6 +8,7 @@ #include "Python.h" #include 
"openvino/core/except.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/common_util.hpp" #define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ @@ -170,13 +171,12 @@ ov::op::v0::Constant create_copied(ov::Tensor& tensor) { return ov::op::v0::Constant(tensor.get_element_type(), tensor.get_shape(), const_cast(tensor.data())); } -OPENVINO_SUPPRESS_DEPRECATED_START template <> ov::op::v0::Constant create_shared(py::array& array) { // Check if passed array has C-style contiguous memory layout. // If memory is going to be shared it needs to be contiguous before passing to the constructor. if (array_helpers::is_contiguous(array)) { - auto memory = std::make_shared>( + auto memory = std::make_shared>( static_cast(array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)), array.ndim() == 0 ? array.itemsize() : array.nbytes(), array); @@ -185,7 +185,6 @@ ov::op::v0::Constant create_shared(py::array& array) { // If passed array is not C-style, throw an error. OPENVINO_THROW("SHARED MEMORY MODE FOR THIS CONSTANT IS NOT APPLICABLE! 
Passed numpy array must be C contiguous."); } -OPENVINO_SUPPRESS_DEPRECATED_END template <> ov::op::v0::Constant create_shared(ov::Tensor& tensor) { diff --git a/src/common/snippets/src/pass/hash.cpp b/src/common/snippets/src/pass/hash.cpp index 2f975ef2cbccee..cea21e37e861cf 100644 --- a/src/common/snippets/src/pass/hash.cpp +++ b/src/common/snippets/src/pass/hash.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/opsets/opset1.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" namespace ov { @@ -180,6 +181,17 @@ class SnippetsHasher : public ov::AttributeVisitor { m_hash = hash_combine(m_hash, data[i]); } } + } else if (const auto& a = + ov::as_type>>(&adapter)) { + if (name == "value" && m_node_type_name == "Constant") { + m_hash = hash_combine(m_hash, AttrType::constant); + const int64_t size = a->get()->size(); + m_hash = hash_combine(hash_combine(m_hash, AttrType::size), size); + auto data = static_cast(a->get()->get_ptr()); + for (int64_t i = 0; i < size; i++) { + m_hash = hash_combine(m_hash, data[i]); + } + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); // Update node attributes in data field diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp new file mode 100644 index 00000000000000..7611744f7c4e58 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/attribute_adapter.hpp" +#include "openvino/core/core_visibility.hpp" + +namespace ov { +/// \brief Allocates a block of memory on the specified alignment. 
The actual size of the +/// allocated memory is larger than the requested size by the alignment, so allocating 1 +/// byte +/// on 64 byte alignment will allocate 65 bytes. +class OPENVINO_API AlignedBuffer { +public: + // Allocator objects and the allocation interfaces are owned by the + // creators of AlignedBuffers. They need to ensure that the lifetime of + // allocator exceeds the lifetime of this AlignedBuffer. + AlignedBuffer(size_t byte_size, size_t alignment = 64); + + AlignedBuffer(); + virtual ~AlignedBuffer(); + + AlignedBuffer(AlignedBuffer&& other); + AlignedBuffer& operator=(AlignedBuffer&& other); + + size_t size() const { + return m_byte_size; + } + void* get_ptr(size_t offset) const { + return m_aligned_buffer + offset; + } + void* get_ptr() { + return m_aligned_buffer; + } + const void* get_ptr() const { + return m_aligned_buffer; + } + template + T* get_ptr() { + return reinterpret_cast(m_aligned_buffer); + } + template + const T* get_ptr() const { + return reinterpret_cast(m_aligned_buffer); + } + + template + explicit operator T*() { + return get_ptr(); + } + +private: + AlignedBuffer(const AlignedBuffer&) = delete; + AlignedBuffer& operator=(const AlignedBuffer&) = delete; + +protected: + char* m_allocated_buffer; + char* m_aligned_buffer; + size_t m_byte_size; +}; + +template <> +class OPENVINO_API AttributeAdapter> + : public DirectValueAccessor> { +public: + AttributeAdapter(std::shared_ptr& value); + + OPENVINO_RTTI("AttributeAdapter"); +}; + +} // namespace ov diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp new file mode 100644 index 00000000000000..1b51bfa07b7ee3 --- /dev/null +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/aligned_buffer.hpp" + +namespace ov { + +/// \brief SharedBuffer class to store 
pointer to pre-acclocated buffer. +template +class SharedBuffer : public ov::AlignedBuffer { +public: + SharedBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) { + m_allocated_buffer = data; + m_aligned_buffer = data; + m_byte_size = size; + } + + virtual ~SharedBuffer() { + m_aligned_buffer = nullptr; + m_allocated_buffer = nullptr; + m_byte_size = 0; + } + +private: + T _shared_object; +}; + +} // namespace ov diff --git a/src/core/include/ngraph/op/util/op_annotations.hpp b/src/core/include/ngraph/op/util/op_annotations.hpp index dad2aa3b43431a..dec2879f9c837f 100644 --- a/src/core/include/ngraph/op/util/op_annotations.hpp +++ b/src/core/include/ngraph/op/util/op_annotations.hpp @@ -30,7 +30,6 @@ struct NGRAPH_API_DEPRECATED oi_pair { }; /// \brief Base class for annotations added to graph ops - class NGRAPH_API_DEPRECATED NGRAPH_API OpAnnotations { NGRAPH_SUPPRESS_DEPRECATED_START public: diff --git a/src/core/include/ngraph/op/util/slice_plan.hpp b/src/core/include/ngraph/op/util/slice_plan.hpp index a0f99cccaed30c..e47e4ecd80e4f0 100644 --- a/src/core/include/ngraph/op/util/slice_plan.hpp +++ b/src/core/include/ngraph/op/util/slice_plan.hpp @@ -38,6 +38,7 @@ namespace ngraph { // // A SlicePlan is used to collect parameters for these ops. 
// +// This class is moved to dev API struct NGRAPH_API_DEPRECATED NGRAPH_API SlicePlan { // Parameters for the Slice std::vector begins; diff --git a/src/core/include/ngraph/opsets/opset.hpp b/src/core/include/ngraph/opsets/opset.hpp index 26c21e237b16c3..3f65437c6d3801 100644 --- a/src/core/include/ngraph/opsets/opset.hpp +++ b/src/core/include/ngraph/opsets/opset.hpp @@ -31,9 +31,7 @@ namespace ngraph { class NGRAPH_API OpSet : public ov::OpSet { public: explicit OpSet(const ov::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_START OpSet(const ngraph::OpSet& opset); - NGRAPH_SUPPRESS_DEPRECATED_END OpSet() = default; /// \brief Insert an op into the opset with a particular name and factory void insert(const std::string& name, const NodeTypeInfo& type_info, FactoryRegistry::Factory factory) { @@ -56,19 +54,20 @@ class NGRAPH_API OpSet : public ov::OpSet { } }; -const NGRAPH_API OpSet& get_opset1(); -const NGRAPH_API OpSet& get_opset2(); -const NGRAPH_API OpSet& get_opset3(); -const NGRAPH_API OpSet& get_opset4(); -const NGRAPH_API OpSet& get_opset5(); -const NGRAPH_API OpSet& get_opset6(); -const NGRAPH_API OpSet& get_opset7(); -const NGRAPH_API OpSet& get_opset8(); -const NGRAPH_API OpSet& get_opset9(); -const NGRAPH_API OpSet& get_opset10(); -const NGRAPH_API OpSet& get_opset11(); -const NGRAPH_API OpSet& get_opset12(); -const NGRAPH_API OpSet& get_opset13(); -const NGRAPH_API std::map>& get_available_opsets(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset1(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset2(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset3(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset4(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset5(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset6(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset7(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset8(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset9(); 
+NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset10(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset11(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset12(); +NGRAPH_API_DEPRECATED const NGRAPH_API OpSet& get_opset13(); +NGRAPH_API_DEPRECATED const NGRAPH_API std::map>& +get_available_opsets(); } // namespace ngraph NGRAPH_SUPPRESS_DEPRECATED_END diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index a482fd12bb4c68..100ed2f7e18d6f 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -12,7 +12,6 @@ # define WAS_OV_LIBRARY_DEFINED_CONSTANT #endif -#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/shared_buffer.hpp" @@ -21,11 +20,14 @@ # undef WAS_OV_LIBRARY_DEFINED_CONSTANT #endif #include "openvino/core/coordinate_diff.hpp" -#include "openvino/core/node.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/element_type_traits.hpp" +#include "openvino/op/op.hpp" namespace ov { + +class AlignedBuffer; + namespace op { namespace v0 { /// \brief Class for constants. @@ -177,13 +179,20 @@ class OPENVINO_API Constant : public Op { /// \param shape The shape of the tensor constant. /// \param data A pointer to pre-allocated shared data. 
template + OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") Constant(const element::Type& type, const Shape& shape, std::shared_ptr> data) : m_element_type(type), m_shape(shape) { - m_data = data; + m_data = legacy_to_ov_aligned_buffer(data); constructor_validate_and_infer_types(); } OPENVINO_SUPPRESS_DEPRECATED_END + Constant(const element::Type& type, const Shape& shape, const std::shared_ptr& data) + : m_element_type(type), + m_shape(shape) { + m_data = data; + constructor_validate_and_infer_types(); + } Constant(const Constant& other); Constant(const Constant& other, const Shape& new_shape); @@ -241,11 +250,7 @@ class OPENVINO_API Constant : public Op { AxisSet get_axis_set_val() const; /// \brief Return data size in bytes - size_t get_byte_size() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return m_data->size(); - OPENVINO_SUPPRESS_DEPRECATED_END - } + size_t get_byte_size() const; /// \brief Wrapper around constructing a shared_ptr of a Constant /// @@ -370,11 +375,8 @@ class OPENVINO_API Constant : public Op { return rc; } - const void* get_data_ptr() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return (m_data ? 
m_data->get_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + const void* get_data_ptr() const; + template const T* get_data_ptr() const { OPENVINO_ASSERT(sizeof(T) <= m_element_type.size() || shape_size(m_shape) <= 0, "Buffer over-read"); @@ -406,6 +408,11 @@ class OPENVINO_API Constant : public Op { private: Constant(bool memset_allocation, const element::Type& type, const Shape& shape); + OPENVINO_SUPPRESS_DEPRECATED_START + std::shared_ptr legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer); + OPENVINO_SUPPRESS_DEPRECATED_END + template , typename std::enable_ifget_ptr() : nullptr); - OPENVINO_SUPPRESS_DEPRECATED_END - } + void* get_data_ptr_nc(); template typename element_type_traits::value_type* get_data_ptr_nc() { @@ -853,9 +856,7 @@ class OPENVINO_API Constant : public Op { element::Type m_element_type; Shape m_shape{}; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr m_data; - OPENVINO_SUPPRESS_DEPRECATED_END + std::shared_ptr m_data; mutable std::atomic_bool m_all_elements_bitwise_identical{false}; mutable std::atomic_bool m_all_elements_bitwise_identical_checked{false}; bool m_alloc_buffer_on_visit_attributes = true; diff --git a/src/core/reference/src/op/strided_slice.cpp b/src/core/reference/src/op/strided_slice.cpp index 6e83305e653059..cad9dee20d02be 100644 --- a/src/core/reference/src/op/strided_slice.cpp +++ b/src/core/reference/src/op/strided_slice.cpp @@ -8,10 +8,10 @@ #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/reference/reshape.hpp" #include "openvino/reference/reverse.hpp" #include "openvino/reference/slice.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace reference { @@ -30,8 +30,7 @@ void strided_slice(const char* arg, return; } - OPENVINO_SUPPRESS_DEPRECATED_START - ngraph::runtime::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); + ov::AlignedBuffer slice_out_buffer(shape_size(sp.reshape_in_shape) * elem_type); 
slice(reinterpret_cast(arg), slice_out_buffer.get_ptr(), arg_shape, @@ -41,7 +40,7 @@ void strided_slice(const char* arg, sp.reshape_in_shape, elem_type); - ngraph::runtime::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); + ov::AlignedBuffer reshape_out_buffer(shape_size(sp.reshape_out_shape) * elem_type); reshape(slice_out_buffer.get_ptr(), reshape_out_buffer.get_ptr(), sp.reshape_in_shape, elem_type); reverse(reshape_out_buffer.get_ptr(), @@ -50,7 +49,6 @@ void strided_slice(const char* arg, sp.reshape_out_shape, sp.reverse_axes, elem_type); - OPENVINO_SUPPRESS_DEPRECATED_END } } // namespace reference } // namespace ov diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 27d9e000b64dec..2fe3d024fd9551 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -10,8 +10,10 @@ #include #include "itt.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" +#include "openvino/runtime/shared_buffer.hpp" template static inline std::string to_cpp_string(T value) { @@ -27,6 +29,14 @@ static inline std::string to_cpp_string(T value) { } return rc; } +OPENVINO_SUPPRESS_DEPRECATED_START +std::shared_ptr ov::op::v0::Constant::legacy_to_ov_aligned_buffer( + const std::shared_ptr& buffer) { + return std::make_shared>>(buffer->get_ptr(), + buffer->size(), + buffer); +} +OPENVINO_SUPPRESS_DEPRECATED_END OPENVINO_SUPPRESS_DEPRECATED_START ov::op::v0::Constant::Constant(const std::shared_ptr& tensor) { @@ -35,7 +45,7 @@ ov::op::v0::Constant::Constant(const std::shared_ptr& t // Share data from HostTensor if we work with it // And copy data in other cas if (auto hostTensor = std::dynamic_pointer_cast(tensor)) { - m_data = std::make_shared>>( + m_data = std::make_shared>>( static_cast(hostTensor->get_data_ptr()), tensor->get_size_in_bytes(), tensor); @@ -51,12 +61,10 @@ OPENVINO_SUPPRESS_DEPRECATED_END 
ov::op::v0::Constant::Constant(const ov::Tensor& tensor) { m_element_type = tensor.get_element_type(); m_shape = tensor.get_shape(); - OPENVINO_SUPPRESS_DEPRECATED_START // Share data from ov::Tensor - m_data = std::make_shared>(static_cast(tensor.data()), - tensor.get_byte_size(), - tensor); - OPENVINO_SUPPRESS_DEPRECATED_END + m_data = std::make_shared>(static_cast(tensor.data()), + tensor.get_byte_size(), + tensor); constructor_validate_and_infer_types(); } @@ -211,12 +219,10 @@ ov::op::v0::Constant::Constant(bool memset_allocation, const element::Type& type } void ov::op::v0::Constant::allocate_buffer(bool memset_allocation) { - OPENVINO_SUPPRESS_DEPRECATED_START - m_data = std::make_shared(mem_size(), host_alignment()); + m_data = std::make_shared(mem_size(), host_alignment()); if (memset_allocation) { std::memset(m_data->get_ptr(), 0, m_data->size()); } - OPENVINO_SUPPRESS_DEPRECATED_END } ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape, const void* data) @@ -316,6 +322,18 @@ std::string ov::op::v0::Constant::convert_value_to_string(size_t index) const { return rc; } +size_t ov::op::v0::Constant::get_byte_size() const { + return m_data->size(); +} + +const void* ov::op::v0::Constant::get_data_ptr() const { + return (m_data ? m_data->get_ptr() : nullptr); +} + +void* ov::op::v0::Constant::get_data_ptr_nc() { + return (m_data ? 
m_data->get_ptr() : nullptr); +} + std::vector ov::op::v0::Constant::get_value_strings() const { std::vector rc; diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index dc1263d7991deb..c879e8780f1370 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -21,6 +21,7 @@ #include "openvino/opsets/opset1.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/reference/convert.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/file_util.hpp" #include "pugixml.hpp" #include "transformations/hash.hpp" @@ -532,6 +533,19 @@ class XmlSerializer : public ov::AttributeVisitor { m_xml_node.append_attribute("offset").set_value(static_cast(offset)); m_xml_node.append_attribute("size").set_value(static_cast(new_size)); } + } else if (const auto& a = ov::as_type>>(&adapter)) { + if (name == "value" && translate_type_name(m_node_type_name) == "Const") { + const int64_t size = a->get()->size(); + size_t new_size; + int64_t offset = m_constant_write_handler.write(static_cast(a->get()->get_ptr()), + size, + &new_size, + m_compress_to_fp16, + m_output_element_type); + + m_xml_node.append_attribute("offset").set_value(static_cast(offset)); + m_xml_node.append_attribute("size").set_value(static_cast(new_size)); + } } else if (const auto& a = ov::as_type>(&adapter)) { const auto& attrs = a->get(); diff --git a/src/core/src/runtime/aligned_buffer.cpp b/src/core/src/runtime/aligned_buffer.cpp index d7c5229fcc0efa..4207eefe5db9b7 100644 --- a/src/core/src/runtime/aligned_buffer.cpp +++ b/src/core/src/runtime/aligned_buffer.cpp @@ -8,6 +8,7 @@ #include #include "ngraph/util.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" NGRAPH_SUPPRESS_DEPRECATED_START @@ -64,3 +65,53 @@ AttributeAdapter>::AttributeAdap std::shared_ptr& value) : DirectValueAccessor>(value) {} } // namespace ov +NGRAPH_SUPPRESS_DEPRECATED_END + +namespace ov { 
+AlignedBuffer::AlignedBuffer() : m_allocated_buffer(nullptr), m_aligned_buffer(nullptr), m_byte_size(0) {} + +AlignedBuffer::AlignedBuffer(size_t byte_size, size_t alignment) : m_byte_size(byte_size) { + m_byte_size = std::max(1, byte_size); + size_t allocation_size = m_byte_size + alignment; + m_allocated_buffer = new char[allocation_size]; + m_aligned_buffer = m_allocated_buffer; + size_t mod = (alignment != 0) ? reinterpret_cast(m_aligned_buffer) % alignment : 0; + + if (mod != 0) { + m_aligned_buffer += (alignment - mod); + } +} + +AlignedBuffer::AlignedBuffer(AlignedBuffer&& other) + : m_allocated_buffer(other.m_allocated_buffer), + m_aligned_buffer(other.m_aligned_buffer), + m_byte_size(other.m_byte_size) { + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; +} + +AlignedBuffer::~AlignedBuffer() { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } +} + +AlignedBuffer& AlignedBuffer::operator=(AlignedBuffer&& other) { + if (this != &other) { + if (m_allocated_buffer != nullptr) { + delete[] m_allocated_buffer; + } + m_allocated_buffer = other.m_allocated_buffer; + m_aligned_buffer = other.m_aligned_buffer; + m_byte_size = other.m_byte_size; + other.m_allocated_buffer = nullptr; + other.m_aligned_buffer = nullptr; + other.m_byte_size = 0; + } + return *this; +} + +AttributeAdapter>::AttributeAdapter(std::shared_ptr& value) + : DirectValueAccessor>(value) {} +} // namespace ov diff --git a/src/core/tests/aligned_buffer.cpp b/src/core/tests/aligned_buffer.cpp index fae5929ba3db2a..604d153eeb5c36 100644 --- a/src/core/tests/aligned_buffer.cpp +++ b/src/core/tests/aligned_buffer.cpp @@ -2,31 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/runtime/aligned_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "gtest/gtest.h" -using namespace ngraph; +using namespace ov; -OPENVINO_SUPPRESS_DEPRECATED_START TEST(aligned_buffer, alignment) { - 
runtime::AlignedBuffer buffer(100, 64); + AlignedBuffer buffer(100, 64); size_t addr = reinterpret_cast(buffer.get_ptr()) % 64; EXPECT_EQ(addr, 0); } TEST(aligned_buffer, move) { { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2(std::move(buffer1)); + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2(std::move(buffer1)); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); EXPECT_EQ(buffer2.size(), 100); EXPECT_NE(buffer2.get_ptr(), nullptr); } { - runtime::AlignedBuffer buffer1(100, 64); - runtime::AlignedBuffer buffer2; + AlignedBuffer buffer1(100, 64); + AlignedBuffer buffer2; buffer2 = std::move(buffer1); EXPECT_EQ(buffer1.size(), 0); EXPECT_EQ(buffer1.get_ptr(), nullptr); diff --git a/src/core/tests/bfloat16.cpp b/src/core/tests/bfloat16.cpp index bb3a35dc9953a3..f031d01226cf41 100644 --- a/src/core/tests/bfloat16.cpp +++ b/src/core/tests/bfloat16.cpp @@ -10,7 +10,7 @@ #include #include "common_test_utils/float_util.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/log.hpp" using namespace std; @@ -140,9 +140,8 @@ TEST(bfloat16, numeric_limits) { } TEST(benchmark, bfloat16) { - OPENVINO_SUPPRESS_DEPRECATED_START size_t buffer_size = 128 * 3 * 224 * 224; - ngraph::runtime::AlignedBuffer data(buffer_size * sizeof(float), 4096); + ov::AlignedBuffer data(buffer_size * sizeof(float), 4096); float* f = static_cast(data.get_ptr()); // vector data(buffer_size); std::mt19937 rng(2112); @@ -153,53 +152,36 @@ TEST(benchmark, bfloat16) { OPENVINO_INFO << "buffer size " << buffer_size << " floats or " << data.size() << " bytes"; { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16(f[i]); } - timer.stop(); - 
OPENVINO_INFO << "float to bfloat16 ctor " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::truncate(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 truncate " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest " << timer.get_milliseconds() << "ms"; } { - ngraph::runtime::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); + ov::AlignedBuffer bf_data(buffer_size * sizeof(bfloat16), 4096); bfloat16* p = static_cast(bf_data.get_ptr()); - ngraph::stopwatch timer; - timer.start(); for (size_t i = 0; i < buffer_size; ++i) { p[i] = bfloat16::round_to_nearest_even(f[i]); } - timer.stop(); - OPENVINO_INFO << "float to bfloat16 round to nearest even " << timer.get_milliseconds() << "ms"; } - OPENVINO_SUPPRESS_DEPRECATED_END } TEST(bfloat16, assigns) { diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 0feefb84bed8a5..010bb83b3e75d9 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -10,6 +10,8 @@ #include "common_test_utils/type_prop.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" using namespace ov; using namespace std; @@ -1726,14 +1728,12 @@ TEST(constant, lazy_bitwise_identical) { auto shape = Shape{10, 1000, 1000}; auto type = element::i32; auto 
byte_size = shape_size(shape) * sizeof(int32_t); - OPENVINO_SUPPRESS_DEPRECATED_START - auto aligned_weights_buffer = std::make_shared(byte_size); + auto aligned_weights_buffer = std::make_shared(byte_size); std::memset(aligned_weights_buffer->get_ptr(), 1, byte_size); - auto weights = std::make_shared>>( - aligned_weights_buffer->get_ptr(), - aligned_weights_buffer->size(), - aligned_weights_buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + auto weights = + std::make_shared>>(aligned_weights_buffer->get_ptr(), + aligned_weights_buffer->size(), + aligned_weights_buffer); using namespace std::chrono; auto create_constant = [&]() { diff --git a/src/core/tests/visitors/visitors.hpp b/src/core/tests/visitors/visitors.hpp index 838eade854181b..893d982a59b3eb 100644 --- a/src/core/tests/visitors/visitors.hpp +++ b/src/core/tests/visitors/visitors.hpp @@ -10,12 +10,13 @@ #include #include "ngraph/factory.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/core/attribute_visitor.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace test { @@ -217,10 +218,9 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - auto& data = m_values.get(name); - data->read(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + auto& data = m_values.get(name); + std::memcpy(a->get()->get_ptr(), data.data(), a->get()->size()); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( &adapter)) { @@ -240,7 +240,6 @@ class DeserializeAttributeVisitor : public AttributeVisitor { } else { 
OPENVINO_THROW("Attribute \"", name, "\" cannot be unmarshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { @@ -309,10 +308,9 @@ class SerializeAttributeVisitor : public AttributeVisitor { } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - OPENVINO_SUPPRESS_DEPRECATED_START - if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { - ngraph::HostTensorPtr data = std::make_shared(element::u8, Shape{a->get()->size()}); - data->write(a->get()->get_ptr(), a->get()->size()); + if (auto a = ::ov::as_type<::ov::AttributeAdapter>>(&adapter)) { + ov::Tensor data(element::u8, Shape{a->get()->size()}); + std::memcpy(data.data(), a->get()->get_ptr(), a->get()->size()); m_values.insert(name, data); } else if (auto a = ov::as_type< ov::AttributeAdapter>>>( @@ -333,7 +331,6 @@ class SerializeAttributeVisitor : public AttributeVisitor { } else { OPENVINO_THROW("Attribute \"", name, "\" cannot be marshalled"); } - OPENVINO_SUPPRESS_DEPRECATED_END } // The remaining adapter methods fall back on the void adapter if not implemented void on_adapter(const std::string& name, ValueAccessor& adapter) override { diff --git a/src/frontends/ir/src/frontend.cpp b/src/frontends/ir/src/frontend.cpp index 8b8dca4d995ffb..ba515b5560641f 100644 --- a/src/frontends/ir/src/frontend.cpp +++ b/src/frontends/ir/src/frontend.cpp @@ -9,10 +9,10 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/shared_buffer.hpp" #include "openvino/core/any.hpp" #include "openvino/core/so_extension.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" #include "transformations/resolve_names_collisions.hpp" @@ -116,8 +116,7 @@ void 
FrontEnd::add_extension(const ov::Extension::Ptr& ext) { InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const { std::ifstream local_model_stream; std::istream* provided_model_stream = nullptr; - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr weights; + std::shared_ptr weights; auto create_extensions_map = [&]() -> std::unordered_map { std::unordered_map exts; @@ -180,8 +179,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } else if (variant.is()) { weights_path = variant.as(); #endif - } else if (variant.is>()) { - weights = variant.as>(); + } else if (variant.is>()) { + weights = variant.as>(); } } bool enable_mmap = variants[variants.size() - 1].is() ? variants[variants.size() - 1].as() : false; @@ -204,10 +203,9 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const if (!weights_path.empty()) { if (enable_mmap) { auto mapped_memory = ov::load_mmap_object(weights_path); - weights = - std::make_shared>>(mapped_memory->data(), - mapped_memory->size(), - mapped_memory); + weights = std::make_shared>>(mapped_memory->data(), + mapped_memory->size(), + mapped_memory); } else { std::ifstream bin_stream; bin_stream.open(weights_path.c_str(), std::ios::binary); @@ -222,17 +220,16 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const size_t file_size = bin_stream.tellg(); bin_stream.seekg(0, std::ios::beg); - auto aligned_weights_buffer = std::make_shared(file_size); + auto aligned_weights_buffer = std::make_shared(file_size); bin_stream.read(aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size()); bin_stream.close(); - weights = std::make_shared>>( + weights = std::make_shared>>( aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size(), aligned_weights_buffer); } } - OPENVINO_SUPPRESS_DEPRECATED_END return create_input_model(); } diff --git a/src/frontends/ir/src/input_model.cpp b/src/frontends/ir/src/input_model.cpp index 2f58a68c94f89b..6a32b22f786b52 100644 --- 
a/src/frontends/ir/src/input_model.cpp +++ b/src/frontends/ir/src/input_model.cpp @@ -18,10 +18,9 @@ #include "openvino/util/common_util.hpp" #include "utils.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace { void parse_pre_process(pugi::xml_node& root, - std::shared_ptr weights, + std::shared_ptr weights, std::shared_ptr model) { /* Preprocessing block can have two preprocessing types: * @@ -183,7 +182,9 @@ void parse_pre_process(pugi::xml_node& root, const char* data = weights->get_ptr() + offset; per_channel_values[item.first] = ov::op::v0::Constant::create(input_type, mean_shape, data); } + OPENVINO_SUPPRESS_DEPRECATED_START auto const_node = get_constant_from_source(std::make_shared(per_channel_values, 0)); + OPENVINO_SUPPRESS_DEPRECATED_END OPENVINO_ASSERT(const_node); const auto& consumers = input_node->output(0).get_target_inputs(); auto add = std::make_shared(input_node, const_node); @@ -193,15 +194,13 @@ void parse_pre_process(pugi::xml_node& root, } } } // namespace -OPENVINO_SUPPRESS_DEPRECATED_END namespace ov { namespace frontend { namespace ir { -OPENVINO_SUPPRESS_DEPRECATED_START class InputModel::InputModelIRImpl { - std::shared_ptr m_weights; + std::shared_ptr m_weights; std::unordered_map m_extensions; std::unordered_map m_opsets; pugi::xml_node m_root; @@ -209,7 +208,7 @@ class InputModel::InputModelIRImpl { public: InputModelIRImpl(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) : m_weights(weights), m_extensions(extensions) { @@ -227,11 +226,10 @@ class InputModel::InputModelIRImpl { }; InputModel::InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions) { _impl = std::make_shared(stream, weights, extensions); } -OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr InputModel::convert() { return _impl->convert(); diff --git a/src/frontends/ir/src/input_model.hpp 
b/src/frontends/ir/src/input_model.hpp index 1b4da95f098b64..d5a9b64abaf0f8 100644 --- a/src/frontends/ir/src/input_model.hpp +++ b/src/frontends/ir/src/input_model.hpp @@ -7,9 +7,9 @@ #include #include -#include "ngraph/runtime/aligned_buffer.hpp" #include "openvino/frontend/manager.hpp" #include "openvino/frontend/visibility.hpp" +#include "openvino/runtime/aligned_buffer.hpp" namespace ov { namespace frontend { @@ -20,11 +20,9 @@ class InputModel : public ov::frontend::InputModel { std::shared_ptr _impl; public: - OPENVINO_SUPPRESS_DEPRECATED_START InputModel(std::istream& stream, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& extensions); - OPENVINO_SUPPRESS_DEPRECATED_END std::shared_ptr convert(); }; diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 42be66281d5d24..d245301633e4e3 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -20,6 +20,8 @@ #include "openvino/op/util/read_value_base.hpp" #include "openvino/op/util/sub_graph_base.hpp" #include "openvino/op/util/variable.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "rt_info_deserializer.hpp" #include "transformations/rt_info/attributes.hpp" #include "utils.hpp" @@ -258,7 +260,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< if (skip_names.count(name) && !getStrAttribute(m_node.child("data"), name, val)) return; - OPENVINO_SUPPRESS_DEPRECATED_START if (auto a = ov::as_type>(&adapter)) { static_cast(*a) = ov::element::Type(val); } else if (auto a = ov::as_type>(&adapter)) { @@ -322,7 +323,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_id}); } a->set(m_variables[variable_id]); - } else if (auto a = ov::as_type>>(&adapter)) { + } else if 
(auto a = ov::as_type>>(&adapter)) { std::string value; pugi::xml_node dn = m_node.child("data"); auto type = pugixml::utils::get_str_attr(m_node, "type"); @@ -331,7 +332,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("No attrtibutes defined for ", type, " op!"); if (getStrAttribute(dn, name, value)) { - auto buffer = std::make_shared(value.size()); + auto buffer = std::make_shared(value.size()); auto data = static_cast(buffer->get_ptr()); value.copy(data, value.size()); a->set(buffer); @@ -356,11 +357,7 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< OPENVINO_THROW("Attribute and shape size are inconsistent for ", type, " op!"); char* data = m_weights->get_ptr() + offset; - auto buffer = - std::make_shared>>( - data, - size, - m_weights); + auto buffer = std::make_shared>>(data, size, m_weights); a->set(buffer); } } else if (auto a = ov::as_type>(&adapter)) { @@ -388,7 +385,6 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< } else { OPENVINO_THROW("Error IR reading. 
Attribute adapter can not be found for ", name, " parameter"); } - OPENVINO_SUPPRESS_DEPRECATED_END } void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor>& adapter) { @@ -409,10 +405,8 @@ void ov::XmlDeserializer::on_adapter(const std::string& name, ov::ValueAccessor< adapter.set(model); } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::parse_function( - const pugi::xml_node& root, - const std::shared_ptr& weights) { +std::shared_ptr ov::XmlDeserializer::parse_function(const pugi::xml_node& root, + const std::shared_ptr& weights) { // OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); struct FunctionNodes { @@ -553,7 +547,6 @@ std::shared_ptr ov::XmlDeserializer::parse_function( return function; } -OPENVINO_SUPPRESS_DEPRECATED_END class MetaDataParser : public ov::Meta { public: @@ -751,12 +744,10 @@ static const std::string& translate_type_name(const std::string& name) { return name; } -OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::XmlDeserializer::create_node( - const std::vector>& inputs, - const pugi::xml_node& node, - const std::shared_ptr& weights, - const GenericLayerParams& params) { +std::shared_ptr ov::XmlDeserializer::create_node(const std::vector>& inputs, + const pugi::xml_node& node, + const std::shared_ptr& weights, + const GenericLayerParams& params) { // Check that inputs are correctly defined for (size_t i = 0; i < inputs.size(); i++) { if (!inputs[i].get_node()) @@ -959,4 +950,3 @@ std::shared_ptr ov::XmlDeserializer::create_node( return ovNode; } -OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/frontends/ir/src/ir_deserializer.hpp b/src/frontends/ir/src/ir_deserializer.hpp index f2062393f2986a..0b0d606ea4170b 100644 --- a/src/frontends/ir/src/ir_deserializer.hpp +++ b/src/frontends/ir/src/ir_deserializer.hpp @@ -10,11 +10,11 @@ #include #include "input_model.hpp" -#include "ngraph/runtime/aligned_buffer.hpp" #include 
"openvino/core/attribute_visitor.hpp" #include "openvino/core/op_extension.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils.hpp" namespace ov { @@ -58,9 +58,8 @@ struct GenericLayerParams { class XmlDeserializer : public ov::AttributeVisitor { public: - OPENVINO_SUPPRESS_DEPRECATED_START explicit XmlDeserializer(const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const std::unordered_map& opsets, const std::unordered_map& extensions, std::unordered_map>& variables, @@ -71,7 +70,6 @@ class XmlDeserializer : public ov::AttributeVisitor { m_extensions(extensions), m_variables(variables), m_version(version) {} - OPENVINO_SUPPRESS_DEPRECATED_END void on_adapter(const std::string& name, ov::ValueAccessor& value) override { std::string val; @@ -164,14 +162,12 @@ class XmlDeserializer : public ov::AttributeVisitor { // TODO consider to call only once per layer/TI-Loop node IoMap updated_io_map(const pugi::xml_node& node, const pugi::xml_node& body_node); - OPENVINO_SUPPRESS_DEPRECATED_START /// \brief Traverses xml node representation in order to create ov function for it. /// \param node xml node representation /// \param weights weights attached to current node /// \return shared pointer to function representing input node std::shared_ptr parse_function(const pugi::xml_node& root, - const std::shared_ptr& weights); - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& weights); /// \brief Traverses xml node representation in order to get the purpose attribute of /// inputs/outputs in the body of Loop op. 
\param node xml node representation \return struct /// with value of purpuse attribute @@ -179,12 +175,10 @@ class XmlDeserializer : public ov::AttributeVisitor { GenericLayerParams parse_generic_params(const pugi::xml_node& node); - OPENVINO_SUPPRESS_DEPRECATED_START std::shared_ptr create_node(const ov::OutputVector& inputs, const pugi::xml_node& node, - const std::shared_ptr& weights, + const std::shared_ptr& weights, const GenericLayerParams& params); - OPENVINO_SUPPRESS_DEPRECATED_END void read_meta_data(const std::shared_ptr& model, const pugi::xml_node& meta_section); @@ -194,9 +188,7 @@ class XmlDeserializer : public ov::AttributeVisitor { // -- DATA -- const pugi::xml_node m_node; - OPENVINO_SUPPRESS_DEPRECATED_START - const std::shared_ptr& m_weights; - OPENVINO_SUPPRESS_DEPRECATED_END + const std::shared_ptr& m_weights; const std::unordered_map& m_opsets; const std::unordered_map& m_extensions; std::unordered_map>& m_variables; diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index cb54edf8e95e22..76a97b057f2a61 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -15,6 +15,7 @@ #include "ngraph/shape.hpp" #include "ngraph/type/element_type.hpp" #include "onnx_common/utils.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "utils/common.hpp" #include "utils/tensor_external_data.hpp" @@ -302,15 +303,13 @@ class Tensor { template std::vector get_external_data() const { const auto ext_data = detail::TensorExternalData(*m_tensor_proto); - OPENVINO_SUPPRESS_DEPRECATED_START - std::shared_ptr buffer = nullptr; + std::shared_ptr buffer = nullptr; if (m_mmap_cache) { buffer = ext_data.load_external_mmap_data(m_model_dir, m_mmap_cache); } else { buffer = ext_data.load_external_data(m_model_dir); } return std::vector(buffer->get_ptr(), buffer->get_ptr() + buffer->size()); - OPENVINO_SUPPRESS_DEPRECATED_END } const void* 
get_data_ptr() const { diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp index 53e83e5d714101..9a40d1fc6d7595 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.cpp @@ -12,7 +12,6 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/log.hpp" -OPENVINO_SUPPRESS_DEPRECATED_START namespace ngraph { namespace onnx_import { namespace detail { @@ -51,13 +50,13 @@ Buffer TensorExternalData::load_external_mmap_data(const std:: if (m_data_length > mapped_memory->size() || mapped_memory->size() == 0) { throw error::invalid_external_data{*this}; } - return std::make_shared>>( + return std::make_shared>>( mapped_memory->data() + m_offset, m_data_length > 0 ? m_data_length : static_cast(file_size) - m_offset, mapped_memory); } -Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { +Buffer TensorExternalData::load_external_data(const std::string& model_dir) const { auto full_path = ov::util::path_join({model_dir, m_data_location}); #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) NGRAPH_SUPPRESS_DEPRECATED_START @@ -82,14 +81,13 @@ Buffer TensorExternalData::load_external_data(co // default value of m_offset is 0 external_data_stream.seekg(m_offset, std::ios::beg); - auto read_data = std::make_shared(read_data_length); + auto read_data = std::make_shared(read_data_length); external_data_stream.read(read_data->get_ptr(), read_data_length); external_data_stream.close(); - auto buffer = std::make_shared>>( - read_data->get_ptr(), - read_data->size(), - read_data); + auto buffer = std::make_shared>>(read_data->get_ptr(), + read_data->size(), + read_data); return buffer; } diff --git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp index 
a13ccd457f485c..eb04e001e7ed4c 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp @@ -6,15 +6,15 @@ #include -#include "ngraph/runtime/shared_buffer.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" namespace ngraph { namespace onnx_import { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START template -using Buffer = std::shared_ptr>>; +using Buffer = std::shared_ptr>>; using MappedMemoryHandles = std::shared_ptr>>; /// \brief Helper class used to load tensor data from external files class TensorExternalData { @@ -28,7 +28,7 @@ class TensorExternalData { /// the invalid_external_data exception is thrown. /// /// \return External binary data loaded into the SharedBuffer - Buffer load_external_data(const std::string& model_dir) const; + Buffer load_external_data(const std::string& model_dir) const; /// \brief Map (mmap for lin, MapViewOfFile for win) external data from tensor passed to constructor /// @@ -50,7 +50,6 @@ class TensorExternalData { uint64_t m_data_length = 0; std::string m_sha1_digest{}; }; -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail } // namespace onnx_import } // namespace ngraph diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp index 0c86041440a8ff..501df1c504309b 100644 --- a/src/frontends/tensorflow/src/op/var_handle.cpp +++ b/src/frontends/tensorflow/src/op/var_handle.cpp @@ -9,6 +9,7 @@ #include "input_model.hpp" #include "ngraph/runtime/shared_buffer.hpp" #include "openvino/opsets/opset8.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" #include "ov_tensorflow/tensor_bundle.pb.h" @@ -44,15 +45,12 @@ static std::shared_ptr read_variable(std::shared_ptr v node, static_cast(mapped_memory->size()) >= entry.offset() + entry.size(), "[TensorFlow Frontend] 
Internal error: Variable entry size is out of bounds of mapped memory size."); - OPENVINO_SUPPRESS_DEPRECATED_START return std::make_shared( ov_type, shape, - std::make_shared>>( - mapped_memory->data() + entry.offset(), - entry.size(), - mapped_memory)); - OPENVINO_SUPPRESS_DEPRECATED_END + std::make_shared>>(mapped_memory->data() + entry.offset(), + entry.size(), + mapped_memory)); } else { std::vector var_data; var_data.resize(size); diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index 7fe34b42ed7948..f5aca3586e8339 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -20,6 +20,7 @@ #include "ie_icnn_network.hpp" #include "ie_input_info.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/shared_buffer.hpp" #ifdef ENABLE_IR_V7_READER # include "legacy/ie_ir_version.hpp" #endif @@ -388,8 +389,8 @@ CNNNetwork details::ReadNetwork(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { char* data = weights->cbuffer().as(); - std::shared_ptr weights_buffer = - std::make_shared>(data, weights->byteSize(), weights); + std::shared_ptr weights_buffer = + std::make_shared>(data, weights->byteSize(), weights); params.emplace_back(weights_buffer); } diff --git a/src/inference/src/model_reader.cpp b/src/inference/src/model_reader.cpp index 1837d75a2d44aa..bc67f6d21b225a 100644 --- a/src/inference/src/model_reader.cpp +++ b/src/inference/src/model_reader.cpp @@ -9,6 +9,8 @@ #include "openvino/core/model.hpp" #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" #include "transformations/utils/utils.hpp" @@ -155,10 +157,10 @@ std::shared_ptr read_model(const std::string& model, ov::AnyVector params{&modelStream}; if (weights) { - std::shared_ptr weights_buffer = - 
std::make_shared>(reinterpret_cast(weights.data()), - weights.get_byte_size(), - weights); + std::shared_ptr weights_buffer = + std::make_shared>(reinterpret_cast(weights.data()), + weights.get_byte_size(), + weights); params.emplace_back(weights_buffer); } diff --git a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp index 32903887e81181..b1eb1e0539e9a0 100644 --- a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp +++ b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp @@ -51,6 +51,7 @@ #include "legacy/ngraph_ops/selu_ie.hpp" #include "legacy/ngraph_ops/tile_ie.hpp" #include "legacy/ngraph_ops/topk_ie.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" #include "transformations/utils/utils.hpp" @@ -475,6 +476,11 @@ void CNNLayerCreator::on_adapter(const std::string& name, ::ngraph::ValueAccesso const auto data_beg = static_cast(a->get()->get_ptr()); params[name] = std::string(data_beg, a->get()->size()); } + } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter>>(&adapter)) { + if (std::string(node->get_type_name()) != "Constant") { + const auto data_beg = static_cast(a->get()->get_ptr()); + params[name] = std::string(data_beg, a->get()->size()); + } } else if (const auto& a = ngraph::as_type>(&adapter)) { const auto& attrs = a->get(); params[name] = details::joinVec(attrs); diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp index cb90c0699a126d..22373d55292d2a 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/graph_comparator.hpp @@ -14,6 +14,7 
@@ #include "openvino/op/loop.hpp" #include "openvino/op/util/framework_node.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/runtime/aligned_buffer.hpp" class FunctionsComparator { public: @@ -945,9 +946,7 @@ class ReadAndCompareAttributes : public ov::AttributeVisitor { template void verify(const std::string& name, const AttrValue& attr_value); - OPENVINO_SUPPRESS_DEPRECATED_START - void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); - OPENVINO_SUPPRESS_DEPRECATED_END + void verify_mem_buf(const std::string& name, const std::shared_ptr& buffer); using ModelAccessor = ov::ValueAccessor>; diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index e3c5a8b2ec1a55..053e0d6d42899a 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -895,7 +895,6 @@ void check_rt_info(const std::shared_ptr& f) { namespace attributes { namespace detail { -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { insert(name, inputs->get()); @@ -904,7 +903,7 @@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { const auto beg = static_cast(a->get()->get_ptr()); const auto end = beg + a->get()->size(); insert(name, storage::MemoryChunk{storage::MemoryChunk::Data(beg, end)}); @@ -923,7 +922,6 @@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END template void 
ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& attr_value) { if (should_return()) { @@ -942,9 +940,8 @@ void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, - const std::shared_ptr& buffer) { + const std::shared_ptr& buffer) { if (should_return()) { return; } @@ -961,7 +958,6 @@ void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, return; } } -OPENVINO_SUPPRESS_DEPRECATED_END void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAccessor& adapter) { if (should_return()) { @@ -980,7 +976,6 @@ void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAcc } } -OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { verify(name, inputs->get()); @@ -989,7 +984,7 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA } else if (ov::is_type>(&adapter)) { // drop comparison, no more info than port indexes which will be check in // subgraph::compare_io - } else if (auto a = ov::as_type>>(&adapter)) { + } else if (auto a = ov::as_type>>(&adapter)) { verify_mem_buf(name, a->get()); } else if (auto attrs = ov::as_type>(&adapter)) { verify(name, attrs->get()); @@ -1005,7 +1000,6 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA adapter.get_type_info().name + "']"; } } -OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail From 5853509b3c69e72f3ca80c3939090ea5b8c18fcf Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 24 Oct 2023 10:18:04 +0400 Subject: [PATCH 016/275] Fixed ngraph reference impl for Range operation (#20631) * Fixed ngraph reference impl for Range operation * Truncate range for integer output type * explicit static cast --- 
src/core/src/op/range.cpp | 33 +++++++++++++++++++++------------ src/core/tests/eval.cpp | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/core/src/op/range.cpp b/src/core/src/op/range.cpp index 6285391ae56e06..204161ce10ac7b 100644 --- a/src/core/src/op/range.cpp +++ b/src/core/src/op/range.cpp @@ -144,37 +144,46 @@ bool evaluate(const HostTensorPtr& out, const HostTensorPtr& step, int version) { using T = typename element_type_traits::value_type; - T start_val; - T stop_val; - T step_val; + double start_val; + double stop_val; + double step_val; if (version < 4) { - start_val = *start->get_data_ptr(); - stop_val = *stop->get_data_ptr(); - step_val = *step->get_data_ptr(); + start_val = static_cast(*start->get_data_ptr()); + stop_val = static_cast(*stop->get_data_ptr()); + step_val = static_cast(*step->get_data_ptr()); if (!(check_value(start_val) && check_value(stop_val) && check_value(step_val) && (step_val != static_cast(0)))) { return false; } } else { - if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && - get_casted_value(step, &step_val))) { + if (!(get_casted_value(start, &start_val) && get_casted_value(stop, &stop_val) && + get_casted_value(step, &step_val))) { return false; } } int64_t out_size = 0; + if (ov::element::Type(ET).is_integral_number()) { + start_val = std::trunc(start_val); + stop_val = std::trunc(stop_val); + step_val = std::trunc(step_val); + } + int64_t steps = static_cast(std::ceil(double(stop_val - start_val) / step_val)); if (steps > 0) { out_size = steps; } ov::Shape out_shape = ov::Shape({static_cast(out_size)}); out->set_shape(out_shape); - ov::reference::range(&start_val, &step_val, shape_size(out_shape), out->get_data_ptr()); + + T start_val_casted = static_cast(start_val); + T step_val_casted = static_cast(step_val); + ov::reference::range(&start_val_casted, &step_val_casted, shape_size(out_shape), out->get_data_ptr()); return true; } -bool 
evaluate_power(const HostTensorPtr& out, +bool evaluate_range(const HostTensorPtr& out, const HostTensorPtr& start, const HostTensorPtr& stop, const HostTensorPtr& step, @@ -209,7 +218,7 @@ bool op::v4::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, m_output_type, 4); + return rangeop::evaluate_range(out, start, stop, step, m_output_type, 4); } bool op::v4::Range::has_evaluate() const { @@ -381,7 +390,7 @@ bool op::v0::Range::evaluate(const HostTensorVector& outputs, const HostTensorVe HostTensorPtr start = inputs[0]; HostTensorPtr stop = inputs[1]; HostTensorPtr step = inputs[2]; - return rangeop::evaluate_power(out, start, stop, step, start->get_element_type(), 0); + return rangeop::evaluate_range(out, start, stop, step, start->get_element_type(), 0); } bool op::v0::Range::has_evaluate() const { diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index 86b3cc2ecf82ce..fabf47f0f2f248 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -178,6 +178,27 @@ TEST(eval, evaluate_dynamic_range_sum) { ASSERT_EQ(cval, seq); } +TEST(eval, evaluate_dynamic_range_fp16_out) { + auto p_start = make_shared(element::i32, PartialShape{}); + auto p_stop = make_shared(element::i32, PartialShape{}); + auto p_step = make_shared(element::i32, PartialShape{}); + auto range = make_shared(p_start, p_stop, p_step, ov::element::f16); + auto model = make_shared(OutputVector{range}, ParameterVector{p_start, p_stop, p_step}); + auto result_tensor = ov::Tensor(); + auto out_vector = ov::TensorVector{result_tensor}; + auto in_vector = ov::TensorVector{make_tensor({}, {0}), + make_tensor({}, {3087}), + make_tensor({}, {1})}; + ASSERT_TRUE(model->evaluate(out_vector, in_vector)); + result_tensor = out_vector.at(0); + EXPECT_EQ(result_tensor.get_element_type(), element::f16); + 
EXPECT_EQ(result_tensor.get_shape(), (Shape{3087})); + auto cval = read_vector(result_tensor); + for (size_t i = 0; i < 3087; i++) { + ASSERT_EQ(cval[i], ov::float16(i)); + } +} + TEST(eval, evaluate_broadcast_v3_bidirectional) { Shape shape_a{4, 1}; auto A = make_shared(element::f32, shape_a); From ea6922386ead2bb8832242e613bb27e3f8754d73 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Tue, 24 Oct 2023 08:38:23 +0100 Subject: [PATCH 017/275] [GPU] Refactor ConvertColorNV12, ConvolutionBackprop, Convolution (#20376) * ConvertColorNV12 * ConvolutionBackprop * Convolution * fix after review --- .../single_layer_tests/convert_color_nv12.cpp | 58 +++++++--- .../single_layer_tests/convolution.cpp | 39 +++---- .../convolution_backprop_data.cpp | 105 ++++++------------ 3 files changed, 95 insertions(+), 107 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp index 63a57786fb3b79..5f74409c75d2f9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_nv12.cpp @@ -4,12 +4,11 @@ #include -#include "single_layer_tests/convert_color_nv12.hpp" +#include "single_op_tests/convert_color_nv12.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::ConvertColorNV12LayerTest; const std::vector inShapes_nhwc = { {1, 10, 10, 1} @@ -20,27 +19,60 @@ const std::vector inTypes = { ov::element::f32 }; -INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12, +auto generate_input_static_shapes = [] (const std::vector& original_shapes, bool single_plane) { + std::vector> result_shapes; + for (const auto& original_shape : original_shapes) { + std::vector one_result_shapes; + if 
(single_plane) { + auto shape = original_shape; + shape[1] = shape[1] * 3 / 2; + one_result_shapes.push_back(shape); + } else { + auto shape = original_shape; + one_result_shapes.push_back(shape); + auto uvShape = ov::Shape{shape[0], shape[1] / 2, shape[2] / 2, 2}; + one_result_shapes.push_back(uvShape); + } + result_shapes.push_back(one_result_shapes); + } + return result_shapes; +}; + +auto in_shapes_single_plane_static = generate_input_static_shapes(inShapes_nhwc, true); +auto in_shapes_two_planes_static = generate_input_static_shapes(inShapes_nhwc, false); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane, ConvertColorNV12LayerTest, - ::testing::Combine(::testing::ValuesIn(inShapes_nhwc), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_single_plane_static)), ::testing::ValuesIn(inTypes), ::testing::Bool(), + ::testing::Values(true), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ConvertColorNV12LayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12TwoPlane, + ConvertColorNV12LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_two_planes_static)), + ::testing::ValuesIn(inTypes), ::testing::Bool(), + ::testing::Values(false), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvertColorNV12LayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12_acc, - ConvertColorNV12AccuracyTest, - ::testing::Combine(::testing::Values(ov::Shape{1, 16 * 6, 16, 1}), +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorNV12SinglePlane_acc, + ConvertColorNV12LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + generate_input_static_shapes({{1, 16 * 6, 16, 1}}, true))), ::testing::Values(ov::element::u8), - ::testing::Bool(), - ::testing::Bool(), + ::testing::Values(false), + ::testing::Values(true), ::testing::Values(ov::test::utils::DEVICE_GPU)), 
ConvertColorNV12LayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12_acc, - ConvertColorNV12AccuracyTest, - ::testing::Combine(::testing::Values(ov::Shape{1, 256 * 256, 256, 1}), +INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorNV12SinglePlane_acc, + ConvertColorNV12LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + generate_input_static_shapes({{1, 256 * 256, 256, 1}}, true))), ::testing::Values(ov::element::u8), ::testing::Values(false), ::testing::Values(true), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp index 13293d3f6dc42a..2f00b4e38e7090 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp @@ -4,15 +4,14 @@ #include -#include "single_layer_tests/convolution.hpp" +#include "single_op_tests/convolution.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +using ov::test::ConvolutionLayerTest; +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; /* ============= 2D Convolution ============= */ @@ -27,9 +26,9 @@ const std::vector> padEnds = {{0, 0}, const std::vector> dilations = {{1, 1}, {3, 1}}; const std::vector numOutChannels = {1, 5}; -const std::vector padTypes = { - ngraph::op::PadType::EXPLICIT, - ngraph::op::PadType::VALID +const std::vector padTypes = { + ov::op::PadType::EXPLICIT, + ov::op::PadType::VALID }; const auto conv2DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(kernels), @@ -38,7 +37,7 @@ const auto conv2DParams_ExplicitPadding = 
::testing::Combine( ::testing::ValuesIn(padEnds), ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::PadType::EXPLICIT) ); const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(kernels), @@ -54,11 +53,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_ExplicitPadding, ConvolutionLayerTe ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionLayerTest::getTestCaseName); @@ -66,11 +61,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_AutoPadValid, ConvolutionLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionLayerTest::getTestCaseName); /* ============= 3D Convolution ============= */ @@ -94,18 +85,14 @@ const auto conv3DParams = ::testing::Combine( ::testing::ValuesIn(paddings3d), ::testing::ValuesIn(dilations3d), ::testing::ValuesIn(numOutChannels3d), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_Convolution3D_Basic1, ConvolutionLayerTest, ::testing::Combine( 
conv3DParams, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 10, 10, 10})), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 10, 10, 10}})), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index f80df8897d6e84..1488f7cbf6358b 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -4,32 +4,32 @@ #include -#include "single_layer_tests/convolution_backprop_data.hpp" +#include "single_op_tests/convolution_backprop_data.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::ConvolutionBackpropDataLayerTest; +using ov::test::convBackpropDataLayerTestParamsSet; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; const std::vector numOutChannels = {1, 5, 16}; -const std::vector> emptyOutputShape = {{}}; +const std::vector emptyOutputShape = {{}}; const std::vector> emptyOutputPadding = {{}}; /* ============= 2D ConvolutionBackpropData ============= */ -const std::vector netPrecisions2D = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions2D = { + ov::element::f32, + 
ov::element::f16 }; -const std::vector> inputShapes2D = {{1, 3, 30, 30}, - {1, 16, 10, 10}, - {1, 32, 10, 10}}; -const std::vector> kernels2D = {{1, 1}, {3, 3}, {3, 5}}; +const std::vector> inputShapes2D = {{{1, 3, 30, 30}}, + {{1, 16, 10, 10}}, + {{1, 32, 10, 10}}}; +const std::vector> kernels2D = {/*{1, 1},*/ {3, 3}, {3, 5}}; const std::vector> strides2D = {{1, 3}}; const std::vector> padBegins2D = {{0, 0}}; const std::vector> padEnds2D = {{0, 0}, {1, 1}}; @@ -42,7 +42,7 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) ); const auto conv2DParams_AutoPadValid = ::testing::Combine( @@ -52,7 +52,7 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::Values(std::vector({0, 0})), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID), + ::testing::Values(ov::op::PadType::VALID), ::testing::ValuesIn(emptyOutputPadding) ); @@ -60,11 +60,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convol ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions2D), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -73,11 +69,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convoluti ::testing::Combine( conv2DParams_AutoPadValid, 
::testing::ValuesIn(netPrecisions2D), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -92,7 +84,7 @@ const auto conv2DParams_ExplicitPadding_output_padding = ::testing::Combine( ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::ValuesIn(outputPadding2D) ); const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( @@ -102,7 +94,7 @@ const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::Values(std::vector({0, 0})), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID), + ::testing::Values(ov::op::PadType::VALID), ::testing::ValuesIn(outputPadding2D) ); @@ -110,11 +102,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputP ::testing::Combine( conv2DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -123,27 +111,24 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddi ::testing::Combine( conv2DParams_ExplicitPadding_output_padding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); /* ============= 3D ConvolutionBackpropData ============= */ -const std::vector netPrecisions3D = { - InferenceEngine::Precision::FP32, + +const std::vector netPrecisions3D = { + ov::element::f32, }; -const std::vector> inputShapes3D = {{1, 3, 10, 10, 10}, - {1, 16, 5, 5, 5}, - {1, 32, 5, 5, 5}}; -const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; -const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> inputShapes3D = {{{1, 3, 10, 10, 10}}, + {{1, 16, 5, 5, 5}}, + {{1, 32, 5, 5, 5}}}; +const std::vector> kernels3D = {/*{1, 1, 1}, */{3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; const std::vector> padBegins3D = {{0, 0, 0}}; const std::vector> padEnds3D = {{0, 0, 0}, {1, 1, 1}}; -const std::vector> dilations3D = {{1, 1, 1}}; +const std::vector> dilations3D = {{1, 1, 1}}; const auto conv3DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(kernels3D), @@ -170,11 +155,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convol ::testing::Combine( conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions3D), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - 
::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -183,11 +164,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convoluti ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions3D), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -202,7 +179,7 @@ const auto conv3DParams_ExplicitPadding_output_padding = ::testing::Combine( ::testing::ValuesIn(padEnds3D), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::ValuesIn(outputPadding3D) ); const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( @@ -212,7 +189,7 @@ const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::Values(std::vector({0, 0, 0})), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID), + ::testing::Values(ov::op::PadType::VALID), ::testing::ValuesIn(outputPadding3D) ); @@ -220,11 +197,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputP ::testing::Combine( conv3DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); @@ -233,11 +206,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi ::testing::Combine( conv3DParams_ExplicitPadding_output_padding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); From 2668f68816be17fb9a2dd1a353813003b2ba9f70 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Tue, 24 Oct 2023 10:21:06 +0200 Subject: [PATCH 018/275] CompressQuantizeWeights - fix zero point calculation (#20541) Current implementation tries to leverage branchless approach, but it's not correct if scale is 0. In that case - zero point can can become inf or nan and multiplication by 0 doesn't change its value. That causes another issue - infinite or NaN zero point cannot be optimized out later. 
Ticket: CVS-122931 Co-authored-by: Ivan Tikhonov --- .../src/compress_quantize_weigths.cpp | 2 +- .../tests/utils/compress_quantize_weights.cpp | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/common/offline_transformations/src/compress_quantize_weigths.cpp b/src/common/offline_transformations/src/compress_quantize_weigths.cpp index 6c9e4554782a96..2b0687c86cde53 100644 --- a/src/common/offline_transformations/src/compress_quantize_weigths.cpp +++ b/src/common/offline_transformations/src/compress_quantize_weigths.cpp @@ -357,7 +357,7 @@ static void compute_scale_and_zero_point_internal(const std::shared_ptr::epsilon(); *zero_point++ = zero_point_value; diff --git a/src/common/transformations/tests/utils/compress_quantize_weights.cpp b/src/common/transformations/tests/utils/compress_quantize_weights.cpp index cc31017368863f..15d07188f805f2 100644 --- a/src/common/transformations/tests/utils/compress_quantize_weights.cpp +++ b/src/common/transformations/tests/utils/compress_quantize_weights.cpp @@ -232,6 +232,30 @@ TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminated) { comparator.enable(FunctionsComparator::CmpValues::ACCURACY); } +TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedZeroScale) { + { + auto data = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.144816, 0.0858578, 0.110928}); + auto input_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, -0.383148, -0.34054}); + auto input_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.380155, 0.33788}); + auto output_low = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {-0.402659, 0.0, -0.34054}); + auto output_high = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.399513, 0.0, 0.33788}); + auto fq = std::make_shared(data, input_low, input_high, output_low, output_high, 256); + model = std::make_shared(NodeVector{fq}, ParameterVector{}); 
+ + manager.register_pass(); + } + + { + auto data = opset8::Constant::create(element::i8, Shape{3, 1, 1, 1}, {-46, 29, 42}); + auto convert = std::make_shared(data, element::f32); + auto scale = opset8::Constant::create(element::f32, Shape{3, 1, 1, 1}, {0.00314577, 0.0, 0.00266047}); + auto mul = std::make_shared(convert, scale); + model_ref = std::make_shared(NodeVector{mul}, ParameterVector{}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); +} + TEST_F(TransformationTestsF, CompressQuantizeWeightsWithZeroPointEliminatedFP16) { { auto data = opset8::Constant::create(element::f16, Shape{3, 1, 1, 1}, {0.2, 1.2, 1.2}); From 1daa4b9e5e12eb3e8d5f9d7346962a69ac859011 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Tue, 24 Oct 2023 13:09:37 +0400 Subject: [PATCH 019/275] Skip `smoke_TestsROIPooling` f16 test on ARM (#20552) --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4eb40365fa95d7..e942043dd3fbf2 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -230,6 +230,8 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); + // Issue: 123064 + retVector.emplace_back(R"(smoke_TestsROIPooling_.*/ROIPoolingLayerTest.*modelType=f16.*)"); #endif #endif From 750f62fd0423740ad437b119a88a384db1222a70 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 24 Oct 2023 12:53:54 
+0200 Subject: [PATCH 020/275] [shape_infer]Add preserve partial values on inputs for Mod operator (#20169) * Preserve partial values on mod inputs - static values full range of integers - intervals only if not negatives * Fix bounds evaluate when inputs are scalars --- src/core/dev_api/validation_util.hpp | 21 +- src/core/include/openvino/op/mod.hpp | 2 + .../include/openvino/reference/mod.hpp | 69 +++++- src/core/src/op/mod.cpp | 208 +++++++++++++++++- src/core/src/validation_util.cpp | 70 +++--- src/core/tests/type_prop/mod.cpp | 164 ++++++++++++++ 6 files changed, 497 insertions(+), 37 deletions(-) diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp index e93fefd1411eb9..2495fd1029959a 100644 --- a/src/core/dev_api/validation_util.hpp +++ b/src/core/dev_api/validation_util.hpp @@ -34,7 +34,7 @@ OPENVINO_API bool are_unique(const std::vector& data); /// /// \param value Value to be clipped. /// \param min Minimum value bound. -/// \param max Maximum value boiund +/// \param max Maximum value bound. /// /// \return Value if between min, max otherwise min or max. OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_t& max); @@ -43,18 +43,21 @@ OPENVINO_API int64_t clip(const int64_t& value, const int64_t& min, const int64_ /// /// \param subgraph sink /// -/// \return Constant node or nullptr if unable to constantfold the subgraph +/// \return Constant node or nullptr if unable to constant fold the subgraph OPENVINO_API std::shared_ptr constantfold_subgraph(const Output& subgraph_sink); -/** - * @brief Runs an estimation of source tensor. If it succeeded to calculate both bounds and - * they are the same returns Constant operation from the resulting bound, otherwise nullptr. - * - * @param source Node output used to get its tensor data as constant. - * @return Shared pointer to constant data or nullptr. - */ +/// \brief Runs an estimation of source tensor. 
If it succeeded to calculate both bounds and +/// they are the same returns Constant operation from the resulting bound, otherwise nullptr. +/// +/// \param source Node output used to get its tensor data as constant. +/// \return Shared pointer to constant data or nullptr. OPENVINO_API std::shared_ptr get_constant_from_source(const Output& source); +/// \brief Make scalar tensor which stores maximum value of ov::element::Type. +/// \param et Element type to get its maximum. +/// \return Tensor with maximum value. +Tensor make_tensor_of_max_value(const element::Type_t et); + /// \brief Apply auto padding to padding_above and padding_below inputs /// if all needed informations are known. /// diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 5e58a2ec03d733..defb1c65163898 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -29,6 +29,8 @@ class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override; + bool evaluate_lower(TensorVector& outputs) const override; + bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/mod.hpp b/src/core/reference/include/openvino/reference/mod.hpp index 81ae69e32ebfb2..671ee012393641 100644 --- a/src/core/reference/include/openvino/reference/mod.hpp +++ b/src/core/reference/include/openvino/reference/mod.hpp @@ -6,6 +6,7 @@ #include #include +#include #include "openvino/reference/autobroadcast_binop.hpp" #include "openvino/reference/utils/type_util.hpp" @@ -22,6 +23,72 @@ template ()>::type* = T mod(const T x, const T y) { return x - (std::trunc(x / y) * y); } + +/** + * @brief Estimates division remainder `[v1, v2] % m = [r0, r1]` as interval. 
+ * + * Assumes that ` 0 <= v1 <= v2 and m != 0`, in other cases the result is undefined behaviour. + * The result interval estimates the minimum and maximum, but it is not true that the value can be any value between min and max. + * e.g. + * - [4,6] % 5 = [0, 4], but in fact the accurate result is the set [0,1,4] + + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m Modulo divisor. + * @return Remainder of division as interval range. + */ +template ::value>::type* = nullptr> +std::pair mod_interval_value(const T v1, const T v2, const T m) { + const auto v_diff = v2 - v1; + auto r = std::make_pair(func::mod(v1, m), func::mod(v2, m)); + + if ((r.second < r.first) || ((v_diff != T{0}) && (v_diff >= m))) { + r.first = T{0}; + r.second = m - T{1}; + } + return r; +} + +/** + * @brief Estimates division remainder of `[v1, v2] % [m1, m2] = [r0, r1]` as interval. + * + * Assumes that ` 0 <= v1 <= v2 and 0 < m1 <= m2`, in other cases the result is undefined behaviour. + * + * @param v1 Minimum of value interval. + * @param v2 Maximum of value interval. + * @param m1 Minimum of modulo divisor. + * @param m2 Maximum of modulo divisor. + * @return Remainder of division as interval range.
+ */ +template ::value>::type* = nullptr> +std::pair mod_interval(const T v1, const T v2, const T m1, const T m2) { + auto r = mod_interval_value(v1, v2, m1); + if (v2 != 0) { + if (m1 != m2) { + const auto v_diff = v2 - v1; + const auto m_diff = m2 - m1; + + auto r2 = mod_interval_value(v1, v2, m2); + r.first = std::min(r.first, r2.first); + r.second = std::max(r.second, r2.second); + + if (v_diff == T{0} && m_diff != T{1}) { + const T v2_half = v2 / T{2}; + if ((m1 < v2_half) || ((m1 < v2) && (v2 < m2))) { + r.first = T{0}; + + if ((v2_half < m2) && (m2 < v2)) { + const T v2_half_next = v2_half + T{1}; + r.second = func::mod(v2, v2_half_next); + } else { + r.second = m2 - T{1}; + } + } + } + } + } + return r; +} } // namespace func /** @@ -42,7 +109,7 @@ void mod(InputIt arg0, const Shape& arg_shape1, const op::AutoBroadcastSpec& broadcast_spec) { using T = typename std::iterator_traits::value_type; - autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, &func::mod); + autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, func::mod); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index e8aa1a8a009cc2..816d605a292657 100644 --- a/src/core/src/op/mod.cpp +++ b/src/core/src/op/mod.cpp @@ -4,13 +4,30 @@ #include "openvino/op/mod.hpp" +#include "bound_evaluate.hpp" #include "element_visitor.hpp" #include "itt.hpp" -#include "openvino/core/shape_util.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/select.hpp" #include "openvino/reference/mod.hpp" #include "utils.hpp" +#include "validation_util.hpp" namespace ov { +namespace util { +namespace { +Tensor make_tensor_of_value(const element::Type_t et, const int64_t value) { + auto c = op::v0::Constant(et, Shape{}, value); + auto t = Tensor(et, Shape{}); + std::memcpy(t.data(), 
c.get_data_ptr(), t.get_byte_size()); + return t; +} +} // namespace +} // namespace util + namespace op { namespace mod { struct Evaluate : ov::element::NoAction { @@ -31,6 +48,185 @@ struct Evaluate : ov::element::NoAction { return true; } }; + +struct EvaluateBound : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& v_lb, + const Tensor& v_ub, + const Tensor& m_lb, + const Tensor& m_ub, + Tensor& out, + const bool is_lower) { + auto v_lb_first = v_lb.data(); + auto v_lb_last = std::next(v_lb_first, v_lb.get_size()); + auto v_ub_first = v_ub.data(); + auto m_lb_first = m_lb.data(); + auto m_ub_first = m_ub.data(); + auto out_first = out.data(); + + if (is_lower) { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).first; + } + } else { + while (v_lb_first != v_lb_last) { + *out_first++ = + reference::func::mod_interval(*v_lb_first++, *v_ub_first++, *m_lb_first++, *m_ub_first++).second; + } + } + return true; + } +}; + +namespace { + +/** + * @brief Get node inputs bounds as TensorVector. + * + * The inputs bounds are stored as [lower0, upper0, lower1, upper1]. + * + * @param op Pointer to the node. + * @return Vector with inputs bounds tensors. + */ +TensorVector get_bounds(const Node* const op) { + auto&& v_bounds = ov::evaluate_both_bounds(op->input_value(0)); + auto&& m_bounds = ov::evaluate_both_bounds(op->input_value(1)); + return {std::move(v_bounds.first), + std::move(v_bounds.second), + std::move(m_bounds.first), + std::move(m_bounds.second)}; +} + +/** + * @brief Check if all bounds in vector are valid (allocated). + * + * @param bounds TensorVector of bounds for check. + * @return True if bounds are valid, otherwise false.
+ */ +bool are_bounds_valid(const TensorVector& bounds) { + return std::all_of(bounds.begin(), bounds.end(), [](const Tensor& t) { + return static_cast(t); + }); +} + +/** + * @brief Evaluate binary mask of values which cannot be calculated by modulo. + * + * @param bounds Modulo inputs bounds. + * @return Tensor with binary mask or empty tensor if evaluation failed. + */ +Tensor evaluate_undefined_result_mask(const TensorVector& bounds) { + const auto eq_op = v1::Equal(); + const auto or_op = v1::LogicalOr(); + + const auto& in_et = bounds.front().get_element_type(); + + auto zero_t = ov::util::make_tensor_of_value(in_et, 0); + auto max_t = ov::util::make_tensor_of_max_value(in_et); + + const auto& v_ub = bounds[1]; + const auto& m_lb = bounds[2]; + const auto& m_ub = bounds[3]; + + auto m_mask = TensorVector{{element::boolean, m_ub.get_shape()}}; + if (!eq_op.evaluate(m_mask, {m_lb, zero_t})) { + return {}; + } + + auto out_masks = TensorVector{{element::boolean, m_lb.get_shape()}}; + if (!eq_op.evaluate(out_masks, {m_ub, zero_t})) { + return {}; + } + + auto m_or_inputs = TensorVector{out_masks[0], m_mask[0]}; + or_op.evaluate(m_mask, m_or_inputs); + if (!eq_op.evaluate(out_masks, {m_lb, max_t})) { + return {}; + } + + or_op.evaluate(m_mask, m_or_inputs); + auto v_mask = TensorVector{{element::boolean, v_ub.get_shape()}}; + if (!eq_op.evaluate(v_mask, {v_ub, max_t})) { + return {}; + } + + out_masks[0].set_shape(ov::op::infer_broadcast_shape(&or_op, v_mask[0].get_shape(), m_mask[0].get_shape())); + return or_op.evaluate(out_masks, {v_mask[0], m_mask[0]}) ? out_masks[0] : Tensor{}; +} + +/** + * @brief Get the input bounds with valid values only. + * + * Values for which modulo gives an undefined result are replaced by one. + * Auto broadcast is applied so that the inputs have the same shape. + * + * @param bounds Modulo operator inputs bounds. + * @param mask Mask with undefined result values. + * @return Vector of bounds tensors.
+ */ +TensorVector get_bounds_with_valid_values(const TensorVector& bounds, const Tensor& mask) { + const auto select_op = v1::Select(); + const auto one_t = ov::util::make_tensor_of_value(bounds.front().get_element_type(), 1); + + auto m_bounds = TensorVector(); + m_bounds.reserve(bounds.size()); + std::transform(bounds.cbegin(), bounds.cend(), std::back_inserter(m_bounds), [&](const Tensor& b) { + auto tmp = TensorVector{{b.get_element_type(), mask.get_shape()}}; + return select_op.evaluate(tmp, {mask, one_t, b}) ? tmp.front() : Tensor{}; + }); + return m_bounds; +} + +/** + * @brief Evaluate modulo upper or lower bound. + * + * @param op Pointer to modulo node. + * @param outputs Tensor vector with one tensor to store bounds result. + * @param is_lower True to evaluate lower otherwise evaluate upper. + * @return True if outputs has valid data otherwise false. + */ +bool evaluate_bound(const Node* const op, TensorVector& outputs, bool is_lower) { + const auto bounds = mod::get_bounds(op); + + if (mod::are_bounds_valid(bounds)) { + const auto& in_et = bounds[0].get_element_type(); + + const auto undefined_result_mask = mod::evaluate_undefined_result_mask(bounds); + if (!undefined_result_mask) { + return false; + } + + // Set inputs values to 1 for undefined results mask (0, inf, etc.) + const auto m_bounds = mod::get_bounds_with_valid_values(bounds, undefined_result_mask); + if (!mod::are_bounds_valid(m_bounds)) { + return false; + } + + // Evaluate bound. + outputs[0].set_shape(undefined_result_mask.get_shape()); + using namespace ov::element; + if (!IfTypeOf::apply(in_et, + m_bounds[0], + m_bounds[1], + m_bounds[2], + m_bounds[3], + outputs[0], + is_lower)) { + return false; + } + // Set undefined bound value for results which cannot be calculated. + const auto select_op = v1::Select(); + const auto undefined_bound = + is_lower ? 
ov::util::make_tensor_of_value(in_et, 0) : ov::util::make_tensor_of_max_value(in_et); + return select_op.evaluate(outputs, {undefined_result_mask, undefined_bound, outputs.front()}); + } else { + return false; + } +} +} // namespace } // namespace mod namespace v1 { @@ -59,6 +255,16 @@ bool Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) co get_autob()); } +bool Mod::evaluate_lower(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_lower); + return mod::evaluate_bound(this, outputs, true); +} + +bool Mod::evaluate_upper(TensorVector& outputs) const { + OV_OP_SCOPE(v1_Mod_evaluate_upper); + return mod::evaluate_bound(this, outputs, false); +} + bool Mod::has_evaluate() const { OV_OP_SCOPE(v1_Mod_has_evaluate); diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 803364b289008d..4a7bd1958f1c53 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -910,32 +910,8 @@ void evaluate_nodes(std::map& value_map, } std::shared_ptr get_constant_max_of_type(element::Type_t t) { -#define OPENVINO_TYPE_TO_MAX_CONST(t) \ - case t: \ - return ov::op::v0::Constant::create( \ - t, \ - {}, \ - {std::numeric_limits::value_type>::max()}); \ - break - - switch (t) { - OPENVINO_TYPE_TO_MAX_CONST(element::boolean); - OPENVINO_TYPE_TO_MAX_CONST(element::bf16); - OPENVINO_TYPE_TO_MAX_CONST(element::f16); - OPENVINO_TYPE_TO_MAX_CONST(element::f32); - OPENVINO_TYPE_TO_MAX_CONST(element::f64); - OPENVINO_TYPE_TO_MAX_CONST(element::i8); - OPENVINO_TYPE_TO_MAX_CONST(element::i16); - OPENVINO_TYPE_TO_MAX_CONST(element::i32); - OPENVINO_TYPE_TO_MAX_CONST(element::i64); - OPENVINO_TYPE_TO_MAX_CONST(element::u1); - OPENVINO_TYPE_TO_MAX_CONST(element::u8); - OPENVINO_TYPE_TO_MAX_CONST(element::u16); - OPENVINO_TYPE_TO_MAX_CONST(element::u32); - OPENVINO_TYPE_TO_MAX_CONST(element::u64); - default: - return nullptr; - } + auto tensor = ov::util::make_tensor_of_max_value(t); + return tensor ? 
std::make_shared(tensor) : nullptr; } std::shared_ptr get_constant_min_of_type(element::Type_t t) { @@ -1385,6 +1361,48 @@ std::shared_ptr get_constant_from_source(const Output& source) { } } +template +Tensor make_tensor_of_max_value(const element::Type_t et) { + Tensor t{et, Shape{}}; + *t.data() = std::numeric_limits::max(); + return t; +} + +Tensor make_tensor_of_max_value(const element::Type_t et) { + switch (et) { + case element::boolean: + return make_tensor_of_max_value>(et); + case element::bf16: + return make_tensor_of_max_value>(et); + case element::f16: + return make_tensor_of_max_value>(et); + case element::f32: + return make_tensor_of_max_value>(et); + case element::f64: + return make_tensor_of_max_value>(et); + case element::i8: + return make_tensor_of_max_value>(et); + case element::i16: + return make_tensor_of_max_value>(et); + case element::i32: + return make_tensor_of_max_value>(et); + case element::i64: + return make_tensor_of_max_value>(et); + case element::u1: + return make_tensor_of_max_value>(et); + case element::u8: + return make_tensor_of_max_value>(et); + case element::u16: + return make_tensor_of_max_value>(et); + case element::u32: + return make_tensor_of_max_value>(et); + case element::u64: + return make_tensor_of_max_value>(et); + default: + return {}; + } +} + std::vector get_tensors_partial_shapes(const TensorVector& tensors) { std::vector shapes; shapes.reserve(tensors.size()); diff --git a/src/core/tests/type_prop/mod.cpp b/src/core/tests/type_prop/mod.cpp index b1dbab11eea61f..0e5af52401b412 100644 --- a/src/core/tests/type_prop/mod.cpp +++ b/src/core/tests/type_prop/mod.cpp @@ -5,7 +5,171 @@ #include "openvino/op/mod.hpp" #include "arithmetic_ops.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" using Type = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(type_prop_mod, 
ArithmeticOperator, Type); + +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using ov::op::v0::Squeeze; +using ov::op::v3::Broadcast; +using ov::op::v3::ShapeOf; + +class TypePropModV1Test : public TypePropOpTest {}; + +TEST_F(TypePropModV1Test, preserve_constant_data_on_inputs) { + const auto a = Constant::create(ov::element::i32, ov::Shape{4}, {4, 10, 22, 5}); + const auto b = Constant::create(ov::element::i32, ov::Shape{4}, {3, 4, 8, 3}); + const auto op = make_op(a, b); + + const auto param = std::make_shared(ov::element::i32, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({1, 2, 6, 2})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_on_inputs) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {4, 10}, {0, 5}, -1, -1})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_m_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{5, 6}, 22, {3, 7}, -1, {7, 9}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{{12, 18}}); + const auto b_scalar = std::make_shared(std::make_shared(b)); + const auto op = make_op(std::make_shared(a), b_scalar); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = 
bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{5, 6}, {4, 10}, {3, 7}, -1, {7, 9}})); +} + +TEST_F(TypePropModV1Test, preserve_partial_values_when_value_is_interval_scalar) { + const auto a = std::make_shared(ov::element::i64, ov::PartialShape{{3, 7}}); + const auto b = std::make_shared(ov::element::i64, ov::PartialShape{3, {12, 18}, {4, 6}, -1, {0, 4}}); + const auto a_scalar = std::make_shared(std::make_shared(a)); + const auto op = make_op(a_scalar, std::make_shared(b)); + + const auto param = std::make_shared(ov::element::i64, ov::Shape{1}); + auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + + const auto& output_shape = bc->get_output_partial_shape(0); + EXPECT_EQ(output_shape, ov::PartialShape({{0, 2}, {3, 7}, {0, 5}, -1, -1})); +} + +// test params as {a, b, exp_result} +using IntervalModuloParams = std::tuple; + +class SingleDimModV1Test : public TypePropModV1Test, public testing::WithParamInterface { +protected: + void SetUp() override { + std::tie(a_dim, b_dim, exp_dim) = GetParam(); + } + + ov::Dimension a_dim, b_dim, exp_dim; +}; + +const auto v_and_m_static = testing::Values(IntervalModuloParams{{0, 0}, {1, 1}, {0, 0}}, + IntervalModuloParams{{0, 0}, {9, 9}, {0, 0}}, + IntervalModuloParams{{0, 0}, {1000, 1000}, {0, 0}}, + IntervalModuloParams{{10, 10}, {3, 3}, {1, 1}}, + IntervalModuloParams{{10, 10}, {6, 6}, {4, 4}}, + IntervalModuloParams{{10, 10}, {5, 5}, {0, 0}}, + IntervalModuloParams{{10, 10}, {15, 15}, {10, 10}}); + +const auto v_interval_m_static = testing::Values(IntervalModuloParams{{6, 7}, {4, 4}, {2, 3}}, + IntervalModuloParams{{6, 8}, {4, 4}, {0, 3}}, // Result [0,2,3] + IntervalModuloParams{{6, 8}, {10, 10}, {6, 8}}, + IntervalModuloParams{{6, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{4, 8}, {7, 7}, {0, 6}}, + IntervalModuloParams{{15, 16}, {7, 7}, {1, 2}}, + IntervalModuloParams{{5, 20}, {5, 5}, {0, 4}}, + + IntervalModuloParams{{5, 10}, {7, 7}, {0, 6}}); + +const auto 
v_static_m_interval = testing::Values(IntervalModuloParams{{0, 0}, {3, 13}, {0, 0}}, + IntervalModuloParams{{10, 10}, {2, 4}, {0, 3}}, + IntervalModuloParams{{10, 10}, {2, 6}, {0, 4}}, + IntervalModuloParams{{10, 10}, {6, 9}, {1, 4}}, + IntervalModuloParams{{10, 10}, {9, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 11}, {0, 10}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{10, 10}, {7, 8}, {2, 3}}, + IntervalModuloParams{{100, 100}, {2, 20}, {0, 19}}, + // can be estimated accurate as only two results are possible + IntervalModuloParams{{100, 100}, {15, 16}, {4, 10}}, + // can not be estimated accurate as there are three results [10,4,15] + // Requires to calculate all possibilities and pick min, max + IntervalModuloParams{{100, 100}, {15, 17}, {0, 16}}); + +const auto v_and_m_intervals = testing::Values(IntervalModuloParams{{1, 10}, {2, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {6, 9}, {0, 8}}, + IntervalModuloParams{{1, 10}, {2, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {6, 12}, {0, 10}}, + IntervalModuloParams{{1, 10}, {11, 12}, {1, 10}}, + IntervalModuloParams{{1, 10}, {11, 15}, {1, 10}}, + IntervalModuloParams{{4, 10}, {10, 13}, {0, 10}}, + IntervalModuloParams{{10, 20}, {3, 5}, {0, 4}}, + IntervalModuloParams{{10, 10}, {3, 10}, {0, 9}}, + IntervalModuloParams{{5, 20}, {5, 10}, {0, 9}}, + IntervalModuloParams{{10, 100}, {3, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {2, 20}, {0, 19}}, + IntervalModuloParams{{10, 100}, {51, 60}, {0, 59}}); + +// If input is infinite or m has 0 then output is undefined. 
+const auto v_and_m_special_values = testing::Values(IntervalModuloParams{{0, -1}, {5, 5}, {0, -1}}, + IntervalModuloParams{{10, -1}, {4, 4}, {0, -1}}, + // Evaluate low/up return [0, max] + // but evaluate both bounds return [0] as `m` has same bounds + IntervalModuloParams{{11, 11}, {0, 0}, {0, 0}}, + IntervalModuloParams{{11, 11}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, 5}, {0, -1}}, + IntervalModuloParams{{11, 20}, {0, -1}, {0, -1}}, + IntervalModuloParams{{0, -1}, {0, -1}, {0, -1}}); + +INSTANTIATE_TEST_SUITE_P(v_and_m_static, SingleDimModV1Test, v_and_m_static); +INSTANTIATE_TEST_SUITE_P(value_interval_m_static, SingleDimModV1Test, v_interval_m_static); +INSTANTIATE_TEST_SUITE_P(value_static_m_interval, SingleDimModV1Test, v_static_m_interval); +INSTANTIATE_TEST_SUITE_P(value_and_m_as_intervals, SingleDimModV1Test, v_and_m_intervals); +INSTANTIATE_TEST_SUITE_P(value_and_m_special_values, SingleDimModV1Test, v_and_m_special_values); + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i64) { + constexpr auto et = ov::element::i64; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a), std::make_shared(b)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} + +TEST_P(SingleDimModV1Test, preserve_value_on_inputs_i32) { + constexpr auto et = ov::element::i32; + const auto a = std::make_shared(et, ov::PartialShape{a_dim}); + const auto b = std::make_shared(et, ov::PartialShape{b_dim}); + const auto op = make_op(std::make_shared(a, et), std::make_shared(b, et)); + + const auto param = std::make_shared(et, ov::Shape{1}); + const auto bc = std::make_shared(param, op, ov::op::BroadcastType::BIDIRECTIONAL); + const auto& 
output_shape = bc->get_output_partial_shape(0); + + EXPECT_EQ(output_shape, ov::PartialShape({exp_dim})); +} From 6395fc672d3e1dcb098905c2dfa1b38c559404ab Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 24 Oct 2023 14:28:00 +0200 Subject: [PATCH 021/275] [CPU] Make static TI run a dynamic subgraph (#20527) * Make static TI run a dynamic subgraph * Dedicated SL test * Change condition to respect stat shapes * Adjust test to cover the code path properly From b7406247fff48df3864cc6bb25f8cf8cce9bc125 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Tue, 24 Oct 2023 14:40:36 +0200 Subject: [PATCH 022/275] Add TransposeMatMul transformation to MOC (#20460) Transformation fuses Transpose on first or second MatMul's input and sets MatMul's transpose_a/transpose_b accordingly. TransposeMatMul is already part of SmartReshape, but it can be added to MOCTransformations as well so native models that are don't use reshape can benefit from that. Ticket: CVS-118908 --- .../moc_transformations.cpp | 5 +- .../transpose_matmul_fusion.cpp | 14 +++++ .../skip_tests_config.cpp | 2 + .../transpose_matmul_fusion.cpp | 14 +++++ .../transpose_matmul_fusion.hpp | 17 +++++++ .../subgraph/transpose_matmul_fusion.hpp | 23 +++++++++ .../src/subgraph/transpose_matmul_fusion.cpp | 51 +++++++++++++++++++ 7 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp create mode 100644 src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp create mode 100644 src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp diff --git 
a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 86746f176cad7f..9a3446f2386161 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -87,6 +87,7 @@ #include "transformations/op_conversions/convert_ti_to_sequences.hpp" #include "transformations/resolve_names_collisions.hpp" #include "transformations/smart_reshape/lstm_states_broadcast.hpp" +#include "transformations/smart_reshape/matmul_sr.hpp" #include "transformations/smart_reshape/reshape_sinking.hpp" bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr& f) { @@ -166,11 +167,13 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr auto transpose_sinking = manager.register_pass(); ADD_MATCHER(transpose_sinking, TransposeSinking) - // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, // because it replaces pattern that may contain Transposes which must be optimized before // the transformation and it also inserts Transpose that can be optimized by TransposeSinking ADD_MATCHER(transpose_sinking, SplitSqueezeConcatFusion) + + REGISTER_PASS(manager, TransposeMatMul) + auto eliminations = manager.register_pass(); ADD_MATCHER(eliminations, EliminateUnsqueezeGather) ADD_MATCHER(eliminations, NopElimination, m_use_shapes) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..f84c9844db6c10 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_CPU), + TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 106b7696ccec40..05fac378c01874 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -102,5 +102,7 @@ std::vector disabledTestPatterns() { R"(.*SplitConvTest.CompareWithRefImpl.*IS=\(1.(128|256)\).*IC=4.*OC=4.*configItem=GNA_DEVICE_MODE_GNA_SW_FP32)", // TODO: Issue: 114149 R"(.*smoke_Decompose2DConv.*)", + // TODO: Issue: 123306 + R"(smoke_convert_matmul_to_fc/ConvertMatmulToFcWithTransposesPass.CompareWithRefImpl/netPRC=FP(32|16)_targetDevice=GNA__configItem=GNA_COMPACT_MODE_NO_configItem=GNA_DEVICE_MODE_GNA_SW_(FP32|EXACT)_IS=\(8.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..6e95d1e29a15af --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/transpose_matmul_fusion.hpp" + +using namespace ov::test; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion, + ::testing::Values(ov::test::utils::DEVICE_GPU), + 
TransposeMatMulFusion::getTestCaseName); + +} // namespace diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..f253419ca924f4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +TEST_P(TransposeMatMulFusion, CompareWithRefs){ + run(); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..c94383725f47ce --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class TransposeMatMulFusion : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; + void TearDown() override; +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..dc95fe704400f9 --- /dev/null +++ 
b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp" + +namespace ov { +namespace test { + +std::string TransposeMatMulFusion::getTestCaseName(const testing::TestParamInfo &obj) { + return "device=" + std::string(obj.param); +} + +void TransposeMatMulFusion::SetUp() { + targetDevice = GetParam(); + + ov::PartialShape shape1{1, 3, 128, 64}; + ov::PartialShape shape2{1, 3, 64, 128}; + + InputShape input_shape1 = {shape1, {Shape{1, 3, 128, 64}}}; + InputShape input_shape2 = {shape2, {Shape{1, 3, 64, 128}}}; + init_input_shapes({input_shape1, input_shape2}); + + const auto param1 = std::make_shared(ov::element::f32, shape1); + const auto param2 = std::make_shared(ov::element::f32, shape2); + const auto order = ov::op::v0::Constant::create(ov::element::i32, Shape{4}, {0, 1, 3, 2}); + const auto transpose1 = std::make_shared(param1, order); + const auto transpose2 = std::make_shared(param2, order); + const auto matmul = std::make_shared(transpose1, transpose2, false, false); + const auto constant = op::v0::Constant::create(element::f32, Shape{1}, {9}); + const auto mul = std::make_shared(matmul, constant); + function = std::make_shared(mul, ov::ParameterVector{param1, param2}); +} + +void TransposeMatMulFusion::TearDown() { + const auto model = compiledModel.get_runtime_model(); + + int num_ops = 0; + for (const auto& node : model->get_ordered_ops()) { + const auto& rt_info = node->get_rt_info(); + const auto layer_type = rt_info.find("layerType")->second.as(); + if (layer_type != "Reorder" && layer_type != "Const") + num_ops++; + EXPECT_NE(layer_type, "Transpose"); + EXPECT_NE(layer_type, "Permute"); + } + ASSERT_EQ(num_ops, 5); // two Inputs, one Eltwise, one MatMul and one Output +} + +} // namespace test +} // namespace ov From 
e977a6ed9b252dc0b2155ee3317b0b438e15487a Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Tue, 24 Oct 2023 16:44:01 +0400 Subject: [PATCH 023/275] Fix `COPY_INSTEAD_OF_MOVE` issue in Hetero (#20667) --- src/plugins/hetero/src/async_infer_request.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/hetero/src/async_infer_request.cpp b/src/plugins/hetero/src/async_infer_request.cpp index a4f5f36e15f0f5..e9d3643b5baa43 100644 --- a/src/plugins/hetero/src/async_infer_request.cpp +++ b/src/plugins/hetero/src/async_infer_request.cpp @@ -7,7 +7,7 @@ struct RequestExecutor : ov::threading::ITaskExecutor { explicit RequestExecutor(ov::SoPtr& request) : m_request(request) { m_request->set_callback([this](std::exception_ptr exception_ptr) mutable { - m_exception_ptr = exception_ptr; + m_exception_ptr = std::move(exception_ptr); auto task = std::move(m_task); task(); }); From 5ffde7d8d6f668f9bb5061b11b7203cc2bdd3f7b Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 24 Oct 2023 14:48:09 +0200 Subject: [PATCH 024/275] [core]Migrate Minimum operator to new API (#20597) * Migrate Minimum op to new API * Refactor evaluates to reduce binary size - add infer_broadcast_shape, get shapes from tensors reduce OV_ASSERT - refactor Evaluate structures to reduce binary size --------- Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/minimum.hpp | 4 +- .../include/openvino/reference/minimum.hpp | 17 +-- src/core/shape_inference/include/utils.hpp | 11 ++ src/core/shape_inference/src/utils.cpp | 5 + src/core/src/op/add.cpp | 14 +-- src/core/src/op/logical_and.cpp | 9 +- src/core/src/op/logical_or.cpp | 9 +- src/core/src/op/minimum.cpp | 112 ++++++++---------- src/core/src/op/mod.cpp | 14 +-- src/core/src/op/subtract.cpp | 14 +-- src/core/src/op/xor.cpp | 11 +- .../template/backend/ops/bitwise_and.cpp | 4 +- .../template/backend/ops/bitwise_or.cpp | 4 +- .../template/backend/ops/bitwise_xor.cpp | 4 +- 14 files changed, 113 insertions(+), 
119 deletions(-) diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index c8cfc5c9d7c999..30819b2a72f849 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -29,9 +29,7 @@ class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/minimum.hpp b/src/core/reference/include/openvino/reference/minimum.hpp index 4bfe8ff0c89c83..8d70ae0fc99ee0 100644 --- a/src/core/reference/include/openvino/reference/minimum.hpp +++ b/src/core/reference/include/openvino/reference/minimum.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include "openvino/core/shape.hpp" #include "openvino/op/util/attr_types.hpp" @@ -12,11 +12,16 @@ namespace ov { namespace reference { +namespace func { +template +T min(const T a, const T b) { + return std::min(a, b); +} +} // namespace func + template void minimum(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] < arg1[i] ? arg0[i] : arg1[i]; - } + std::transform(arg0, std::next(arg0, count), arg1, out, func::min); } template @@ -26,9 +31,7 @@ void minimum(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return x < y ? 
x : y; - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::min); } } // namespace reference } // namespace ov diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index 32e53766ba0d60..cac12973a18179 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -419,6 +419,17 @@ ov::optional get_input_bounds(const ov::Node* op, size_t port, const IT * @return Result shape from inputs with applied broadcast specification. */ ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second); + +/** + * @brief Inference broadcast shape from input tensor shapes for element wise operator + * according to broadcast specification stored in operator. + * + * @param op Pointer to operator. + * @param inputs Tensors vector to get theirs shapes. + * + * @return Result shape from input tensors shape with applied broadcast specification. 
+ */ +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs); } // namespace op /** diff --git a/src/core/shape_inference/src/utils.cpp b/src/core/shape_inference/src/utils.cpp index c89221d286ac69..74351e6fc1cfc0 100644 --- a/src/core/shape_inference/src/utils.cpp +++ b/src/core/shape_inference/src/utils.cpp @@ -5,6 +5,7 @@ #include "utils.hpp" #include "eltwise_shape_inference.hpp" +#include "openvino/core/validation_util.hpp" namespace ov { namespace op { @@ -12,5 +13,9 @@ namespace op { ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::Shape& first, const ov::Shape& second) { return eltwise_shape_infer(op, std::vector{first, second}).front().to_shape(); } + +ov::Shape infer_broadcast_shape(const ov::Node* const op, const ov::TensorVector& inputs) { + return eltwise_shape_infer(op, ov::util::get_tensors_partial_shapes(inputs)).front().to_shape(); +} } // namespace op } // namespace ov diff --git a/src/core/src/op/add.cpp b/src/core/src/op/add.cpp index 316f71b3ebca7c..0d09563b9ae201 100644 --- a/src/core/src/op/add.cpp +++ b/src/core/src/op/add.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::add(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::add(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,15 +45,16 @@ std::shared_ptr Add::clone_with_new_inputs(const OutputVector& new_args) c bool Add::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Add_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), 
inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply( inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/logical_and.cpp b/src/core/src/op/logical_and.cpp index fe8bd612ed2d85..91ff10dc15601c 100644 --- a/src/core/src/op/logical_and.cpp +++ b/src/core/src/op/logical_and.cpp @@ -25,19 +25,16 @@ std::shared_ptr LogicalAnd::clone_with_new_inputs(const OutputVector& new_ bool LogicalAnd::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalAnd_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if (inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_and(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/logical_or.cpp b/src/core/src/op/logical_or.cpp index 403089318de314..5d9532b1358286 100644 --- a/src/core/src/op/logical_or.cpp +++ b/src/core/src/op/logical_or.cpp @@ -26,19 +26,16 @@ std::shared_ptr LogicalOr::clone_with_new_inputs(const OutputVector& new_a bool LogicalOr::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LogicalOr_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - const auto& shape_0 = inputs[0].get_shape(); - const auto& shape_1 = inputs[1].get_shape(); - outputs[0].set_shape(infer_broadcast_shape(this, shape_0, shape_1)); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); if 
(inputs[0].get_element_type() == element::boolean) { using T = fundamental_type_for; reference::logical_or(inputs[0].data(), inputs[1].data(), outputs[0].data(), - shape_0, - shape_1, + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); return true; } else { diff --git a/src/core/src/op/minimum.cpp b/src/core/src/op/minimum.cpp index 83252519beeeac..1844c6e5b25e36 100644 --- a/src/core/src/op/minimum.cpp +++ b/src/core/src/op/minimum.cpp @@ -2,92 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/minimum.hpp" - -#include +#include "openvino/op/minimum.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/convert.hpp" -#include "ngraph/op/less.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/type/element_type.hpp" #include "openvino/reference/minimum.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace minimumop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::minimum(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +namespace minimum { -bool evaluate_minimum(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_minimum, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u16, arg0, arg1, out, broadcast_spec); - 
OPENVINO_TYPE_CASE(evaluate_minimum, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_minimum, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::minimum(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace minimumop +}; +} // namespace minimum // ------------------------------ v1 ------------------------------------------- - -op::v1::Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Minimum::Minimum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Minimum::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Minimum::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Minimum_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Minimum::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Minimum::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Minimum_evaluate); - return minimumop::evaluate_minimum(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + 
outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Minimum::has_evaluate() const { +bool Minimum::has_evaluate() const { OV_OP_SCOPE(v1_Minimum_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index 816d605a292657..69ac9493052d20 100644 --- a/src/core/src/op/mod.cpp +++ b/src/core/src/op/mod.cpp @@ -37,14 +37,11 @@ struct Evaluate : ov::element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::mod(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::mod(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -244,14 +241,15 @@ std::shared_ptr Mod::clone_with_new_inputs(const OutputVector& new_args) c bool Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Mod_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using 
namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/subtract.cpp b/src/core/src/op/subtract.cpp index 6538918f9f14e2..6b21fa00483b78 100644 --- a/src/core/src/op/subtract.cpp +++ b/src/core/src/op/subtract.cpp @@ -19,14 +19,11 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& in0, const Tensor& in1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; - reference::subtract(in0.data(), - in1.data(), - out.data(), - in0.get_shape(), - in1.get_shape(), - broadcast_spec); + reference::subtract(in0.data(), in1.data(), out.data(), shape0, shape1, broadcast_spec); return true; } }; @@ -48,14 +45,15 @@ std::shared_ptr Subtract::clone_with_new_inputs(const OutputVector& new_ar bool Subtract::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Subtract_evaluate); OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(this, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), get_autob()); } diff --git a/src/core/src/op/xor.cpp b/src/core/src/op/xor.cpp index eafe1fe465e315..c96599d9de3cef 100644 --- a/src/core/src/op/xor.cpp +++ b/src/core/src/op/xor.cpp @@ -21,13 +21,15 @@ struct Evaluate : element::NoAction { static result_type visit(const Tensor& arg0, const Tensor& arg1, Tensor& out, + const Shape& shape0, + const Shape& shape1, const AutoBroadcastSpec& broadcast_spec) { using T = typename element_type_traits::value_type; reference::logical_xor(arg0.data(), arg1.data(), 
out.data(), - arg0.get_shape(), - arg1.get_shape(), + shape0, + shape1, broadcast_spec); return true; } @@ -40,14 +42,15 @@ bool input_supported_type(const element::Type& et) { bool evaluate(const Node* const op, TensorVector& outputs, const TensorVector& inputs) { OPENVINO_ASSERT(outputs.size() == 1); - OPENVINO_ASSERT(inputs.size() == 2); - outputs[0].set_shape(infer_broadcast_shape(op, inputs[0].get_shape(), inputs[1].get_shape())); + outputs[0].set_shape(infer_broadcast_shape(op, inputs)); using namespace ov::element; return IfTypeOf::apply(inputs[0].get_element_type(), inputs[0], inputs[1], outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), op->get_autob()); } } // namespace diff --git a/src/plugins/template/backend/ops/bitwise_and.cpp b/src/plugins/template/backend/ops/bitwise_and.cpp index d0e5d05b11360d..b6686175377aac 100644 --- a/src/plugins/template/backend/ops/bitwise_and.cpp +++ b/src/plugins/template/backend/ops/bitwise_and.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_and(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_or.cpp b/src/plugins/template/backend/ops/bitwise_or.cpp index fe163edeccb3a1..69f45d2916731d 100644 --- a/src/plugins/template/backend/ops/bitwise_or.cpp +++ b/src/plugins/template/backend/ops/bitwise_or.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), 
inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_or(inputs[0].data(), inputs[1].data(), diff --git a/src/plugins/template/backend/ops/bitwise_xor.cpp b/src/plugins/template/backend/ops/bitwise_xor.cpp index 3fa98775a05e18..43a15c60b5e0a8 100644 --- a/src/plugins/template/backend/ops/bitwise_xor.cpp +++ b/src/plugins/template/backend/ops/bitwise_xor.cpp @@ -14,9 +14,9 @@ template bool evaluate(const std::shared_ptr& node, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - OPENVINO_ASSERT(inputs.size() == 2); OPENVINO_ASSERT(outputs.size() == 1); - outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs[0].get_shape(), inputs[1].get_shape())); + + outputs[0].set_shape(infer_broadcast_shape(node.get(), inputs)); using T = typename ov::element_type_traits::value_type; ov::reference::bitwise_xor(inputs[0].data(), inputs[1].data(), From 251602636c10d053f106d2954bd2c01b920e247f Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Tue, 24 Oct 2023 16:23:14 +0200 Subject: [PATCH 025/275] Fuse GeLU (#20428) * add transformation + test * move transformation to GeluFusion * add comments * code review fixes * fix * code review fixes * code style fix * fix windows build warning --- .../common_optimizations/gelu_fusion.hpp | 13 + .../common_optimizations/gelu_fusion.cpp | 79 ++- .../common_optimizations/gelu_fusion.cpp | 649 ++++++++++-------- 3 files changed, 448 insertions(+), 293 deletions(-) diff --git a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp index 61722260bb5a57..71b5fcafc9fe75 100644 --- a/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/gelu_fusion.hpp @@ -19,6 
+19,7 @@ class TRANSFORMATIONS_API GeluFusionWithErfTwo; class TRANSFORMATIONS_API GeluFusionWithErfThree; class TRANSFORMATIONS_API GeluFusionWithErfFour; class TRANSFORMATIONS_API GeluFusionWithTanh; +class TRANSFORMATIONS_API GeluFusionWithTanhNoPower; } // namespace pass } // namespace ov @@ -78,6 +79,17 @@ class ov::pass::GeluFusionWithTanh : public ov::pass::MatcherPass { GeluFusionWithTanh(); }; +/** + * @ingroup ie_transformation_common_api + * @brief GeluFusion transformation replaces a sub-graph + * x * 0.5 * (1 + tanh((x * 0.044715 * x + 1) * x * sqrt(2 / pi))) with a Gelu (Tanh) op. + */ +class ov::pass::GeluFusionWithTanhNoPower : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("GeluFusionWithTanhNoPower", "0"); + GeluFusionWithTanhNoPower(); +}; + /** * @ingroup ie_transformation_common_api * @brief GeluFusion transformation replaces various sub-graphs with a Gelu op. @@ -91,5 +103,6 @@ class ov::pass::GeluFusion : public ov::pass::GraphRewrite { add_matcher(); add_matcher(); add_matcher(); + add_matcher(); } }; diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 90b718f8067cae..7f7915f7965774 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -8,6 +8,7 @@ #include +#include #include #include "itt.hpp" @@ -16,8 +17,8 @@ #include "openvino/op/constant.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/erf.hpp" -#include "openvino/op/gelu.hpp" #include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" #include "openvino/op/power.hpp" #include "openvino/op/tanh.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -302,11 +303,10 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { return false; } - constexpr float pi = 
3.141592653589793238462643383279502884f; bool valid_constant_values = op::util::has_constant_value(pow_constant_value, 3.0f) && op::util::has_constant_value(mul_0_constant_value, 0.044715f, 0.001f) && - op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0f / pi), 0.01f) && + op::util::has_constant_value(mul_1_constant_value, std::sqrt(2.0 / M_PI), 0.01) && op::util::has_constant_value(mul_2_constant_value, 0.5f) && op::util::has_constant_value(add_1_constant_value, 1.0f); @@ -336,3 +336,76 @@ ov::pass::GeluFusionWithTanh::GeluFusionWithTanh() { auto m = std::make_shared(mul_3, matcher_name); register_matcher(m, callback); } + +ov::pass::GeluFusionWithTanhNoPower::GeluFusionWithTanhNoPower() { + // Replaces a sub-graph with a Gelu (ov::op::v0::Tanh) op + // x * 0.5 * (1 + tanh((x * 0.044715 * x + 1) * x * sqrt(2 / pi))) + MATCHER_SCOPE(GeluFusionWithTanhNoPower); + auto input = pattern::any_input(); + + auto const1 = pattern::wrap_type(); + auto mul1 = pattern::wrap_type({input, const1}); + + auto mul2 = pattern::wrap_type({mul1, input}); + + auto const2 = pattern::wrap_type(); + auto add1 = pattern::wrap_type({const2, mul2}); + + auto const3 = pattern::wrap_type(); + auto mul3 = pattern::wrap_type({input, const3}); + + auto mul4 = pattern::wrap_type({add1, mul3}); + + auto tanh = pattern::wrap_type({mul4}); + + auto const4 = pattern::wrap_type(); + auto add2 = pattern::wrap_type({tanh, const4}); + + auto const5 = pattern::wrap_type(); + auto mul5 = pattern::wrap_type({input, const5}); + + auto mul6 = pattern::wrap_type({add2, mul5}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto x_output = pattern_to_output.at(input); + + auto const1_value = pattern_to_output.at(const1).get_node_shared_ptr(); + auto const2_value = pattern_to_output.at(const2).get_node_shared_ptr(); + auto const3_value = pattern_to_output.at(const3).get_node_shared_ptr(); + auto const4_value = 
pattern_to_output.at(const4).get_node_shared_ptr(); + auto const5_value = pattern_to_output.at(const5).get_node_shared_ptr(); + + bool valid_constant_values = op::util::has_constant_value(const1_value, 0.044715f, 0.001f) && + op::util::has_constant_value(const2_value, 1.0f) && + op::util::has_constant_value(const3_value, std::sqrt(2.0 / M_PI), 0.01) && + op::util::has_constant_value(const4_value, 1.0f) && + op::util::has_constant_value(const5_value, 0.5f); + + if (!valid_constant_values) { + return false; + } + + auto gelu = std::make_shared(x_output, op::GeluApproximationMode::TANH); + + gelu->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info( + { + pattern_to_output.at(mul1).get_node_shared_ptr(), + pattern_to_output.at(mul2).get_node_shared_ptr(), + pattern_to_output.at(add1).get_node_shared_ptr(), + pattern_to_output.at(mul3).get_node_shared_ptr(), + pattern_to_output.at(mul4).get_node_shared_ptr(), + pattern_to_output.at(tanh).get_node_shared_ptr(), + pattern_to_output.at(add2).get_node_shared_ptr(), + pattern_to_output.at(mul5).get_node_shared_ptr(), + pattern_to_output.at(mul6).get_node_shared_ptr(), + }, + gelu); + ov::replace_node(m.get_match_root(), gelu); + return true; + }; + + auto m = std::make_shared(mul6, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp index 7c5311307d7d95..aa1f1d32a3da16 100644 --- a/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/tests/common_optimizations/gelu_fusion.cpp @@ -9,14 +9,21 @@ #include #include +#include #include #include #include #include "common_test_utils/ov_test_utils.hpp" #include "openvino/core/model.hpp" -#include "openvino/opsets/opset7.hpp" -#include "openvino/opsets/opset9.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include 
"openvino/op/divide.hpp" +#include "openvino/op/erf.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/tanh.hpp" #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "transformations/convert_precision.hpp" @@ -28,17 +35,17 @@ using namespace ov; TEST_F(TransformationTestsF, GeluFusionPatternOne) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -47,24 +54,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOne) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, 
Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, mul_const); - auto mul = std::make_shared(mul_first, add); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, mul_const); + auto mul = std::make_shared(mul_first, add); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -73,24 +80,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternOneF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, 
div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -99,24 +106,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternTwo) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -125,24 +132,24 @@ 
TEST_F(TransformationTestsF, GeluFusionPatternTwoF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThree) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {M_SQRT2}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -151,24 +158,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThree) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f16, Shape{1}, {M_SQRT2}); - auto 
add_const = opset7::Constant::create(element::f16, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f16, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(add, mul_const); - auto mul = std::make_shared(data, mul_first); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(add, mul_const); + auto mul = std::make_shared(data, mul_first); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -177,24 +184,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternThreeF16) { { auto data = std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFour) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f32, Shape{1}, {0.5f}); + auto mul1_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = 
std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -203,24 +210,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFour) { { auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { - auto data = std::make_shared(element::f16, Shape{2, 2}); + auto data = std::make_shared(element::f16, Shape{2, 2}); - auto mul1_const = opset9::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); - auto add_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul2_const = opset9::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul1_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {1.0f / M_SQRT2}); + auto add_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); + auto mul2_const = ov::op::v0::Constant::create(element::f16, Shape{1}, {0.5f}); - auto mul1 = std::make_shared(data, mul1_const); - auto erf = std::make_shared(mul1); - auto mul2 = std::make_shared(erf, mul2_const); - auto add = std::make_shared(mul2, add_const); - auto mul3 = std::make_shared(data, add); + auto mul1 = std::make_shared(data, mul1_const); + auto erf = std::make_shared(mul1); + auto mul2 = std::make_shared(erf, mul2_const); + auto add = std::make_shared(mul2, add_const); + auto mul3 = std::make_shared(data, add); model = std::make_shared(NodeVector{mul3}, ParameterVector{data}); @@ -229,24 +236,24 @@ TEST_F(TransformationTestsF, GeluFusionPatternFourF16) { { auto data = 
std::make_shared(element::f16, Shape{2, 2}); - auto gelu = std::make_shared(data); + auto gelu = std::make_shared(data); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4149}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4149}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -257,17 +264,17 @@ TEST_F(TransformationTestsF, GeluFusionPatternIncorrectDivConstValue) { TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { { - auto data = std::make_shared(element::f32, Shape{2, 2}); + auto data = std::make_shared(element::f32, Shape{2, 2}); - auto div_const = opset7::Constant::create(element::f32, Shape{1}, {1.4142}); - auto add_const = opset7::Constant::create(element::f32, Shape{1}, {1.0}); - auto mul_const = opset7::Constant::create(element::f32, Shape{1}, {0.5}); + auto div_const = 
ov::op::v0::Constant::create(element::f32, Shape{1}, {1.4142}); + auto add_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {1.0}); + auto mul_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.5}); - auto div = std::make_shared(data, div_const); - auto erf = std::make_shared(div); - auto add = std::make_shared(erf, add_const); - auto mul_first = std::make_shared(data, add); - auto mul = std::make_shared(mul_first, mul_const); + auto div = std::make_shared(data, div_const); + auto erf = std::make_shared(div); + auto add = std::make_shared(erf, add_const); + auto mul_first = std::make_shared(data, add); + auto mul = std::make_shared(mul_first, mul_const); model = std::make_shared(NodeVector{mul}, ParameterVector{data}); model_ref = std::make_shared(NodeVector{mul}, ParameterVector{data}); @@ -278,60 +285,62 @@ TEST_F(TransformationTestsF, GeluFusionPatternTooShortDivConstValue) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_equal_const_values) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + 
std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_param = std::make_shared(element::f32, Shape{1}); - auto pow = std::make_shared(input, pow_param); - auto mul_0_param = std::make_shared(element::f32, Shape{1}); - auto mul_0 = std::make_shared(pow, mul_0_param); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_param = std::make_shared(element::f32, Shape{1}); + auto pow = std::make_shared(input, pow_param); + auto mul_0_param = std::make_shared(element::f32, Shape{1}); + auto mul_0 = std::make_shared(pow, 
mul_0_param); + auto add_0 = std::make_shared(input, mul_0); - auto mul_1_param = std::make_shared(element::f32, Shape{1}); - auto mul_1 = std::make_shared(add_0, mul_1_param); + auto mul_1_param = std::make_shared(element::f32, Shape{1}); + auto mul_1 = std::make_shared(add_0, mul_1_param); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_param = std::make_shared(element::f32, Shape{1}); - auto add_1 = std::make_shared(tanh, add_1_param); + auto add_1_param = std::make_shared(element::f32, Shape{1}); + auto add_1 = std::make_shared(tanh, add_1_param); - auto mul_2_param = std::make_shared(element::f32, Shape{1}); - auto mul_2 = std::make_shared(add_1, mul_2_param); + auto mul_2_param = std::make_shared(element::f32, Shape{1}); + auto mul_2 = std::make_shared(add_1, mul_2_param); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared( NodeVector{mul_3}, @@ -342,63 +351,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_params_no_conversion) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); + auto input = std::make_shared(element::f32, Shape{2, 2}); auto pow_constant = - std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + std::make_shared(element::f32, Shape{1}, std::vector{3.0f + 1.0e-8f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto 
mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto 
pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -407,62 +420,66 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_pow_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); - auto mul_0 = 
std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.04515f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, 
ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{1.4715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = 
std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -471,61 +488,64 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_0_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.7980868f}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto 
mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(10.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(10.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, 
std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -534,63 +554,67 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); auto add_1_constant = - std::make_shared(element::f32, 
Shape{1}, std::vector{1.0f + 1.0e-8f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + std::make_shared(element::f32, Shape{1}, std::vector{1.0f + 1.0e-8f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, 
Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{2.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.5f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); @@ -599,65 +623,110 @@ TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_add_1_value) { TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_epsilon_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = 
std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); auto mul_2_constant = - std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + std::make_shared(element::f32, Shape{1}, std::vector{0.5f + 1.0e-8f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } { - auto data = std::make_shared(element::f32, Shape{2, 2}); - auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto gelu = std::make_shared(data, op::GeluApproximationMode::TANH); model_ref = std::make_shared(NodeVector{gelu}, ParameterVector{data}); } } TEST_F(TransformationTestsF, GeluFusionTanhWithTanh_wrong_mul_2_value) { { - auto input = std::make_shared(element::f32, Shape{2, 2}); - auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); - auto pow = std::make_shared(input, pow_constant); - auto mul_0_constant = std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); - auto mul_0 = std::make_shared(pow, mul_0_constant); - auto add_0 = 
std::make_shared(input, mul_0); + auto input = std::make_shared(element::f32, Shape{2, 2}); + auto pow_constant = std::make_shared(element::f32, Shape{1}, std::vector{3.0f}); + auto pow = std::make_shared(input, pow_constant); + auto mul_0_constant = + std::make_shared(element::f32, Shape{1}, std::vector{0.044715f}); + auto mul_0 = std::make_shared(pow, mul_0_constant); + auto add_0 = std::make_shared(input, mul_0); - constexpr float pi = 3.141592653589793238462643383279502884f; auto mul_1_constant = - std::make_shared(element::f32, Shape{1}, std::vector{std::sqrt(2.0f / pi)}); - auto mul_1 = std::make_shared(add_0, mul_1_constant); + std::make_shared(element::f32, + Shape{1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul_1 = std::make_shared(add_0, mul_1_constant); - auto tanh = std::make_shared(mul_1); + auto tanh = std::make_shared(mul_1); - auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); - auto add_1 = std::make_shared(tanh, add_1_constant); + auto add_1_constant = std::make_shared(element::f32, Shape{1}, std::vector{1.0f}); + auto add_1 = std::make_shared(tanh, add_1_constant); - auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); - auto mul_2 = std::make_shared(add_1, mul_2_constant); + auto mul_2_constant = std::make_shared(element::f32, Shape{1}, std::vector{5.0f}); + auto mul_2 = std::make_shared(add_1, mul_2_constant); - auto mul_3 = std::make_shared(input, mul_2); + auto mul_3 = std::make_shared(input, mul_2); model = std::make_shared(NodeVector{mul_3}, ParameterVector{input}); manager.register_pass(); } } + +TEST_F(TransformationTestsF, FoldGeluOperation) { + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto const1 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.044715f}); + + auto mul1 = std::make_shared(param, const1); + auto mul2 = std::make_shared(mul1, param); + + auto const2 = 
ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add1 = std::make_shared(const2, mul2); + + auto const3 = ov::op::v0::Constant::create(element::f32, + Shape{1, 1, 1}, + std::vector{static_cast(std::sqrt(2.0 / M_PI))}); + auto mul3 = std::make_shared(param, const3); + + auto mul4 = std::make_shared(add1, mul3); + auto tan = std::make_shared(mul4); + + auto const4 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{1.0}); + auto add2 = std::make_shared(tan, const4); + + auto const5 = ov::op::v0::Constant::create(element::f32, Shape{1, 1, 1}, std::vector{0.5}); + auto mul5 = std::make_shared(param, const5); + + auto mul6 = std::make_shared(add2, mul5); + + auto result = std::make_shared(mul6); + model = std::make_shared(NodeVector{result}, ParameterVector{param}); + + manager.register_pass(); + } + + { + auto param = std::make_shared(element::f32, Shape{1006, 2, 100, 3, 4096}); + auto gelu = std::make_shared(param, ov::op::GeluApproximationMode::TANH); + auto result = std::make_shared(gelu); + model_ref = std::make_shared(NodeVector{result}, ParameterVector{param}); + } +} From 63fff9d27026c1332cc757a96385439c0970dc96 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 24 Oct 2023 18:34:37 +0400 Subject: [PATCH 026/275] [IE TESTS][CONFORMANCE] Move Calculate reference over TEMPLATE plugin instead of `interpreter` (#12532) * [CONFORMANCE] Enable template calculation reference * Move convert init to cpu * skip gpu --- .../shared_tests_instances/core_config.cpp | 2 + .../single_layer_tests/adaptive_pooling.cpp | 12 +- .../single_layer_tests/batch_to_space.cpp | 143 +++++++++--------- .../classes/random_uniform.cpp | 21 +-- .../classes/random_uniform.hpp | 2 - .../convolution_backprop_data.cpp | 39 +---- .../group_convolution_backprop_data.cpp | 19 +-- .../functional/single_layer_tests/one_hot.cpp | 5 +- .../src/input_output_tensor_reuse.cpp | 2 - .../gpu_dyn_batch_shape_tests.cpp | 12 +- .../skip_tests_config.cpp 
| 2 + .../dynamic/convolution_backprop_data.cpp | 19 +-- .../group_convolution_backprop_data.cpp | 19 +-- .../dynamic/random_uniform.cpp | 6 - .../shared_test_classes/CMakeLists.txt | 4 +- .../shared_test_classes/base/ov_subgraph.hpp | 14 +- .../src/base/ov_subgraph.cpp | 141 +++++++++-------- .../include/ov_models/utils/ov_helpers.hpp | 1 + .../common_test_utils/test_constants.hpp | 2 + .../src/graph_comparator.cpp | 1 - .../src/ov_plugin_cache.cpp | 15 +- 21 files changed, 225 insertions(+), 256 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp index dfae3a030abdeb..fee91b5c5a38d3 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp @@ -34,6 +34,8 @@ void core_configuration(ov::test::SubgraphBaseTest* test) { test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()}); } #endif + // todo: issue: 123320 + test->convert_precisions = {{ ov::element::bf16, ov::element::f32 }, { ov::element::f16, ov::element::f32 }}; } } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp index 1efb20ffc1d8d2..8cab3926b72a5f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/adaptive_pooling.cpp @@ -91,6 +91,10 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterfaceget_parameters().size() == 2) { + generatePooledVector(); + functionRefs = createFunction(true); + } } void generatePooledVector() { @@ -124,14 +128,6 @@ class AdaPoolLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if 
(function->get_parameters().size() == 2) { - generatePooledVector(); - funcRef = createFunction(true); - } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp index 836931ec465669..99367ef14e8ba9 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp @@ -9,14 +9,12 @@ using namespace InferenceEngine; using namespace CPUTestUtils; -using namespace ngraph::opset3; using namespace ov::test; namespace CPULayerTestsDefinitions { namespace { std::vector blockShape, cropsBegin, cropsEnd; - ngraph::Shape paramShape; } // namespace using BatchToSpaceLayerTestCPUParams = std::tuple< @@ -24,7 +22,7 @@ using BatchToSpaceLayerTestCPUParams = std::tuple< std::vector, // block shape std::vector, // crops begin std::vector, // crops end - Precision , // Network precision + ov::element::Type, // Network precision CPUSpecificParams>; class BatchToSpaceCPULayerTest : public testing::WithParamInterface, @@ -32,9 +30,9 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface &obj) { std::vector inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = obj.param; + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = obj.param; std::ostringstream result; if (inputShapes.front().first.size() != 0) { result << "IS=("; @@ -53,39 +51,40 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { inputs.clear(); - const auto& funcInputs = function->inputs(); - 
for (size_t i = 0; i < funcInputs.size(); i++) { - const auto& funcInput = funcInputs[i]; + const auto& parameters = function->get_parameters(); + for (size_t i = 0; i < parameters.size(); i++) { + const auto& parameter = parameters[i]; ov::Tensor tensor; - if (i == 0U) { - tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); - } else if (i == 1U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < blockShape.size(); j++) { - dataPtr[j] = blockShape[j]; + const auto& param_type = parameter->get_output_element_type(0); + const auto& static_shape = targetInputStaticShapes[i]; + switch (i) { + case 0: { + tensor = ov::test::utils::create_and_fill_tensor(param_type, static_shape, 2560, 0, 256); + break; } - } else if (i == 2U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsBegin.size(); j++) { - dataPtr[j] = cropsBegin[j]; + case 1: { + ASSERT_EQ(ov::shape_size(static_shape), blockShape.size()); + tensor = ov::Tensor(param_type, static_shape, blockShape.data()); + break; } - } else if (i == 3U) { - tensor = ov::Tensor(funcInput.get_element_type(), paramShape); - auto *dataPtr = tensor.data(); - for (size_t j = 0; j < cropsEnd.size(); j++) { - dataPtr[j] = cropsEnd[j]; + case 2: + case 3: { + ASSERT_EQ(ov::shape_size(static_shape), cropsEnd.size()); + tensor = ov::Tensor(param_type, static_shape, cropsEnd.data()); + break; + } + default: { + throw std::runtime_error("Incorrect parameter number!"); } } - inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + inputs.insert({parameter, tensor}); } } @@ -94,55 +93,51 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface inputShapes; - Precision netPrecision; + ov::element::Type model_type; CPUSpecificParams cpuParams; - std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, 
netPrecision, cpuParams) = this->GetParam(); + std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, model_type, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + init_input_shapes(inputShapes); - auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - const std::vector inputShapesVec{inputShapes}; - init_input_shapes(inputShapesVec); - - if (strcmp(netPrecision.name(), "U8") == 0) + if (model_type == ov::element::Type_t::u8) { selectedType = std::string("ref_any_") + "I8"; - else - selectedType = std::string("ref_any_") + netPrecision.name(); - - ov::ParameterVector params{std::make_shared(ngPrec, inputDynamicShapes.front())}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - paramShape = {paramOuts[0].get_partial_shape().size()}; - - std::shared_ptr in2, in3, in4; - auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); - in2 = blockShapeParam; - params.push_back(blockShapeParam); - auto cropsBeginParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsBeginParam); - in3 = cropsBeginParam; - auto cropsEndParam = std::make_shared(ngraph::element::i64, paramShape); - params.push_back(cropsEndParam); - in4 = cropsEndParam; - auto btsNode = std::make_shared(paramOuts[0], in2, in3, in4); + } else { + std::string type_name = model_type.get_type_name(); + if (type_name == "f16") + type_name = "fp16"; + if (type_name == "f32") + type_name = "fp32"; + if (type_name == "f64") + type_name = "fp64"; + std::transform(type_name.begin(), type_name.end(), type_name.begin(), ::toupper); + selectedType = std::string("ref_any_") + type_name; + } + + std::shared_ptr in0, in1, in2, in3; + in0 = std::make_shared(model_type, inputDynamicShapes.front()); + in1 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[1]); + in2 = std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[2]); + in3 = 
std::make_shared(ov::element::Type_t::i64, inputDynamicShapes[3]); + auto btsNode = std::make_shared(in0, in1, in2, in3); btsNode->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(btsNode)}; - function = std::make_shared(results, params, "BatchToSpace"); + ngraph::ResultVector results{std::make_shared(btsNode)}; + function = std::make_shared(results, ov::ParameterVector{in0, in1, in2, in3}, "BatchToSpace"); } }; TEST_P(BatchToSpaceCPULayerTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(compiledModel, "BatchToSpace"); + // CheckPluginRelatedResults(compiledModel, "BatchToSpace"); }; namespace { -const std::vector netPrecision = { - Precision::U8, - Precision::I8, - Precision::I32, - Precision::FP32, - Precision::BF16 +const std::vector model_types = { + ov::element::Type_t::u8, + ov::element::Type_t::i8, + ov::element::Type_t::i32, + ov::element::Type_t::f32, + ov::element::Type_t::bf16 }; const std::vector> blockShape4D1 = {{1, 1, 1, 2}, {1, 2, 2, 1}}; @@ -226,7 +221,7 @@ const auto staticBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( @@ -234,7 +229,7 @@ const auto dynamicBatchToSpaceParamsSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( @@ -242,7 +237,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D1 = ::testing::Combine( ::testing::ValuesIn(blockShape4D1), ::testing::ValuesIn(cropsBegin4D1), ::testing::ValuesIn(cropsEnd4D1), - ::testing::ValuesIn(netPrecision), + 
::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -250,7 +245,7 @@ const auto staticBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( @@ -258,7 +253,7 @@ const auto dynamicBatchToSpaceParamsSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_4D)); const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( @@ -266,7 +261,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet4D2 = ::testing::Combine( ::testing::ValuesIn(blockShape4D2), ::testing::ValuesIn(cropsBegin4D2), ::testing::ValuesIn(cropsEnd4D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_4D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_4D, BatchToSpaceCPULayerTest, @@ -381,7 +376,7 @@ const auto staticBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( @@ -389,7 +384,7 @@ const auto dynamicBatchToSpaceParamsSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto 
dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( @@ -397,7 +392,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D1 = ::testing::Combine( ::testing::ValuesIn(blockShape5D1), ::testing::ValuesIn(cropsBegin5D1), ::testing::ValuesIn(cropsEnd5D1), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -405,7 +400,7 @@ const auto staticBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( @@ -413,7 +408,7 @@ const auto dynamicBatchToSpaceParamsSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParams_5D)); const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( @@ -421,7 +416,7 @@ const auto dynamicBatchToSpaceParamsWithBlockedSet5D2 = ::testing::Combine( ::testing::ValuesIn(blockShape5D2), ::testing::ValuesIn(cropsBegin5D2), ::testing::ValuesIn(cropsEnd5D2), - ::testing::ValuesIn(netPrecision), + ::testing::ValuesIn(model_types), ::testing::ValuesIn(cpuParamsWithBlock_5D)); INSTANTIATE_TEST_SUITE_P(smoke_StaticBatchToSpaceCPULayerTestCase1_5D, BatchToSpaceCPULayerTest, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp index 2f9706e7d2562e..282ebef47ba9bb 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp @@ -122,6 +122,14 @@ void RandomUniformLayerTestCPU::SetUp() { const ov::ResultVector results{std::make_shared(rnd_op)}; function = std::make_shared(results, in_params, "RandomUniformLayerTestCPU"); + + // todo: issue: 123320 + if (!InferenceEngine::with_cpu_x86_avx512_core()) { + convert_precisions.insert({ ov::element::bf16, ov::element::f32 }); + } + if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + convert_precisions.insert({ ov::element::f16, ov::element::f32 }); + } } template @@ -206,19 +214,6 @@ void RandomUniformLayerTestCPU::compare(const std::vector& expected, #undef CASE } -precisions_map RandomUniformLayerTestCPU::get_ref_precisions_convert_map() { - precisions_map precisions; - - if (!InferenceEngine::with_cpu_x86_avx512_core()) { - precisions.insert({ ov::element::bf16, ov::element::f32 }); - } - if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { - precisions.insert({ ov::element::f16, ov::element::f32 }); - } - - return precisions; -} - inline double less_or_equal(double a, double b) { return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits::epsilon()); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp index 1cb9f5fccc451a..8e071439bc8577 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -35,8 +35,6 @@ class RandomUniformLayerTestCPU : public testing::WithParamInterface& expected, const std::vector& actual) override; - precisions_map get_ref_precisions_convert_map() override; - template void rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual); diff --git 
a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp index 455c78a8c09ed0..a602d3cbac45a8 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp @@ -94,6 +94,13 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -111,38 +118,6 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - - void validate() override { - auto actualOutputs = get_plugin_outputs(); - if (function->get_parameters().size() == 2) { - auto pos = std::find_if(inputs.begin(), inputs.end(), - [](const std::pair, ov::Tensor> ¶ms) { - return params.first->get_friendly_name() == "param_1"; 
- }); - IE_ASSERT(pos != inputs.end()); - inputs.erase(pos); - } - auto expectedOutputs = calculate_refs(); - if (expectedOutputs.empty()) { - return; - } - ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); - - compare(expectedOutputs, actualOutputs); - } - void configure_model() override { ov::preprocess::PrePostProcessor p(function); { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp index 03f1f707254bc5..96a295830079ed 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp @@ -93,6 +93,13 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -110,18 +117,6 @@ class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need 
to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp index 8eda5f4221e77e..84f8c4b4740b22 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp @@ -101,13 +101,10 @@ class OneHotLayerCPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { if (function->get_parameters().size() == 2) { generateDepth(); - funcRef = createFunction(true); + functionRefs = createFunction(true); } - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); } void validate() override { auto actualOutputs = get_plugin_outputs(); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp index 612006be75dc2b..1a55b7204045d6 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/input_output_tensor_reuse.cpp @@ -61,7 +61,6 @@ class InputOutputTensorReuse : public SubgraphBaseTest { TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { compile_model(); std::vector inputShapes = {{1, 32, 5, 16}, {1, 32, 1, 16}}; - init_ref_function(functionRefs, inputShapes); generate_inputs(inputShapes); validate(); @@ -69,7 +68,6 @@ TEST_F(InputOutputTensorReuse, smoke_Input_Output_Binding) { for (size_t i = 0; i < num_iter; i++) { auto outputTensor = 
inferRequest.get_output_tensor(0); inputShapes.back() = outputTensor.get_shape(); - init_ref_function(functionRefs, inputShapes); auto itr = std::find_if(inputs.begin(), inputs.end(), [](const std::pair, ov::Tensor>& item) { return item.first->get_friendly_name() == "Param_1"; }); diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp index 964e6dcad3dcc5..9f4c18ef5d2ce4 100644 --- a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_batch_shape_tests.cpp @@ -7,6 +7,7 @@ #include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" #include "ov_models/subgraph_builders.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" @@ -59,8 +60,11 @@ class OVDynamicBatchShape_Tests : public WithParamInterfaceGetParam(); init_input_shapes(inputShape); @@ -73,6 +77,7 @@ class OVDynamicBatchShape_Tests : public WithParamInterfacereshape(dynShape); } + std::shared_ptr src_func; // std::map configuration; std::vector inputShape; @@ -81,7 +86,6 @@ class OVDynamicBatchShape_Tests : public WithParamInterface(); run(); } @@ -97,12 +101,12 @@ TEST_P(OVDynamicBatchShape_Tests, InferDynamicBatchBound_cached) { ov::test::utils::removeFilesWithExt(cacheFolderName, "cl_cache"); ov::test::utils::removeDir(cacheFolderName); - core = std::make_shared(); core->set_property(ov::cache_dir(cacheFolderName)); run(); } { - core = std::make_shared(); + core.reset(); + core = ov::test::utils::PluginCache::get().core(); core->set_property(ov::cache_dir(cacheFolderName)); run(); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 798282680dbccd..25b679cf22cc82 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -115,5 +115,7 @@ std::vector disabledTestPatterns() { R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)", // Dynamic state unsupported for now R"(.*MemoryDynamicBatch.*)", + // Issue: 123493 + R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp index d394eb7d05de17..a36426cd84c373 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp @@ -89,6 +89,13 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -106,18 +113,6 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape 
depends on 3rd deconvolution input data - // but the reference implementation doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp index fb955a63a837de..531c97fa218a16 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/group_convolution_backprop_data.cpp @@ -90,6 +90,13 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface& targetInputStaticShapes) override { + if (function->get_parameters().size() != 1) { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + functionRefs = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } inputs.clear(); const auto& funcInputs = function->inputs(); for (size_t i = 0; i < funcInputs.size(); ++i) { @@ -107,18 +114,6 @@ class GroupDeconvolutionLayerGPUTest : public testing::WithParamInterface &funcRef, const std::vector& targetInputStaticShapes) override { - if (function->get_parameters().size() == 1) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); - } else { - // WA: output_shape depends on 3rd deconvolution input data - // but the reference implementation 
doesn't implement shape inference - // so we need to build a new ngraph function and replace the 3rd input parameter with a constant - // to get valid output shapes - funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); - } - } - void validate() override { auto actualOutputs = get_plugin_outputs(); if (function->get_parameters().size() == 2) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp index 440924fd0a541e..755371e1b0a548 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/random_uniform.cpp @@ -140,12 +140,6 @@ class RandomUnifromDynamicGPUTest : public testing::WithParamInterface(results, params, "random_uniform_test"); } - precisions_map get_ref_precisions_convert_map() override { - // Do not convert reference function from FP16 to FP32 precision, since in case of RandomUniform operation - // data type is matter - return {}; - } - private: std::pair min_max_values; }; diff --git a/src/tests/functional/shared_test_classes/CMakeLists.txt b/src/tests/functional/shared_test_classes/CMakeLists.txt index a4f46b241437b0..0aa8d0f33592c1 100644 --- a/src/tests/functional/shared_test_classes/CMakeLists.txt +++ b/src/tests/functional/shared_test_classes/CMakeLists.txt @@ -12,6 +12,8 @@ ov_add_target( INCLUDES PUBLIC "$" + PRIVATE + "${OpenVINO_SOURCE_DIR}/src/plugins/template/include" ADDITIONAL_SOURCE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src LINK_LIBRARIES @@ -27,4 +29,4 @@ ov_build_target_faster(${TARGET_NAME} # install & export ov_developer_package_export_targets(TARGET ${TARGET_NAME} - INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") + INSTALL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include/") \ No newline at end of file diff --git 
a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 5ca0b6531a39f3..c76cd8fbc1bc72 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -34,15 +34,14 @@ class SubgraphBaseTest : public ov::test::TestsCommon { protected: virtual void compare(const std::vector& expected, const std::vector& actual); - - virtual void configure_model(); virtual void compile_model(); - virtual void init_ref_function(std::shared_ptr& funcRef, - const std::vector& targetInputStaticShapes); - virtual void generate_inputs(const std::vector& targetInputStaticShapes); virtual void infer(); virtual void validate(); + virtual void configure_model();; + virtual void generate_inputs(const std::vector& targetInputStaticShapes); + void update_ref_model(); + void match_parameters(); void init_input_shapes(const std::vector& shapes); void TearDown() override { @@ -65,6 +64,10 @@ class SubgraphBaseTest : public ov::test::TestsCommon { ov::CompiledModel compiledModel; ov::InferRequest inferRequest; + // to provide correct inputs for reference function + std::map, std::shared_ptr> matched_parameters; + precisions_map convert_precisions; + constexpr static const double disable_threshold = std::numeric_limits::max(); double abs_threshold = disable_threshold, rel_threshold = disable_threshold; @@ -75,7 +78,6 @@ class SubgraphBaseTest : public ov::test::TestsCommon { virtual std::vector calculate_refs(); virtual std::vector get_plugin_outputs(); - virtual precisions_map get_ref_precisions_convert_map(); friend void core_configuration(SubgraphBaseTest* test); }; diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 
016dc26cccdfc5..aca76c4e9e65ab 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -18,6 +18,8 @@ #include "openvino/pass/serialize.hpp" #include "transformations/convert_precision.hpp" +#include "template/properties.hpp" + #include "common_test_utils/graph_comparator.hpp" #include "ov_models/utils/ov_helpers.hpp" @@ -72,18 +74,7 @@ void SubgraphBaseTest::run() { try { compile_model(); for (const auto& targetStaticShapeVec : targetStaticShapes) { - try { - if (!inputDynamicShapes.empty()) { - // resize ngraph function according new target shape - // Note: output shapes of some nodes depend on the input data - // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes - init_ref_function(functionRefs, targetStaticShapeVec); - } - generate_inputs(targetStaticShapeVec); - } catch (const std::exception& ex) { - throw std::runtime_error("[IE TEST INFRA] Impossible to reshape ov::Model using the shape: " + - ov::test::utils::vec2str(targetStaticShapeVec) + " " + ex.what()); - } + generate_inputs(targetStaticShapeVec); validate(); } status = ov::test::utils::PassRate::Statuses::PASSED; @@ -208,9 +199,6 @@ void SubgraphBaseTest::compile_model() { auto start_time = std::chrono::system_clock::now(); configure_model(); - if (functionRefs == nullptr) { - functionRefs = function->clone(); - } core_configuration(this); compiledModel = core->compile_model(function, targetDevice, configuration); if (is_report_stages) { @@ -220,10 +208,6 @@ void SubgraphBaseTest::compile_model() { } } -void SubgraphBaseTest::init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) { - ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); -} - void SubgraphBaseTest::generate_inputs(const std::vector& targetInputStaticShapes) { inputs.clear(); auto inputMap = utils::getInputMap(); @@ -255,44 
+239,21 @@ void SubgraphBaseTest::infer() { inferRequest.infer(); } -precisions_map SubgraphBaseTest::get_ref_precisions_convert_map() { - //TODO: remove this conversions as soon as function interpreter fully support bf16 and f16 - precisions_map precisions = { - { ngraph::element::bf16, ngraph::element::f32 } - }; - - auto convert_added = false; - for (const auto ¶m : function->get_parameters()) { - for (size_t i = 0; i < param->get_output_size(); i++) { - for (const auto &node : param->get_output_target_inputs(i)) { - std::shared_ptr nodePtr = node.get_node()->shared_from_this(); - if (std::dynamic_pointer_cast(nodePtr)) { - convert_added = true; - break; - } - } - } - } - - if (!convert_added) { - precisions.insert({ ngraph::element::f16, ngraph::element::f32}); +void SubgraphBaseTest::update_ref_model() { + if (functionRefs == nullptr) { + functionRefs = function->clone(); } - - return precisions; -} - -std::vector SubgraphBaseTest::calculate_refs() { using InputsMap = std::map, ov::Tensor>; - auto functionToProcess = functionRefs->clone(); - precisions_map convert_precisions = get_ref_precisions_convert_map(); - pass::Manager manager; - manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); - manager.run_passes(functionToProcess); - functionToProcess->validate_nodes_and_infer_types(); + if (!convert_precisions.empty()) { + pass::Manager manager; + manager.register_pass(convert_precisions, type_to_fuse_map{}, false, false); + manager.run_passes(functionRefs); + functionRefs->validate_nodes_and_infer_types(); + } - ov::preprocess::PrePostProcessor p(functionToProcess); - const auto& inputNodes = functionToProcess->inputs(); + ov::preprocess::PrePostProcessor p(functionRefs); + const auto& inputNodes = functionRefs->inputs(); for (size_t i = 0; i < inputNodes.size(); ++i) { auto itr = std::find_if(inputs.begin(), inputs.end(), [&](const InputsMap::value_type& item) { @@ -310,18 +271,80 @@ std::vector SubgraphBaseTest::calculate_refs() { 
throw std::runtime_error(errMsg.str()); } } - - const auto& outputs = functionToProcess->outputs(); + const auto& outputs = functionRefs->outputs(); for (size_t i = 0; i < outputs.size(); ++i) { if (outType != ElementType::undefined && outType != outputs[i].get_element_type()) { p.output(i).tensor().set_element_type(outType); } } + functionRefs = p.build(); +} + +void SubgraphBaseTest::match_parameters() { + matched_parameters.clear(); + const auto& ref_params = functionRefs->get_parameters(); + const auto& params = function->get_parameters(); + size_t param_size = params.size(), ref_param_size = ref_params.size(); + if (params.size() < ref_params.size()) { + throw std::runtime_error("Incompatible parameters in original and reference model!"); + } + if (params.size() == ref_params.size()) { + for (size_t in_idx = 0; in_idx < params.size(); ++in_idx) { + matched_parameters.insert({ ref_params[in_idx], params[in_idx] }); + } + } else { + auto it = params.begin(); + auto it_ref = ref_params.begin(); + while (it_ref != ref_params.end() && it != params.end()) { + bool is_match_in = true; + if ((*it_ref)->get_output_partial_shape(0).is_static()) { + if (inputs.at(*it).get_shape() != (*it_ref)->get_output_shape(0)) { + is_match_in = false; + } + } else if ((*it)->get_output_partial_shape(0) != (*it_ref)->get_output_partial_shape(0)) { + is_match_in = false; + } + if ((*it)->get_output_element_type(0) != ((*it_ref)->get_output_element_type(0))) { + is_match_in = false; + } + if (is_match_in) { + matched_parameters.insert({ *it_ref, *it }); + ++it_ref; + } + ++it; + } + if (matched_parameters.size() != ref_params.size()) { + throw std::runtime_error("Incompatible parameters in original and reference model!"); + } + } +} + +std::vector SubgraphBaseTest::calculate_refs() { + if (is_report_stages) { + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is started"<< std::endl; + } + auto start_time = std::chrono::system_clock::now(); - functionToProcess = 
p.build(); + update_ref_model(); + match_parameters(); - auto results = ngraph::helpers::interpretFunction(functionToProcess, inputs); - return results; + auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE, {{ ov::template_plugin::disable_transformations(true) }}); + auto inferRequestRef = compiledModelRef.create_infer_request(); + for (const auto& param : functionRefs->get_parameters()) { + inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param])); + } + inferRequestRef.infer(); + + auto outputs = std::vector{}; + for (const auto& output : functionRefs->outputs()) { + outputs.push_back(inferRequestRef.get_tensor(output)); + } + if (is_report_stages) { + auto end_time = std::chrono::system_clock::now(); + std::chrono::duration duration = end_time - start_time; + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is finished successfully. Duration is " << duration.count() << "s" << std::endl; + } + return outputs; } std::vector SubgraphBaseTest::get_plugin_outputs() { @@ -361,7 +384,7 @@ void SubgraphBaseTest::validate() { } ASSERT_EQ(actualOutputs.size(), expectedOutputs.size()) - << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size(); + << "TEMPLATE plugin has " << expectedOutputs.size() << " outputs, while " << targetDevice << " " << actualOutputs.size(); if (is_report_stages) { std::cout << "[ COMPARATION ] `ov_tensor_utils.hpp::compare()` is started"<< std::endl; } diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp index 5d0f3cd4ac7d0b..862ff798efcf30 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp @@ -163,6 +163,7 @@ std::vector convertOutputPrecision(const std::vector std::ostream& operator<<(std::ostream& os, 
MemoryTransformation type); +// todo: remove the following function from the source code after cleaning up VPU repo void resize_function(std::shared_ptr function, const std::vector& targetInputStaticShapes); using ov::test::utils::operator<<; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp index 70778266d8f1b2..c8026f4ef2d7e0 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp @@ -23,6 +23,8 @@ const char API_REPORT_FILENAME[] = "report_api"; const char REPORT_EXTENSION[] = ".xml"; const char LST_EXTENSION[] = ".lst"; +const char TEMPLATE_LIB[] = "openvino_template_plugin"; + const char DEVICE_SUFFIX_SEPARATOR = '.'; const unsigned int maxFileNameLength = 140; diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index 053e0d6d42899a..f6cc70ed10a625 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -1036,7 +1036,6 @@ AccuracyCheckResult accuracy_check(const std::shared_ptr& ref_functio auto ref_outputs = ngraph::helpers::interpretFunction(ref_function, ref_input_data); auto outputs = ngraph::helpers::interpretFunction(cur_function, cur_input_data); - IE_ASSERT(ref_outputs.size() == outputs.size()); for (int i = 0; i < ref_outputs.size(); i++) { diff --git a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp index 118368f7a1180f..483b9b98c38db2 100644 --- a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp +++ b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp @@ -55,21 +55,20 @@ std::shared_ptr 
PluginCache::core(const std::string& deviceToCheck) { } assert(0 != ov_core.use_count()); - // register template plugin if it is needed - try { - std::string pluginName = "openvino_template_plugin"; - pluginName += OV_BUILD_POSTFIX; + // Register Template plugin as a reference provider + const auto devices = ov_core->get_available_devices(); + if (std::find(devices.begin(), devices.end(), std::string(ov::test::utils::DEVICE_TEMPLATE)) == devices.end()) { ov_core->register_plugin( - ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), - "TEMPLATE"); - } catch (...) { + ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), + std::string(ov::test::utils::TEMPLATE_LIB) + OV_BUILD_POSTFIX), + ov::test::utils::DEVICE_TEMPLATE); } if (!deviceToCheck.empty()) { auto properties = ov_core->get_property(deviceToCheck, ov::supported_properties); if (std::find(properties.begin(), properties.end(), ov::available_devices) != properties.end()) { - auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); + const auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); if (availableDevices.empty()) { std::cerr << "No available devices for " << deviceToCheck << std::endl; From c6707aab86e3647af6bd17e53f0f1b59322f786e Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Tue, 24 Oct 2023 16:45:48 +0200 Subject: [PATCH 027/275] [CPU] Add input type check into in-place condition (#20529) --- src/plugins/intel_cpu/src/edge.cpp | 4 +- .../subgraph_tests/src/inplace_edge.cpp | 77 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 734fd462acbebb..ba3474fddf2cc6 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -450,7 +450,9 @@ void 
Edge::init() { DEBUG_LOG(*this, " getBaseEdge() return itself"); changeStatus(Status::NeedAllocation); } else { - if (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant()) { + if (Type::Input == edgePtr->getParent()->getType() && + edgePtr->getParent()->isConstant() && + !edgePtr->getChild()->isConstant()) { changeStatus(Status::NeedAllocation); DEBUG_LOG(*this, " edge inplace from ", *edgePtr, " is broken!"); return; diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp new file mode 100644 index 00000000000000..1385313ce88d41 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/inplace_edge.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "test_utils/cpu_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ov_models/utils/ov_helpers.hpp" +#include "ov_models/builders.hpp" + +using namespace CPUTestUtils; +using namespace InferenceEngine; + +namespace SubgraphTestsDefinitions { +// If a node (CumSum) with constant parents has several non-constant nodes (Eltwises) than the edge is broken. +// The fix is to check node type - is should be Input. 
+// Subgraph: +/* + * Constant Constant + * \ / + * \ / + * CumSum + * Parameter / \ Parameter + * \ / \ / + * \ / \ / + * Eltwise Eltwise + * \ / + * Eltwise + * | + * Result + */ + +using namespace ov::test; + +class NonInputInPlaceTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::ostringstream result; + result << "NonInputInPlaceTest_inPrc=outPrc=" << obj.param; + return result.str(); + } + + void SetUp() override { + targetDevice = utils::DEVICE_CPU; + configuration.insert({ov::hint::inference_precision.name(), ov::element::f16.to_string()}); + const std::vector inputShape = {1, 11, 3, 3}; + targetStaticShapes = {{inputShape, inputShape}}; + ElementType prc = this->GetParam(); + + ov::ParameterVector inputParams {std::make_shared(prc, ov::Shape(inputShape)), + std::make_shared(prc, ov::Shape(inputShape))}; + + auto cumsum_tensor = ngraph::opset8::Constant::create(prc, inputShape, {10.0f}); + auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i32, {}, {0}); + const auto cumsum = std::make_shared(cumsum_tensor, axis_node); + + auto eltwiseMul = ngraph::builder::makeEltwise(inputParams[0], cumsum, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto eltwiseAdd1 = ngraph::builder::makeEltwise(inputParams[1], cumsum, ngraph::helpers::EltwiseTypes::ADD); + auto eltwiseAdd2 = ngraph::builder::makeEltwise(eltwiseAdd1, eltwiseMul, ngraph::helpers::EltwiseTypes::ADD); + + ngraph::ResultVector results{std::make_shared(eltwiseAdd2)}; + function = std::make_shared(results, inputParams, "NonInputInPlaceT"); + } +}; + +namespace { + TEST_P(NonInputInPlaceTest, CompareWithRefs) { + run(); + } + +INSTANTIATE_TEST_SUITE_P(smoke_NonInputInPlaceTest_CPU, NonInputInPlaceTest, + testing::Values(ngraph::element::f32, ngraph::element::f16), + NonInputInPlaceTest::getTestCaseName); + +} // namespace +} // namespace SubgraphTestsDefinitions From 
20bd58759ae6e3f9f9417ef3f6d75e2ce97faaab Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Tue, 24 Oct 2023 15:48:29 +0100 Subject: [PATCH 028/275] [GPU] Refactor grn, gru_cell, group_convolution, group_convolution_backprop_data, is_inf, log_softmax, logical, mat_mul, lrn, lstm_cell (#20520) * grn * gru_cell * group_convolution * group_convolution_backprop_data * is_inf * log_softmax * logical * mat_mul * lrn * lstm_cell * fix after review --- .../single_layer_tests/grn.cpp | 40 +++--- .../single_layer_tests/group_convolution.cpp | 56 +++----- .../group_convolution_backprop_data.cpp | 57 ++++---- .../single_layer_tests/gru_cell.cpp | 44 +++--- .../single_layer_tests/is_inf.cpp | 13 +- .../single_layer_tests/log_softmax.cpp | 43 +++--- .../single_layer_tests/logical.cpp | 62 ++++----- .../single_layer_tests/lrn.cpp | 15 +-- .../single_layer_tests/lstm_cell.cpp | 16 +-- .../single_layer_tests/mat_mul.cpp | 125 ++++++++++++------ 10 files changed, 233 insertions(+), 238 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp index 9d3c5b80145d0f..2990b49ee6e44e 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grn.cpp @@ -3,30 +3,28 @@ // #include -#include "single_layer_tests/grn.hpp" +#include "single_op_tests/grn.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::helpers; - namespace { - // Common params - const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 - }; +using ov::test::GrnLayerTest; +// Common params +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 +}; + +std::vector> input_shapes_static = { + {{1, 3, 30, 30}}, + {{2, 16, 
15, 20}} +}; - INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic, - GrnLayerTest, - ::testing::Combine(::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30}), - std::vector({2, 16, 15, 20})), - ::testing::Values(0.33f, 1.1f), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - GrnLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Grn_Basic, + GrnLayerTest, + ::testing::Combine(::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), + ::testing::ValuesIn({0.33f, 1.1f}), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + GrnLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp index 446b0bc46336d0..e643f5716e5696 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution.cpp @@ -4,16 +4,16 @@ #include -#include "single_layer_tests/group_convolution.hpp" +#include "single_op_tests/group_convolution.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::GroupConvolutionLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 +const std::vector netPrecisions = { + ov::element::f32 }; + /* ============= 1D GroupConvolution ============= */ // 1D group convolution is not working correctly const std::vector> kernels1D = {{3}}; @@ -33,18 +33,15 @@ const auto 
groupConv1DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(dilations1D), ::testing::ValuesIn(numOutChannels1D), ::testing::ValuesIn(numGroups1D), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution1D_ExplicitPadding_Disabled, GroupConvolutionLayerTest, ::testing::Combine( groupConv1DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 16, 30}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); @@ -56,18 +53,15 @@ const auto dwConv1DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(dilations1D), ::testing::ValuesIn(numOutChannels1D), ::testing::ValuesIn(numDWGroups1D), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ov::op::PadType::EXPLICIT) ); INSTANTIATE_TEST_SUITE_P(smoke_DwGroupConvolution1D_ExplicitPadding, GroupConvolutionLayerTest, ::testing::Combine( dwConv1DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 16, 30}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); @@ -105,11 +99,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_ExplicitPadding, GroupConvolut 
::testing::Combine( groupConv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30, 30})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 16, 30, 30}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); @@ -117,11 +108,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution2D_AutoPadValid, GroupConvolution ::testing::Combine( groupConv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 30, 30})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 16, 30, 30}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); @@ -157,11 +145,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_ExplicitPadding, GroupConvolut ::testing::Combine( groupConv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 10, 10, 10})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 4, 10, 10, 10}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); @@ -169,11 +154,8 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_GroupConvolution3D_AutoPadValid, GroupConvolution ::testing::Combine( groupConv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 10, 10, 10})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>({{{1, 4, 10, 10, 10}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvolutionLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp index d7baff2c2e0d7f..40d364385fe7d9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp @@ -4,26 +4,28 @@ #include -#include "single_layer_tests/group_convolution_backprop_data.hpp" +#include "single_op_tests/group_convolution_backprop_data.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::GroupConvBackpropLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 +const std::vector netPrecisions = { + ov::element::f32 }; -const std::vector> emptyOutputShape = {{}}; +const std::vector emptyOutputShape = {{}}; const std::vector> emptyOutputPadding = {{}}; const std::vector numOutChannels = {16, 32}; const std::vector numGroups = {2, 8, 16}; /* ============= 2D GroupConvolution ============= */ -const std::vector> inputShapes2D = {{1, 16, 10, 10}, - {1, 32, 10, 10}}; +const std::vector> 
inputShapes2D = { + {{1, 16, 10, 10}}, + {{1, 32, 10, 10}} +}; + const std::vector> kernels2D = {{1, 1}, {3, 3}}; const std::vector> strides2D = {{1, 1}}; const std::vector> padBegins2D = {{0, 0}}; @@ -38,7 +40,7 @@ const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) ); const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine( @@ -49,7 +51,7 @@ const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::VALID), + ::testing::Values(ov::op::PadType::VALID), ::testing::ValuesIn(emptyOutputPadding) ); @@ -57,11 +59,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_ExplicitPadding, GroupCon ::testing::Combine( groupConvBackpropData2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvBackpropLayerTest::getTestCaseName); @@ -70,18 +68,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData2D_AutoPadValid, GroupConvBa ::testing::Combine( groupConvBackpropData2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvBackpropLayerTest::getTestCaseName); /* ============= 3D GroupConvolution ============= */ -const std::vector> inputShapes3D = {{1, 16, 5, 5, 5}, - {1, 32, 5, 5, 5}}; +const std::vector> inputShapes3D = { + {{1, 16, 5, 5, 5}}, + {{1, 32, 5, 5, 5}} +}; + const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; const std::vector> strides3D = {{1, 1, 1}}; const std::vector> padBegins3D = {{0, 0, 0}}; @@ -96,7 +93,7 @@ const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::Values(ov::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) ); const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine( @@ -107,7 +104,7 @@ const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), ::testing::ValuesIn(numGroups), - ::testing::Values(ngraph::op::PadType::VALID), + ::testing::Values(ov::op::PadType::VALID), ::testing::ValuesIn(emptyOutputPadding) ); @@ -115,11 +112,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_ExplicitPadding, GroupCon ::testing::Combine( groupConvBackpropData3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes3D), + 
::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvBackpropLayerTest::getTestCaseName); @@ -128,11 +121,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConvBackpropData3D_AutoPadValid, GroupConvBa ::testing::Combine( groupConvBackpropData3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes3D)), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(ov::test::utils::DEVICE_GPU)), GroupConvBackpropLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp index df58f1314d6346..70fe6f0d8fd995 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp @@ -4,28 +4,34 @@ #include -#include "single_layer_tests/gru_cell.hpp" +#include "single_op_tests/gru_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { - std::vector should_decompose{false, true}; - std::vector batch{5}; - std::vector hidden_size{1, 10}; - std::vector input_size{1, 30}; - std::vector> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"}, - {"tanh", "relu"}}; - std::vector clip = {0.0f, 0.7f}; - std::vector linear_before_reset = {true, false}; - std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - 
ngraph::helpers::InputLayerType::PARAMETER - }; - std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; - - INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest, +using ov::test::GRUCellTest; + +std::vector should_decompose{false, true}; +std::vector batch{5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +std::vector> activations = { + {"relu", "tanh"}, + {"tanh", "sigmoid"}, + {"sigmoid", "tanh"}, + {"tanh", "relu"} +}; + +std::vector clip = {0.0f, 0.7f}; +std::vector linear_before_reset = {true, false}; +std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER +}; + +std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; + +INSTANTIATE_TEST_SUITE_P(GRUCellCommon, GRUCellTest, ::testing::Combine( ::testing::ValuesIn(should_decompose), ::testing::ValuesIn(batch), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp index 6614ab44b7e294..5f4e9c3f1acd84 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/is_inf.cpp @@ -2,17 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "shared_test_classes/single_layer/is_inf.hpp" - -#include -#include - -using namespace ov::test; -using namespace ov::test::subgraph; +#include "single_op_tests/is_inf.hpp" namespace { +using ov::test::IsInfLayerTest; -const std::vector> inShapesStatic = { +const std::vector> inShapesStatic = { {{{}, {{2}}}}, {{{}, {{10, 200}}}}, {{{}, {{4, 4, 16}}}}, @@ -21,7 +16,7 @@ const std::vector> inShapesStatic = { {{{}, {{16, 16, 16, 16, 16, 16}}}}, }; -constexpr std::array netPrecisions = {ov::element::f32, ov::element::f16}; +constexpr std::array netPrecisions = 
{ov::element::f32, ov::element::f16}; constexpr std::array detectNegative = {true, false}; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp index 1f27e97f8f97c3..57359e6d928776 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/log_softmax.cpp @@ -4,21 +4,20 @@ #include -#include "single_layer_tests/log_softmax.hpp" +#include "single_op_tests/log_softmax.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::LogSoftmaxLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +const std::vector netPrecisions = { + ov::element::f32, }; -const std::vector inputShapes2D = { - InferenceEngine::SizeVector {1, 100}, - InferenceEngine::SizeVector {100, 1}, - InferenceEngine::SizeVector {10, 10}, +const std::vector> inputShapes2D = { + {{1, 100}}, + {{100, 1}}, + {{10, 10}}, }; const std::vector axis2D = { @@ -28,20 +27,15 @@ const std::vector axis2D = { INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax2D, LogSoftmaxLayerTest, testing::Combine(testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes2D), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes2D)), testing::ValuesIn(axis2D), - testing::Values(ov::test::utils::DEVICE_GPU), - testing::Values(std::map())), + testing::Values(ov::test::utils::DEVICE_GPU)), LogSoftmaxLayerTest::getTestCaseName); -const std::vector inputShapes4D = { - InferenceEngine::SizeVector {1, 100, 1, 1}, - 
InferenceEngine::SizeVector {1, 3, 4, 3}, - InferenceEngine::SizeVector {2, 3, 4, 5}, +const std::vector> inputShapes4D = { + {{1, 100, 1, 1}}, + {{1, 3, 4, 3}}, + {{2, 3, 4, 5}}, }; const std::vector axis4D = { @@ -51,14 +45,9 @@ const std::vector axis4D = { INSTANTIATE_TEST_SUITE_P(smoke_LogSoftmax4D, LogSoftmaxLayerTest, testing::Combine(testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes4D), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes4D)), testing::ValuesIn(axis4D), - testing::Values(ov::test::utils::DEVICE_GPU), - testing::Values(std::map())), + testing::Values(ov::test::utils::DEVICE_GPU)), LogSoftmaxLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/logical.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/logical.cpp index f0b1d390149936..604b557aeab6f3 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/logical.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/logical.cpp @@ -3,15 +3,27 @@ // #include -#include "single_layer_tests/logical.hpp" +#include "single_op_tests/logical.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace LayerTestsDefinitions::LogicalParams; - namespace { +using ov::test::LogicalLayerTest; + +std::vector> combine_shapes(const std::map>& input_shapes_static) { + std::vector> result; + for (const auto& input_shape : input_shapes_static) { + for (auto& item : input_shape.second) { + result.push_back({input_shape.first, item}); + } + + if (input_shape.second.empty()) { + result.push_back({input_shape.first, {}}); 
+ } + } + return result; +} -std::map, std::vector>> inputShapes = { +std::map> inputShapes = { {{1}, {{1}, {17}, {1, 1}, {2, 18}, {1, 1, 2}, {2, 2, 3}, {1, 1, 2, 3}}}, {{5}, {{1}, {1, 1}, {2, 5}, {1, 1, 1}, {2, 2, 5}}}, {{2, 200}, {{1}, {200}, {1, 200}, {2, 200}, {2, 2, 200}}}, @@ -20,7 +32,7 @@ std::map, std::vector>> inputShapes = { {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}}, }; -std::map, std::vector>> inputShapesNot = { +std::map> inputShapesNot = { {{1}, {}}, {{5}, {}}, {{2, 200}, {}}, @@ -29,51 +41,39 @@ std::map, std::vector>> inputShapesNot {{2, 1, 1, 3, 1}, {}}, }; -std::vector inputsPrecisions = { - InferenceEngine::Precision::BOOL, -}; - -std::vector logicalOpTypes = { - ngraph::helpers::LogicalTypes::LOGICAL_AND, - ngraph::helpers::LogicalTypes::LOGICAL_OR, - ngraph::helpers::LogicalTypes::LOGICAL_XOR, +std::vector logicalOpTypes = { + ov::test::utils::LogicalTypes::LOGICAL_AND, + ov::test::utils::LogicalTypes::LOGICAL_OR, + ov::test::utils::LogicalTypes::LOGICAL_XOR, }; -std::vector secondInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondInputTypes = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER, }; -std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +std::vector netPrecisions = { + ov::element::boolean, }; std::map additional_config = {}; INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapes)), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapes))), ::testing::ValuesIn(logicalOpTypes), ::testing::ValuesIn(secondInputTypes), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - 
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefsNot, LogicalLayerTest, - ::testing::Combine(::testing::ValuesIn(LogicalLayerTest::combineShapes(inputShapesNot)), - ::testing::Values(ngraph::helpers::LogicalTypes::LOGICAL_NOT), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(combine_shapes(inputShapesNot))), + ::testing::Values(ov::test::utils::LogicalTypes::LOGICAL_NOT), + ::testing::Values(ov::test::utils::InputLayerType::CONSTANT), ::testing::ValuesIn(netPrecisions), - ::testing::ValuesIn(inputsPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), LogicalLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp index 916690bc246a1e..776095cb34a4cd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lrn.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/lrn.hpp" +#include "single_op_tests/lrn.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +using ov::test::LrnLayerTest; + +const std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; const std::vector> axes = {{1}, {2, 
3}}; @@ -28,9 +28,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LrnCheck, LrnLayerTest, ::testing::Values(size), ::testing::ValuesIn(axes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(std::vector({10, 10, 3, 2})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(std::vector>( + {{{10, 10, 3, 2}}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), LrnLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp index 99bd6279d2f141..37db834db0e465 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/lstm_cell.cpp @@ -4,12 +4,12 @@ #include -#include "single_layer_tests/lstm_cell.hpp" +#include "single_op_tests/lstm_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::LSTMCellTest; + std::vector should_decompose{false, true}; std::vector batch{5}; std::vector hidden_size{1, 10}; @@ -20,12 +20,12 @@ std::vector> activations = {{"relu", "sigmoid", "tanh"} {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"}, {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}}; std::vector clip{0.f, 0.7f}; -std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; -std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; +std::vector netPrecisions = {ov::element::f32, + ov::element::f16}; INSTANTIATE_TEST_SUITE_P(LSTMCellCommon, 
LSTMCellTest, ::testing::Combine( diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp index 026a97154cee4a..95f9f58da6fcd8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mat_mul.cpp @@ -4,65 +4,102 @@ #include -#include "single_layer_tests/mat_mul.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/mat_mul.hpp" namespace { +using ov::test::MatMulLayerTest; +using ov::test::utils::InputLayerType; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, +}; + +std::vector> no_transpose_shapeRelatedParams = { + { {2, 1, 1, 5, 6}, {1, 1, 6, 4} }, + { {2, 1, 2, 3, 5, 6}, {1, 1, 6, 4} }, + { {1, 4, 5, 6}, {1, 4, 6, 4} }, + { {4, 5, 6}, {6, 3} }, + { {9, 9, 9}, {9, 9} }, + { {1, 2, 3}, {1, 1, 3, 2} }, + { {1, 3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 2, 4}, {1, 3, 4, 2} }, + { {3, 2, 4}, {2, 1, 4, 2} }, + { {2, 1, 4, 2}, {3, 2, 4} }, + { {3}, {2, 2, 3, 1} }, + { {2, 2, 1, 3}, {3} }, + { {1, 5}, {5, 1} }, + { {1, 5}, {5} }, + { {5}, {5, 1} }, + { {5}, {5} }, +}; + +std::vector> first_transpose_shapeRelatedParams = { + { {2, 1, 2, 3}, {3, 2, 4} }, + { {100, 65}, {100, 73} }, + { {5, 1}, {5, 1} }, +}; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +std::vector> second_transpose_shapeRelatedParams = { + { {1, 16, 128}, {1, 64, 128} }, + { {2, 1, 3, 2}, {3, 4, 2} }, + { {1, 64, 80}, {1, 77, 80} }, + { {65, 100}, {73, 100} }, + { {1, 5}, {1, 5} }, }; -const std::vector shapeRelatedParams = { - { { {2, 1, 1, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {2, 2, 4, 16}, true }, { {1, 1, 1, 4}, true } }, - { { {2, 1, 2, 3, 5, 6}, false }, { {1, 1, 6, 4}, false } }, - { { {1, 4, 5, 6}, 
false }, { {1, 4, 6, 4}, false } }, - { { {1, 16, 128}, false }, { {1, 64, 128}, true } }, - { { {4, 5, 6}, false }, { {6, 3}, false } }, - { { {9, 9, 9}, false }, { {9, 9}, false } }, - { { {1, 2, 3}, false }, { {1, 1, 3, 2}, false } }, - { { {1, 3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 2, 4}, false }, { {1, 3, 4, 2}, false } }, - { { {3, 2, 4}, false }, { {2, 1, 4, 2}, false } }, - { { {2, 1, 4, 2}, false }, { {3, 2, 4}, false } }, - { { {2, 1, 2, 3}, true }, { {3, 2, 4}, false } }, - { { {2, 1, 3, 2}, false }, { {3, 4, 2}, true } }, - { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } }, - { { {1, 64, 80}, false }, { {1, 77, 80}, true } }, - { { {3}, false }, { {2, 2, 3, 1}, false } }, - { { {2, 2, 1, 3}, false }, { {3}, false } }, - { { {65, 100}, false }, { {73, 100}, true } }, - { { {100, 65}, true }, { {100, 73}, false } }, - { { {100, 65}, true }, { {73, 100}, true } }, - { { {1, 5}, false }, { {5, 1}, false } }, - { { {5, 1}, true }, { {5, 1}, false } }, - { { {1, 5}, false }, { {1, 5}, true } }, - { { {1, 5}, false }, { {5}, false } }, - { { {5}, false }, { {5, 1}, false } }, - { { {5}, false }, { {5}, false } }, - { { {5}, true }, { {5}, true } } +std::vector> both_transpose_shapeRelatedParams = { + { {2, 2, 4, 16}, {1, 1, 1, 4} }, + { {2, 1, 2, 3}, {3, 4, 2} }, + { {100, 65}, {73, 100} }, + { {5}, {5} }, }; -std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondaryInputTypes = { + InputLayerType::CONSTANT, + InputLayerType::PARAMETER, }; std::map additional_config = {}; -INSTANTIATE_TEST_SUITE_P(smoke_MatMul, MatMulTest, +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_NoTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(no_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + 
::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_FirstTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(first_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, false)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_SecondTranspose, MatMulLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(second_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(false, true)), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(secondaryInputTypes), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(additional_config)), + MatMulLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_MatMul_BothTranspose, MatMulLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapeRelatedParams), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(both_transpose_shapeRelatedParams)), + ::testing::Values(std::make_pair(true, true)), ::testing::ValuesIn(inputPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(secondaryInputTypes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), - MatMulTest::getTestCaseName); + MatMulLayerTest::getTestCaseName); } // namespace From 26cab79988f21293e948efb0297c31acbaa47e3d Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 24 Oct 2023 18:49:41 +0400 Subject: [PATCH 029/275] [GPU] Improve OOOQ synchronization between ShapeOf subgraph's CPU 
impls and GPU kernels (#20595) * [GPU] Add ITT counters for primitive inst * [GPU] Improve OOOQ synchronization between ShapeOf subgraph's CPU impls and GPU kernels --- .../intel_gpu/src/graph/primitive_inst.cpp | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6c1e88de349115..92f9f60743b9f5 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -242,6 +242,7 @@ event::ptr primitive_inst::set_output_memory(memory::ptr mem_new, bool check, si } void primitive_inst::update_shape() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::shape_inference); if (update_shape_done_by_other) { update_shape_done_by_other = false; // reset @@ -341,6 +342,7 @@ void primitive_inst::update_shape() { } if (has_runtime_deps) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_shape_sync: " + id())); if (!dependencies_events.empty() && queue_type == QueueTypes::out_of_order) { _network.get_stream().wait_for_events(dependencies_events); } else if (queue_type == QueueTypes::in_order) { @@ -380,6 +382,7 @@ void primitive_inst::update_shape() { } event::ptr primitive_inst::realloc_if_needed() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("realloc_if_needed: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::memory_allocation); @@ -493,6 +496,7 @@ bool primitive_inst::use_async_compilation() { } bool primitive_inst::update_impl() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_impl: " + id())); 
GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; @@ -656,6 +660,7 @@ bool primitive_inst::update_impl() { } void primitive_inst::do_runtime_skip_reorder() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_skip_reorder: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_skip_reorder) { return; @@ -713,6 +718,7 @@ void primitive_inst::do_runtime_skip_reorder() { } void primitive_inst::do_runtime_in_place_concat() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("do_runtime_in_place_concat: " + id())); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing) { return; @@ -780,6 +786,7 @@ bool primitive_inst::has_inner_networks() const { } event::ptr primitive_inst::execute(const std::vector& events) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("primitive_inst::execute: " + id())); const auto primitive_id = id(); OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input"); GPU_DEBUG_GET_INSTANCE(debug_config); @@ -802,6 +809,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } if (!is_valid_fusion()) { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("unfused_subgraph_exec: " + id())); auto subgraph = get_unfused_subgraph(); for (auto& d : _deps) { @@ -859,16 +867,16 @@ event::ptr primitive_inst::execute(const std::vector& events) { GPU_DEBUG_TRACE << id() << ": execute " << _impl->get_kernel_name() << " (is_dynamic=" << _impl->is_dynamic() << ", " << "can_be_optimized=" << can_be_optimized() << ")" << std::endl; + const bool out_of_order_queue = get_network().get_stream().get_queue_type() == QueueTypes::out_of_order; if (_exec_deps.empty() && dependencies.empty()) 
{ dependencies = events; } else { - auto queue_type = get_network().get_stream().get_queue_type(); // Prepare dependencies events in case of OOO queue, CPU implementation, // or optimized_out impl which has CPU users (needs_completion_event() && !is_output() condition) - if (queue_type == QueueTypes::out_of_order || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { + if (out_of_order_queue || _impl->is_cpu() || (can_be_optimized() && needs_completion_event() && !is_output())) { dependencies.reserve(dependencies.size() + _exec_deps.size()); for (auto& input : _exec_deps) { - if (input->is_input() && queue_type != QueueTypes::out_of_order) + if (input->is_input() && !out_of_order_queue) continue; auto id = input->id(); try { @@ -883,6 +891,13 @@ event::ptr primitive_inst::execute(const std::vector& events) { } } + // Replace multiple events with single grouped event in case of barriers synchronization to prevent `_last_barrier_ev` usage as a dependency + // event of optimized_out instance's users, which may lead to unwanted extra synchronization of CPU impls with GPU kernels + if (_node && _node->is_in_shape_of_subgraph() && can_be_optimized() && dependencies.size() > 1 && out_of_order_queue) { + auto grouped_ev = get_network().get_stream().group_events(dependencies); + dependencies = {grouped_ev}; + } + { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::inference); auto ev = _impl->execute(dependencies, *this); @@ -905,6 +920,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } void primitive_inst::set_arguments() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("set_arguments: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::set_arguments); OPENVINO_ASSERT(_has_valid_input, id(), " has invalid/unset input"); _impl->set_arguments(*this); @@ -1138,6 +1154,7 @@ void primitive_inst::allocate_internal_buffers(bool reset) { } event::ptr 
primitive_inst::update_weights() { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_weights: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_weights); if (!_impl) return nullptr; From 337e225dbd4fbc02afb72d110f23a9508e34e6e5 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 24 Oct 2023 18:59:17 +0400 Subject: [PATCH 030/275] Removed linux.yml AZure pipeline (#20341) --- .ci/azure/linux.yml | 590 -------------------------------------------- 1 file changed, 590 deletions(-) delete mode 100644 .ci/azure/linux.yml diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml deleted file mode 100644 index 8626f9d609ed0e..00000000000000 --- a/.ci/azure/linux.yml +++ /dev/null @@ -1,590 +0,0 @@ -trigger: - branches: - include: - - 'master' - - 'releases/*' - paths: - exclude: - - '*/docs/*' - - 'docs/*' - - '*/*.md' - - '*.md' - - '*/layer_tests_summary/*' - - '*/conformance/*' - -pr: - branches: - include: - - 'master' - - 'releases/*' - paths: - exclude: - - '*/docs/*' - - 'docs/*' - - '*/*.md' - - '*.md' - - '*/layer_tests_summary/*' - - '*/conformance/*' - -resources: - repositories: - - repository: openvino_contrib - type: github - endpoint: openvinotoolkit - name: openvinotoolkit/openvino_contrib - ref: master - -variables: - - group: github - -jobs: -- job: Lin - strategy: - matrix: - # Dynamic: - # CMAKE_BUILD_SHARED_LIBS: 'ON' - # PYTHON_STATIC_ARGS: - # CMAKE_CPACK_GENERATOR: - # SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples - # PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python - # RUN_PREFIX: . 
$(SETUPVARS) -pyver 3.8 && - # Debian: - # CMAKE_BUILD_SHARED_LIBS: 'ON' - # PYTHON_STATIC_ARGS: - # CMAKE_CPACK_GENERATOR: 'DEB' - # SAMPLES_INSTALL_DIR: /usr/share/openvino/samples - # PYTHON_SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/share/openvino/samples/python - # RUN_PREFIX: LD_LIBRARY_PATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/opencv/lib:$LD_LIBRARY_PATH - Static: - CMAKE_BUILD_SHARED_LIBS: 'OFF' - PYTHON_STATIC_ARGS: -m "not dynamic_library" - CMAKE_CPACK_GENERATOR: "TGZ" - SAMPLES_INSTALL_DIR: $(INSTALL_DIR)/samples - PYTHON_SAMPLES_INSTALL_DIR: $(SAMPLES_INSTALL_DIR)/python - RUN_PREFIX: . $(SETUPVARS) && - maxParallel: '2' - - # About 150% of total time - timeoutInMinutes: '180' - - pool: - name: LIN_VMSS_VENV_F16S_U20_WU2 - - variables: - system.debug: true - VSTS_HTTP_RETRY: 5 - VSTS_HTTP_TIMEOUT: 200 - BUILD_TYPE: Release - REPO_DIR: $(Build.Repository.LocalPath) - OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib - WORK_DIR: $(Pipeline.Workspace)/_w - BUILD_DIR: $(WORK_DIR)/build - BUILD_SAMPLES_DIR: $(WORK_DIR)/build_samples - BUILD_LAYER_TESTS_DIR: $(WORK_DIR)/build_layer_tests - BUILD_SAMPLES_TESTS_DIR: $(WORK_DIR)/build_samples_tests - INSTALL_DIR: $(WORK_DIR)/install_pkg - INSTALL_TEST_DIR: $(INSTALL_DIR)/tests - LAYER_TESTS_DIR: $(INSTALL_TEST_DIR)/layer_tests - SETUPVARS: $(INSTALL_DIR)/setupvars.sh - TMP_DIR: /mnt/tmp - SHARE_DIR: /mount/cinfsshare/onnxtestdata - CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux - CMAKE_VERSION: 3.24.0 - BUILD_PYTHON: $(WORK_DIR)/build_python - INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python - LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib - OV_PYTHON_VERSION: 3.11.2 # Full version of Python its required for LD_LIBRARY_PATH. 
More details https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(OV_PYTHON_VERSION)' # Setting only major & minor version will download latest release from GH repo example 3.10 will be 3.10.10. - addToPath: true - disableDownloadFromRegistry: false - architecture: 'x64' - githubToken: $(auth_token) - displayName: Setup Python 3.11 - name: setupPython - - bash: | - #!/bin/bash - python -V - - - script: | - curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01" - whoami - uname -a - echo Python3 info ; which python3 ; python3 --version - echo Java info ; which java ; java -version - echo gcc info ; which gcc ; gcc --version - echo cmake info ; which cmake ; cmake --version - lsb_release - env - cat /proc/cpuinfo - cat /proc/meminfo - cat /etc/fstab - vmstat -s - df - lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd" - free -h - echo TargetBranch: $(System.PullRequest.TargetBranch) - echo SourceBranch: $(Build.SourceBranch) - displayName: 'System info' - - - script: | - set -e - rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR) - rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR) - rm -rf $(BUILD_SAMPLES_DIR) ; mkdir $(BUILD_SAMPLES_DIR) - sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR) - sudo mkdir -p $(SHARE_DIR) - sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common - sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys - mkdir -p $(CCACHE_DIR) - displayName: 'Make dir' - - - checkout: self - clean: 'true' - submodules: 'true' - path: openvino - - - checkout: openvino_contrib - clean: 'true' - submodules: 'true' - path: openvino_contrib - - - script: | - set -e - sudo -E $(REPO_DIR)/install_build_dependencies.sh - # Move jdk into contrib - # 'clang' compiler is used as a default compiler - sudo apt 
--assume-yes install openjdk-11-jdk libbz2-dev clang - # For Python API - python3 -m pip install --upgrade pip - python3 -m pip install -r $(REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt - python3 -m pip install -r $(REPO_DIR)/src/bindings/python/requirements.txt - # For running Python API tests - python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/compatibility/openvino/requirements-dev.txt - # For running Paddle frontend unit tests - # TODO Reenable PDPD after paddlepaddle==2.5.0 with compliant protobuf is released (ticket 95904) - #python3 -m pip install -r $(REPO_DIR)/src/frontends/paddle/tests/requirements.txt - # For running ONNX frontend unit tests - python3 -m pip install -r $(REPO_DIR)/src/frontends/onnx/tests/requirements.txt - # For running TensorFlow frontend unit tests - python3 -m pip install -r $(REPO_DIR)/src/frontends/tensorflow/tests/requirements.txt - # For running torchvision -> OpenVINO preprocess converter - python3 -m pip install -r $(REPO_DIR)/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt - # For MO unit tests - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_mxnet.txt - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_caffe.txt - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_kaldi.txt - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_onnx.txt - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_tf2.txt - python3 -m pip install -r $(REPO_DIR)/tools/mo/requirements_dev.txt - # Speed up build - sudo apt -y --no-install-recommends install unzip - wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip - unzip ninja-linux.zip - sudo cp -v ninja /usr/local/bin/ - displayName: 'Install dependencies' - - - script: | - curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01" - whoami - uname -a - echo Python3 info ; which python3 ; python3 --version - echo Python info ; 
which python ; python --version - echo Java info ; which java ; java -version - echo gcc info ; which gcc ; gcc --version - echo cmake info ; which cmake ; cmake --version - lsb_release - env - cat /proc/cpuinfo - cat /proc/meminfo - cat /etc/fstab - vmstat -s - df - lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd" - free -h - echo TargetBranch: $(System.PullRequest.TargetBranch) - echo SourceBranch: $(Build.SourceBranch) - displayName: 'System info' - - - task: CMake@1 - inputs: - # CMake must get Python 3.x version by default - cmakeArgs: > - -GNinja - -DCMAKE_VERBOSE_MAKEFILE=ON - -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) - -DCMAKE_COMPILE_WARNING_AS_ERROR=ON - -DENABLE_PYTHON=ON - -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) - -DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS) - -DENABLE_TESTS=ON - -DENABLE_OV_ONNX_FRONTEND=ON - -DENABLE_FASTER_BUILD=ON - -DENABLE_STRICT_DEPENDENCIES=OFF - -DOPENVINO_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules - -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - -DCMAKE_C_COMPILER_LAUNCHER=ccache - -DCMAKE_CXX_LINKER_LAUNCHER=ccache - -DCMAKE_C_LINKER_LAUNCHER=ccache - -DCMAKE_CXX_COMPILER=clang++ - -DCMAKE_C_COMPILER=clang - -DENABLE_SYSTEM_SNAPPY=ON - -DENABLE_SYSTEM_TBB=ON - -DCPACK_GENERATOR=$(CMAKE_CPACK_GENERATOR) - -DBUILD_nvidia_plugin=OFF - -S $(REPO_DIR) - -B $(BUILD_DIR) - displayName: 'Cmake OpenVINO' - - - script: ls -alR $(REPO_DIR)/temp/ - displayName: 'List temp SDKs' - - - script: ccache --zero-stats --max-size=50G --show-config - displayName: 'Clean ccache stats' - - - script: cmake --build $(BUILD_DIR) --parallel --config $(BUILD_TYPE) - env: - CCACHE_DIR: $(CCACHE_DIR) - CCACHE_TEMPDIR: $(TMP_DIR)/ccache - CCACHE_BASEDIR: $(Pipeline.Workspace) - CCACHE_MAXSIZE: 50G - displayName: 'Build Lin' - - - script: ccache --show-stats - displayName: 'Show ccache stats' - - - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List 
bin files' - - - task: CMake@1 - inputs: - cmakeArgs: > - -GNinja - -S $(REPO_DIR)/tests/layer_tests - -B $(BUILD_LAYER_TESTS_DIR) - displayName: 'Cmake Layer Tests' - - - script: cmake --build $(BUILD_LAYER_TESTS_DIR) --parallel --config $(BUILD_TYPE) - displayName: 'Build Layer Tests' - - - script: sudo apt-get remove libtbb2 -y - displayName: 'Remove debian dependencies' - condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB') - - - script: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake - displayName: 'Install wheel packages' - - - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_LAYER_TESTS_DIR)/cmake_install.cmake - displayName: 'Install Layer Tests' - - - script: python3 -m pip install openvino-dev --find-links=$(INSTALL_DIR)/tools - displayName: 'Install python wheels' - - - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P $(BUILD_DIR)/cmake_install.cmake - displayName: 'Install tests' - - - script: ls -alR $(INSTALL_DIR) - displayName: 'List install test files' - - - script: | - set -e - sudo apt-get install libtbb-dev libpugixml-dev -y - cmake --build $(BUILD_DIR) --target package --parallel - condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB') - displayName: 'Build Debian packages' - - - script: | - set -e - # install debian packages from previous release - sudo apt-get install --no-install-recommends gnupg wget -y - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - echo "deb https://apt.repos.intel.com/openvino/2022 focal main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2022.list - sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/intel-openvino-2022.list - sudo apt-get install openvino -y - # install our local one and make sure the conflicts are resolved - sudo apt-get install --no-install-recommends dpkg-dev -y - rm -r 
_CPack_Packages - dpkg-scanpackages . /dev/null | gzip -9c > Packages.gz - echo "deb [trusted=yes] file:$(BUILD_DIR) ./" | sudo tee /etc/apt/sources.list.d/openvino-local.list - sudo apt-get update -o Dir::Etc::sourcelist=/etc/apt/sources.list.d/openvino-local.list - sudo apt-get install openvino -y - workingDirectory: $(BUILD_DIR) - condition: eq(variables['CMAKE_CPACK_GENERATOR'], 'DEB') - displayName: 'Install Debian packages' - - - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_DIR)/cmake_install.cmake - condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB') - displayName: 'Install openvino' - - - script: ls -alR $(INSTALL_DIR) - condition: ne(variables['CMAKE_CPACK_GENERATOR'], 'DEB') - displayName: 'List install files' - - - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/cpp_samples - displayName: 'Build cpp samples - gcc' - - - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -b $(BUILD_DIR)/cpp_samples_clang - env: - CC: clang - CXX: clang++ - displayName: 'Build cpp samples - clang' - - - script: $(SAMPLES_INSTALL_DIR)/c/build_samples.sh -i $(INSTALL_DIR) -b $(BUILD_DIR)/c_samples - env: - VERBOSE: 1 - displayName: 'Build c samples' - - - script: rm -fr $(BUILD_DIR) - displayName: 'Clean build dir' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVCoreUT.xml - displayName: 'OV Core UT' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_functional_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceFunc.xml - displayName: 'Inference Func Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_inference_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceUnit.xml - displayName: 'Inference Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_proxy_plugin_tests --gtest_print_time=1 
--gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVProxyTests.xml - displayName: 'OV Proxy Plugin Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroUnitTests.xml - displayName: 'OV Hetero Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-OVHeteroFuncTests.xml - displayName: 'OV Hetero Func Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_conditional_compilation_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ConditionalCompilation.xml - displayName: 'Conditional Compilation Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_ir_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-IRFrontend.xml - displayName: 'IR Frontend Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ONNXFrontend.xml - displayName: 'ONNX Frontend Tests' - - # TODO Reenable PDPD after paddlepaddle==2.5.1 with compliant protobuf is released (ticket 95904) - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/paddle_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Paddle.xml - displayName: 'Paddle Frontend UT' - enabled: 'false' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Tensorflow.xml - displayName: 'TensorFlow Frontend Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_common_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowCommon.xml - displayName: 'TensorFlow Common Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-TensorflowLite.xml - displayName: 
'TensorFlow Lite Frontend Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_lp_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LpTransformations.xml - displayName: 'Low Precision Transformations Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-Transformations.xml - displayName: 'Transformations Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_legacy_transformations_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-LegacyTransformations.xml - displayName: 'Legacy Transformations Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_util_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-CommonUtilTests.xml - displayName: 'Common Utils Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineUnitTests.xml - displayName: 'IE UT old' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_snippets_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_snippets_func_tests.xml - displayName: 'Snippets Func Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_unit_tests.xml - displayName: 'Intel CPU Unit Tests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_gna_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_gna_unit_tests.xml - displayName: 'GNA UT' - enabled: 'false' # TODO: fix - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml - displayName: 'AUTO UT' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_func_tests.xml - displayName: 'AUTO FuncTests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_unit_tests 
--gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_unit_tests.xml - displayName: 'AutoBatch UT' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml - displayName: 'TEMPLATE FuncTests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/InferenceEngineCAPITests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-InferenceEngineCAPITests.xml - displayName: 'IE CAPITests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_capi_test --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_capi_test.xml - displayName: 'OV CAPITests' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_func_tests.xml - displayName: 'AutoBatch FuncTests' - - # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - - script: | - $(RUN_PREFIX) python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \ - --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \ - --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \ - --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py - displayName: 'nGraph and IE Python Bindings Tests' - - - script: | - set -e - export LD_LIBRARY_PATH=$INSTALL_TEST_DIR:$LD_LIBRARY_PATH - $(RUN_PREFIX) python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \ - --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \ - --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py - displayName: 'Python API 2.0 Tests' - - # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - - script: | - python3 -m pytest -sv $(REPO_DIR)/src/frontends/onnx/tests $(PYTHON_STATIC_ARGS) \ - --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_zoo_models.py \ - --ignore=$(REPO_DIR)/src/frontends/onnx/tests/test_python/test_backend.py -v - displayName: 'ONNX Frontend Python Tests' - - 
- script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml - displayName: 'Model Optimizer UT' - - - script: python3 -m pytest -s $(REPO_DIR)/tools/ovc/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-OpenVinoConversion.xml - displayName: 'OpenVino Conversion UT' - - - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml - displayName: 'CPU FuncTests' - condition: and(succeeded(), eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'OFF')) - - - task: CMake@1 - inputs: - cmakeArgs: > - -GNinja - -S $(REPO_DIR)/tests/samples_tests - -B $(BUILD_SAMPLES_TESTS_DIR) - displayName: 'CMake Samples Tests' - - - script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P $(BUILD_SAMPLES_TESTS_DIR)/cmake_install.cmake - displayName: 'Install Samples Tests' - - - script: python3 -m pip install -r $(INSTALL_TEST_DIR)/smoke_tests/requirements.txt - displayName: 'Install dependencies for samples smoke tests' - - - script: | - set -e - export PATH=$HOME/.local/bin:$PATH - export LD_LIBRARY_PATH=$IE_APP_PATH:$LD_LIBRARY_PATH - $(RUN_PREFIX) python3 -m pytest $(INSTALL_TEST_DIR)/smoke_tests/ \ - --env_conf $(INSTALL_TEST_DIR)/smoke_tests/env_config.yml \ - -s --junitxml=$(INSTALL_TEST_DIR)/TEST-SamplesSmokeTests.xml - env: - IE_APP_PATH: $(INSTALL_DIR)/samples_bin - IE_APP_PYTHON_PATH: $(PYTHON_SAMPLES_INSTALL_DIR)/ - SHARE: $(INSTALL_TEST_DIR)/smoke_tests/samples_smoke_tests_data/ - WORKSPACE: $(INSTALL_DIR) - displayName: 'Samples Smoke Tests' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/pytorch_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-pytorch.xmlTEST - env: - PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'PyTorch Layer Tests' - - - script: | - set -e - 
python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_fe.xmlTEST - env: - PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'TensorFlow 1 Layer Tests - TF FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=$(INSTALL_TEST_DIR)/TEST-tf2_fe.xmlTEST - env: - PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'TensorFlow 2 Layer Tests - TF FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/jax_tests/ -m precommit --junitxml=$(INSTALL_TEST_DIR)/TEST-jax.xmlTEST - env: - PYTHONPATH: $(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'JAX Layer Tests - TF FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=$(INSTALL_TEST_DIR)/TEST-tf_Roll.xmlTEST - env: - PYTHONPATH: $(LAYER_TESTS_DIR) - displayName: 'TensorFlow 1 Layer Tests - Legacy FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 --junitxml=./TEST-tf2_Activation.xmlTEST -k "sigmoid" - env: - PYTHONPATH: $(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'TensorFlow 2 Layer Tests - Legacy FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/tensorflow_lite_tests/ --junitxml=$(INSTALL_TEST_DIR)/TEST-tfl_fe.xmlTEST - env: - 
PYTHONPATH: $(REPO_DIR)/tools/mo/:$(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'TensorFlow Lite Layer Tests - TFL FE' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/ovc_python_api_tests/ --junitxml=./TEST-test_ovc_convert.xmlTEST - env: - PYTHONPATH: $(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'OVC Python API Tests' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/mo_python_api_tests/ --junitxml=./TEST-test_mo_convert.xmlTEST - env: - PYTHONPATH: $(LAYER_TESTS_DIR) - TEST_DEVICE: CPU - displayName: 'MO Python API Tests' - - - script: | - set -e - python3 -m pip install -r $(LAYER_TESTS_DIR)/requirements.txt - $(RUN_PREFIX) python3 -m pytest $(LAYER_TESTS_DIR)/py_frontend_tests --junitxml=./TEST-test_py_fontend.xml - displayName: 'Python Frontend tests' - - - task: PublishTestResults@2 - condition: always() - inputs: - testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest - testResultsFiles: '**/TEST-*.xml' - #searchFolder: '$(BUILD_DIR)' - mergeTestResults: false # Optional - #failTaskOnFailedTests: false # Optional - #testRunTitle: 'Pre/Post-Commit' # Optional - buildPlatform: 'x64' # Optional - buildConfiguration: 'Linux' # Optional - #publishRunAttachments: true # Optional From 124f2bc5c99e27152ca20916857df2d870cba359 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 24 Oct 2023 19:19:16 +0400 Subject: [PATCH 031/275] Remove NF4 Convert from public API (#20666) * Remove NF4 Convert from public API * Fixed build --- src/core/{include => dev_api}/openvino/core/type/nf4.hpp | 0 src/core/include/openvino/core/type/element_type.hpp | 1 - src/core/include/openvino/op/constant.hpp | 7 ++++--- src/core/reference/include/openvino/reference/convert.hpp | 3 ++- src/core/src/op/constant.cpp | 7 +++++++ 5 files changed, 13 insertions(+), 5 
deletions(-) rename src/core/{include => dev_api}/openvino/core/type/nf4.hpp (100%) diff --git a/src/core/include/openvino/core/type/nf4.hpp b/src/core/dev_api/openvino/core/type/nf4.hpp similarity index 100% rename from src/core/include/openvino/core/type/nf4.hpp rename to src/core/dev_api/openvino/core/type/nf4.hpp diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index 1534e9e0cc8fca..78e200d5035f79 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -20,7 +20,6 @@ #include "openvino/core/rtti.hpp" #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/type/float16.hpp" -#include "openvino/core/type/nf4.hpp" /** * @defgroup ov_element_cpp_api Element types diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 100ed2f7e18d6f..6299dde459061c 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -705,15 +705,15 @@ class OPENVINO_API Constant : public Op { auto p = get_data_ptr_nc(); size_t i = 0; for (; i < source.size() / 2; i++) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); - const auto idx2 = ConvertNF4::quantize(static_cast(source[i * 2 + 1])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); + const auto idx2 = quantize_nf4(static_cast(source[i * 2 + 1])); const auto v1 = value_in_range(idx1) & 0x0F; const auto v2 = value_in_range(idx2) & 0x0F; const auto v = (v2 << 4) | v1; p[i] = static_cast(v); } if (source.size() % 2) { - const auto idx1 = ConvertNF4::quantize(static_cast(source[i * 2])); + const auto idx1 = quantize_nf4(static_cast(source[i * 2])); const auto v = value_in_range(idx1) & 0x0F; p[i] = static_cast(v); } @@ -853,6 +853,7 @@ class OPENVINO_API Constant : public Op { } return shape_size(m_shape) * m_element_type.size(); } + static uint8_t 
quantize_nf4(float x); element::Type m_element_type; Shape m_shape{}; diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index bd36b50b03301d..3924ce690553b2 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -8,6 +8,7 @@ #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" namespace ov { namespace reference { @@ -87,7 +88,7 @@ void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, } else if (dst_type == element::i4) { detail::set_i4(output, i, detail::get_value(input, i, src_type)); } else if (src_type == element::nf4) { - ConvertNF4::unpack(out, input, i); + ov::ConvertNF4::unpack(out, input, i); } else { out[i] = detail::get_value(input, i, src_type); } diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 2fe3d024fd9551..34e97d73eeee30 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -13,6 +13,9 @@ #include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/float16.hpp" +#include "openvino/core/type/nf4.hpp" #include "openvino/runtime/shared_buffer.hpp" template @@ -606,3 +609,7 @@ bool ov::op::v0::Constant::evaluate_lower(TensorVector& outputs) const { bool ov::op::v0::Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } + +uint8_t ov::op::v0::Constant::quantize_nf4(float x) { + return ov::ConvertNF4::quantize(x); +} From 22184c32f4c55c4dd2915594c1d45700579e0eb9 Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Tue, 24 Oct 2023 18:18:50 +0200 Subject: [PATCH 032/275] fix random layer names and count (#20323) * add sorting for fix sporadic failure in SharedOpOptimization 
shared_node_optimization * fix Output and Input comparison * remove unneed sorting from transformation * add unit test * code review fixes * code review fixes * code review fixes * code review fixes --------- Co-authored-by: Ivan Tikhonov --- .../shared_ops_optimization.cpp | 71 +++++++++++++++++++ src/core/src/node_input.cpp | 13 ++-- src/core/src/node_output.cpp | 12 ++-- 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp index 698973740e08e6..b0e327e4d4bad4 100644 --- a/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp +++ b/src/common/transformations/tests/common_optimizations/shared_ops_optimization.cpp @@ -433,3 +433,74 @@ TEST_F(SharedTransformationTestsF, SharedShapeOfTestMixed) { model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); } } + +namespace { +OutputVector createShapeNodesInMemory(const std::vector& node_order_in_memory, + std::shared_ptr& memory, + const std::string& node_name_prefix, + const std::shared_ptr& input, + element::Type output_type) { + OutputVector outputs; + memory.reset(::malloc(node_order_in_memory.size() * sizeof(v3::ShapeOf)), ::free); + for (size_t i = 0; i < node_order_in_memory.size(); ++i) { + v3::ShapeOf* node_addr = static_cast(memory.get()) + node_order_in_memory[i]; + auto node_ptr = + std::shared_ptr(new (node_addr) v3::ShapeOf(input, output_type), [](v3::ShapeOf* node) { + node->v3::ShapeOf::~ShapeOf(); + }); + std::stringstream ss; + ss << node_name_prefix << i; + node_ptr->set_friendly_name(ss.str()); + outputs.push_back(node_ptr->output(0)); + } + + return outputs; +} + +std::shared_ptr createModelWithShapes(const Shape& input_shape, + const std::vector& node_order_in_memory, + const std::string& node_name_prefix, + std::shared_ptr& buffer) { + auto input = std::make_shared(element::f32, 
input_shape); + auto shape_nodes = createShapeNodesInMemory(node_order_in_memory, buffer, node_name_prefix, input, element::i64); + + NodeVector inputs_of_concat; + for (const auto& shape_node : shape_nodes) { + auto node = std::make_shared(shape_node, element::i64); + inputs_of_concat.push_back(node); + } + + auto concat = std::make_shared(inputs_of_concat, 0); + return std::make_shared(NodeVector{concat}, ParameterVector{input}); +} +} // namespace + +/** + * @brief Check that node address is not influenced on the transformation result + */ +TEST(TransformationTests, SharedShapeOfTestRandomOrder) { + Shape input_shape{120, 4}; + std::shared_ptr buffer; + // nodes are placed into pre-allocated memory in order that is specified in next variable + std::vector> node_orders_in_memory = {{0, 1}, {1, 0}}; + + std::vector> models; + for (const auto& node_order_in_memory : node_orders_in_memory) { + auto model = createModelWithShapes(input_shape, node_order_in_memory, "Shape_", buffer); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + + const auto model_ops = model->get_ops(); + const auto op_it = std::find_if(model_ops.begin(), model_ops.end(), [](const std::shared_ptr& node) { + return node->get_friendly_name() == "Shape_0"; + }); + ASSERT_TRUE(op_it != model_ops.end()) << "node Shape_0 is not found in model"; + // we need to clone while memory will be reused on the next iteration for the new model + models.push_back(model->clone()); + } + + FunctionsComparator comparator = FunctionsComparator::with_default(); + comparator.compare(models[0], models[1]); +} diff --git a/src/core/src/node_input.cpp b/src/core/src/node_input.cpp index 7c6b8a9ff2102c..11a353cb765b49 100644 --- a/src/core/src/node_input.cpp +++ b/src/core/src/node_input.cpp @@ -60,12 +60,15 @@ bool Input::operator==(const Input& other) const { bool Input::operator!=(const Input& other) const { return !(*this == other); } + bool Input::operator<(const Input& other) 
const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { @@ -135,11 +138,13 @@ bool Input::operator!=(const Input& other) const { return !(*this == other); } bool Input::operator<(const Input& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Input::operator>(const Input& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Input::operator<=(const Input& other) const { diff --git a/src/core/src/node_output.cpp b/src/core/src/node_output.cpp index fbd7d3f172280c..4d5de39b75132a 100644 --- a/src/core/src/node_output.cpp +++ b/src/core/src/node_output.cpp @@ -137,10 +137,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + 
(m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); @@ -211,10 +213,12 @@ bool Output::operator!=(const Output& other) const { return !(*this == other); } bool Output::operator<(const Output& other) const { - return m_node < other.m_node || (m_node == other.m_node && m_index < other.m_index); + return m_node->get_instance_id() < other.m_node->get_instance_id() || + (m_node == other.m_node && m_index < other.m_index); } bool Output::operator>(const Output& other) const { - return m_node > other.m_node || (m_node == other.m_node && m_index > other.m_index); + return m_node->get_instance_id() > other.m_node->get_instance_id() || + (m_node == other.m_node && m_index > other.m_index); } bool Output::operator<=(const Output& other) const { return !(*this > other); From eb55360f101d87ef1fddfb28b14d7699fd4c6747 Mon Sep 17 00:00:00 2001 From: Siddhant Chauhan Date: Tue, 24 Oct 2023 23:15:50 +0530 Subject: [PATCH 033/275] [ONNX] Extend ONNX Frontend with BlackmanWindow, HammingWindow and HannWindow operators (#19428) * ONNX BlackManWindow enabled * added a test periodic * Add the license statement * ONNX HammingWindow, HannWindow enabled also added basic tests for each * minor tests added * made reviewed changes * made reviewed changes used output_datatype directly, returned y_values directly * fixed clang-format * add OPENVINO_SUPPRESS_DEPRECATED_START * include math.h * float fix * fix * fix namespace to set_1 * test fixes * fix cast to output_datatype * fix, replace cast with ov::convert * fix, use element::f32 * major fixes * fixes * Update onnx_import.in.cpp * Update onnx_import.in.cpp --------- Co-authored-by: Przemyslaw Wysocki --- .../onnx/frontend/src/op/blackmanwindow.cpp | 86 +++++++++ .../onnx/frontend/src/op/blackmanwindow.hpp | 23 +++ .../onnx/frontend/src/op/hammingwindow.cpp | 72 ++++++++ .../onnx/frontend/src/op/hammingwindow.hpp | 23 +++ .../onnx/frontend/src/op/hannwindow.cpp 
| 68 +++++++ .../onnx/frontend/src/op/hannwindow.hpp | 23 +++ .../onnx/frontend/src/ops_bridge.cpp | 6 + .../models/blackmanwindow_periodic.prototxt | 46 +++++ .../models/blackmanwindow_symmetric.prototxt | 46 +++++ .../models/hammingwindow_periodic.prototxt | 46 +++++ .../models/hammingwindow_symmetric.prototxt | 46 +++++ .../tests/models/hannwindow_periodic.prototxt | 46 +++++ .../models/hannwindow_symmetric.prototxt | 46 +++++ src/frontends/onnx/tests/onnx_import.in.cpp | 168 ++++++++++++++++++ .../onnx/tests/tests_python/test_backend.py | 6 - 15 files changed, 745 insertions(+), 6 deletions(-) create mode 100644 src/frontends/onnx/frontend/src/op/blackmanwindow.cpp create mode 100644 src/frontends/onnx/frontend/src/op/blackmanwindow.hpp create mode 100644 src/frontends/onnx/frontend/src/op/hammingwindow.cpp create mode 100644 src/frontends/onnx/frontend/src/op/hammingwindow.hpp create mode 100644 src/frontends/onnx/frontend/src/op/hannwindow.cpp create mode 100644 src/frontends/onnx/frontend/src/op/hannwindow.hpp create mode 100644 src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt create mode 100644 src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt create mode 100644 src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt create mode 100644 src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt create mode 100644 src/frontends/onnx/tests/models/hannwindow_periodic.prototxt create mode 100644 src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp new file mode 100644 index 00000000000000..8ebca88b32f4cf --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.cpp @@ -0,0 +1,86 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/blackmanwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" 
+#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector blackmanwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX BlackmanWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#blackmanwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.42f}); + const auto a_1 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{-0.50f}); + const auto a_2 = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.08f}); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto four_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{4.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor_1, factor_2; + if (periodic) { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + float_size)); + } else { + factor_1 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + 
std::make_shared(float_size, one_const))); + factor_2 = std::make_shared( + range, + std::make_shared(std::make_shared(pi, four_const), + std::make_shared(float_size, one_const))); + } + + const auto cos_1 = std::make_shared(factor_1); + const auto cos_2 = std::make_shared(factor_2); + const auto scaled_cos_1 = std::make_shared(cos_1, a_1); + const auto scaled_cos_2 = std::make_shared(cos_2, a_2); + const auto y_values = + std::make_shared(std::make_shared(a_0, scaled_cos_1), scaled_cos_2); + + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp new file mode 100644 index 00000000000000..ccff09c84817af --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/blackmanwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector blackmanwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.cpp b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp new file mode 100644 index 00000000000000..25d557f7de6bdc --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hammingwindow.hpp" + +#include 
+ +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hammingwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HammingWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hammingwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{25.0f}), + std::make_shared(ov::element::f32, ov::Shape(), std::vector{46.0f})); + const auto a_1 = std::make_shared( + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}), + a_0); + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto 
y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hammingwindow.hpp b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp new file mode 100644 index 00000000000000..d088b4105abc3a --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hammingwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hammingwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.cpp b/src/frontends/onnx/frontend/src/op/hannwindow.cpp new file mode 100644 index 00000000000000..b0e28afd2e5570 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/hannwindow.hpp" + +#include + +#include "default_opset.hpp" +#include "utils/common.hpp" +#define _USE_MATH_DEFINES +#include + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector hannwindow(const Node& node) { + const auto size = node.get_ng_inputs().at(0); + const auto output_datatype = + common::get_ngraph_element_type(node.get_attribute_value("output_datatype", 1)); + const bool periodic = 
node.get_attribute_value("periodic", 1) == 1; + + const ov::PartialShape shape = size.get_partial_shape(); + const std::vector axis_lengths = shape.to_shape(); + + // Weights as described in ONNX HannWindow docs + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#hannwindow + const auto float_size = std::make_shared(size, ov::element::f32); + const auto a_0 = std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.5f}); + const auto a_1 = a_0; + + const auto start = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{0.0f}); + const auto one_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{1.0f}); + const auto two_const = + std::make_shared(ov::element::f32, ov::Shape(), std::vector{2.0f}); + const auto range = std::make_shared(start, size, one_const, ov::element::f32); + const auto pi = + default_opset::Constant::create(ov::element::f32, ov::Shape(), std::vector{static_cast(M_PI)}); + std::shared_ptr factor; + if (periodic) { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + float_size)); + } else { + factor = std::make_shared( + range, + std::make_shared(std::make_shared(pi, two_const), + std::make_shared(float_size, one_const))); + } + + const auto cos = std::make_shared(factor); + const auto scaled_cos = std::make_shared(cos, a_1); + const auto y_values = std::make_shared(a_0, scaled_cos); + if (output_datatype == element::f32) { + return {y_values}; + } else { + return {std::make_shared(y_values, output_datatype)}; + } +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/op/hannwindow.hpp b/src/frontends/onnx/frontend/src/op/hannwindow.hpp new file mode 100644 index 00000000000000..0c9e6993048ef3 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/hannwindow.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { + +OutputVector hannwindow(const Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END \ No newline at end of file diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index e6707335afd0b8..c4d9a50c4ca637 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -29,6 +29,7 @@ #include "op/average_pool.hpp" #include "op/batch_norm.hpp" #include "op/bitshift.hpp" +#include "op/blackmanwindow.hpp" #include "op/cast.hpp" #include "op/cast_like.hpp" #include "op/ceil.hpp" @@ -75,6 +76,8 @@ #include "op/greater.hpp" #include "op/grid_sample.hpp" #include "op/gru.hpp" +#include "op/hammingwindow.hpp" +#include "op/hannwindow.hpp" #include "op/hard_sigmoid.hpp" #include "op/hard_swish.hpp" #include "op/hardmax.hpp" @@ -345,6 +348,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); + REGISTER_OPERATOR("BlackmanWindow", 1, blackmanwindow); REGISTER_OPERATOR("Cast", 1, cast); REGISTER_OPERATOR("CastLike", 1, cast_like); REGISTER_OPERATOR("Ceil", 1, ceil); @@ -392,6 +396,8 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Greater", 1, greater); REGISTER_OPERATOR("GridSample", 1, grid_sample); REGISTER_OPERATOR("GRU", 1, gru); + REGISTER_OPERATOR("HannWindow", 1, hannwindow); + REGISTER_OPERATOR("HammingWindow", 1, hammingwindow); REGISTER_OPERATOR("Hardmax", 1, hardmax); REGISTER_OPERATOR("Hardmax", 13, hardmax); REGISTER_OPERATOR("HardSigmoid", 
1, hard_sigmoid); diff --git a/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt new file mode 100644 index 00000000000000..f8759ce921028a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1d60e783ead99a --- /dev/null +++ b/src/frontends/onnx/tests/models/blackmanwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "BlackmanWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_blackmanwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git 
a/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt new file mode 100644 index 00000000000000..2bf75ed29fe7f6 --- /dev/null +++ b/src/frontends/onnx/tests/models/hammingwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hammingwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt new file mode 100644 index 00000000000000..1c9a9019829383 --- /dev/null +++ b/src/frontends/onnx/tests/models/hammingwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HammingWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hammingwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt 
b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt new file mode 100644 index 00000000000000..2895bf5ad9b4d9 --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_periodic.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 1 # Set to 1 for periodic, 0 for non-periodic + type: INT + } + } + name: "test_hannwindow_periodic" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt new file mode 100644 index 00000000000000..ec2bc2b8e42bef --- /dev/null +++ b/src/frontends/onnx/tests/models/hannwindow_symmetric.prototxt @@ -0,0 +1,46 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "size" + output: "y" + op_type: "HannWindow" + attribute { + name: "output_datatype" + i: 1 # Use 1 for f32 + type: INT + } + attribute { + name: "periodic" + i: 0 # Set to 0 for symmetric, 1 for periodic + type: INT + } + } + name: "test_hannwindow_symmetric" + input { + name: "size" + type { + tensor_type { + elem_type: 7 # INT64 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 # FLOAT + shape { + dim { + dim_value: 10 # Modify this based on your expected output shape + } + } + } + } + } +} +opset_import { + version: 17 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index a442160ed2379c..361805e45cf0d4 100644 --- 
a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6716,3 +6716,171 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unique_3d_with_duplicates_and_axis_2) test_case.run(); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.000000014901161f, + 0.040212844f, + 0.20077012f, + 0.50978714f, + 0.8492299f, + 0.99999994f, + 0.84922975f, + 0.5097869f, + 0.20077008f, + 0.040212862f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_blackmanwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/blackmanwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {-0.00000001f, + 0.05086961f, + 0.25800052f, + 0.63000000f, + 0.95112991f, + 0.95112979f, + 0.62999994f, + 0.25800028f, + 0.05086958f, + -0.00000001f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + 
"onnx/hammingwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.17414439f, + 0.40240526f, + 0.68455124f, + 0.91281211f, + 1.00000000f, + 0.91281211f, + 0.68455112f, + 0.40240520f, + 0.17414442f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hammingwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hammingwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.08695650f, + 0.19376230f, + 0.46420413f, + 0.77173913f, + 0.97246838f, + 0.97246838f, + 0.77173907f, + 0.46420389f, + 0.19376221f, + 0.08695650f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_periodic) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_periodic.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.09549150f, + 0.34549153f, + 0.65450853f, + 0.90450847f, + 1.00000000f, + 0.90450847f, + 0.65450835f, + 0.34549144f, + 0.09549153f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + 
test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_symmetric) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/hannwindow_symmetric.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + + test_case.add_input({10}); + test_case.add_expected_output(Shape{10}, + {0.00000000f, + 0.11697778f, + 0.41317594f, + 0.75000000f, + 0.96984637f, + 0.96984625f, + 0.74999994f, + 0.41317570f, + 0.11697769f, + 0.00000000f}); + + // GPU has an accuracy drop, need to use different tolerance + if (std::string("${BACKEND_NAME}") != std::string("IE_GPU")) { + test_case.run_with_tolerance_as_fp(); + } else { + test_case.run_with_tolerance_as_fp(0.01f); + } +} diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index 14034898b7c693..779444658d1e28 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -378,12 +378,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_90649, - "OnnxBackendNodeModelTest.test_blackmanwindow_cpu", - "OnnxBackendNodeModelTest.test_blackmanwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_cpu", - "OnnxBackendNodeModelTest.test_hammingwindow_symmetric_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_cpu", - "OnnxBackendNodeModelTest.test_hannwindow_symmetric_cpu", "OnnxBackendNodeModelTest.test_melweightmatrix_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_1_sequence_1_tensor_cpu", "OnnxBackendNodeModelTest.test_sequence_map_add_2_sequences_cpu", From 984e4dbf35b1ffd64e06e0d6c08ac16f25e3f071 Mon Sep 17 00:00:00 2001 From: Oleksii Khovan Date: Tue, 24 Oct 2023 21:36:02 +0200 Subject: [PATCH 034/275] [GPU] NMSRotated-13 (#20411) * Add Rotation support 
to primitive and kernel * Add unit tests * Add transformation for NMSRotated * add single-layer tests * Fix: angle value for the same box may have its sign changed several times passing through iterations of batch and class loops. * fix review comments --- .../include/ov_ops/nms_ie_internal.hpp | 11 +- ...convert_nms_rotated_to_nms_ie_internal.hpp | 26 ++ .../src/ov_ops/nms_ie_internal.cpp | 21 +- ...convert_nms_rotated_to_nms_ie_internal.cpp | 109 ++++++ src/core/reference/src/op/nms_rotated.cpp | 6 +- .../primitives/non_max_suppression.hpp | 13 +- .../graph/impls/ocl/non_max_suppression.cpp | 11 + .../intel_gpu/src/graph/layout_optimizer.cpp | 22 +- .../cl_kernels/non_max_suppression_gpu_ref.cl | 354 +++++++++++++++++- .../src/kernel_selector/common_types.h | 9 + .../non_max_suppression_kernel_ref.cpp | 14 +- .../non_max_suppression_kernel_ref.h | 1 + .../src/plugin/ops/non_max_suppression.cpp | 15 +- .../src/plugin/transformations_pipeline.cpp | 2 + .../single_layer_tests/nms_rotated.cpp | 40 ++ .../test_cases/non_max_suppression_test.cpp | 247 ++++++++++++ .../single_layer_tests/nms_rotated.hpp | 15 + .../single_layer/nms_rotated.hpp | 46 +++ .../src/single_layer/nms_rotated.cpp | 230 ++++++++++++ 19 files changed, 1164 insertions(+), 28 deletions(-) create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp create mode 100644 src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp diff --git 
a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp index 797b89add6d4bb..75f4fa6e1b91fd 100644 --- a/src/common/transformations/include/ov_ops/nms_ie_internal.hpp +++ b/src/common/transformations/include/ov_ops/nms_ie_internal.hpp @@ -21,6 +21,10 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { NonMaxSuppressionIEInternal() = default; + static constexpr int Rotation_None = 0; + static constexpr int Rotation_Clockwise = 1; + static constexpr int Rotation_Counterclockwise = 2; + NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, const Output& max_output_boxes_per_class, @@ -29,7 +33,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); NonMaxSuppressionIEInternal(const Output& boxes, const Output& scores, @@ -40,7 +45,8 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { int center_point_box, bool sort_result_descending, const element::Type& output_type = element::i64, - const element::Type& score_output_type = element::f32); + const element::Type& score_output_type = element::f32, + const int rotation = Rotation_None); void validate_and_infer_types() override; @@ -52,6 +58,7 @@ class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { bool m_sort_result_descending = true; element::Type m_output_type; element::Type m_scores_output_type; + int m_rotation{Rotation_None}; private: int64_t max_boxes_output_from_input() const; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp new 
file mode 100644 index 00000000000000..5eb3b285365f92 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertNMSRotatedToNMSIEInternal; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertNMSRotatedToNMSIEInternal : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertNMSRotatedToNMSIEInternal", "0"); + ConvertNMSRotatedToNMSIEInternal(); +}; diff --git a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp index c305304dbf7238..e879224dd935c7 100644 --- a/src/common/transformations/src/ov_ops/nms_ie_internal.cpp +++ b/src/common/transformations/src/ov_ops/nms_ie_internal.cpp @@ -20,12 +20,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation(rotation) { constructor_validate_and_infer_types(); } @@ -38,12 +40,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out int center_point_box, bool sort_result_descending, const ov::element::Type& output_type, - const ov::element::Type& score_output_type) + const ov::element::Type& 
score_output_type, + const int rotation) : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}), m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + m_scores_output_type(score_output_type), + m_rotation{rotation} { constructor_validate_and_infer_types(); } @@ -59,7 +63,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(5), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } else if (new_args.size() == 5) { return make_shared(new_args.at(0), new_args.at(1), @@ -68,7 +74,9 @@ std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_ new_args.at(4), m_center_point_box, m_sort_result_descending, - m_output_type); + m_output_type, + m_scores_output_type, + m_rotation); } OPENVINO_THROW("Unsupported number of inputs: " + std::to_string(new_args.size())); } @@ -79,6 +87,7 @@ bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisito visitor.on_attribute("sort_result_descending", m_sort_result_descending); visitor.on_attribute("output_type", m_output_type); visitor.on_attribute("score_output_type", m_scores_output_type); + visitor.on_attribute("rotation", m_rotation); return true; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp new file mode 100644 index 00000000000000..b3040cda132852 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.cpp @@ -0,0 +1,109 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp" + 
+#include +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/non_max_suppression.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/nms_ie_internal.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertNMSRotatedToNMSIEInternal::ConvertNMSRotatedToNMSIEInternal() { + MATCHER_SCOPE(ConvertNMSRotatedToNMSIEInternal); + auto nms = ov::pass::pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto nms_rotated = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms_rotated || transformation_callback(nms_rotated)) { + return false; + } + + const auto new_args = nms_rotated->input_values(); + const std::size_t num_of_inputs = new_args.size(); + OPENVINO_ASSERT(num_of_inputs == 5); + + const auto& max_per_class = new_args.at(2); + const auto& iou_threshold = new_args.at(3); + const auto& score_threshold = new_args.at(4); + + // vector of new openvino operations + NodeVector new_ops; + + auto one_dim_shape = Shape{1}; + + Output new_max_per_class; + Output new_iou_threshold; + Output new_score_threshold; + Output new_soft_nms_sigma; + + Output new_shape_for_max_per_class = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_iou_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_score_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + Output new_shape_for_soft_nms_sigma = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1}); + + new_max_per_class = std::make_shared(max_per_class, new_shape_for_max_per_class, true); + new_ops.emplace_back(new_max_per_class.get_node_shared_ptr()); + + new_iou_threshold = std::make_shared(iou_threshold, new_shape_for_iou_threshold, true); + new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr()); + + 
new_score_threshold = + std::make_shared(score_threshold, new_shape_for_score_threshold, true); + new_ops.emplace_back(new_score_threshold.get_node_shared_ptr()); + + constexpr int BoxEncodingType_Center = 1; // see NonMaxSuppression::BoxEncodingType + const int center_point_box = BoxEncodingType_Center; // for NMSRotated is it always Center + + const auto rotation = nms_rotated->get_clockwise() + ? op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise + : op::internal::NonMaxSuppressionIEInternal::Rotation_Counterclockwise; + + std::shared_ptr nms_legacy{nullptr}; + + nms_legacy = + std::make_shared(new_args.at(0), + new_args.at(1), + + new_max_per_class, + new_iou_threshold, + new_score_threshold, + + center_point_box, + nms_rotated->get_sort_result_descending(), + element::i32, + nms_rotated->get_output_element_type(1), + rotation); + new_ops.push_back(nms_legacy); + + Output output_0 = nms_legacy->output(0); + if (nms_rotated->output(0).get_element_type() != output_0.get_element_type()) { + output_0 = std::make_shared(output_0, nms_rotated->output(0).get_element_type()); + output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(0))); + new_ops.emplace_back(output_0.get_node_shared_ptr()); + } + + Output output_2 = nms_legacy->output(2); + if (nms_rotated->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms_rotated->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(2))); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_legacy->set_friendly_name(nms_rotated->get_friendly_name()); + ov::copy_runtime_info(nms_rotated, new_ops); + ov::replace_node(nms_rotated, {output_0, nms_legacy->output(1), output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git 
a/src/core/reference/src/op/nms_rotated.cpp b/src/core/reference/src/op/nms_rotated.cpp index fd604acd5cc6c7..3b4f21d4431c31 100644 --- a/src/core/reference/src/op/nms_rotated.cpp +++ b/src/core/reference/src/op/nms_rotated.cpp @@ -127,7 +127,7 @@ void nms_rotated(const float* boxes_data, for (int64_t batch = 0; batch < num_batches; batch++) { const float* boxesPtr = boxes_data + batch * num_boxes * 5; - RotatedBox* r = reinterpret_cast(const_cast(boxesPtr)); + const RotatedBox* r = reinterpret_cast(boxesPtr); for (int64_t class_idx = 0; class_idx < num_classes; class_idx++) { const float* scoresPtr = scores_data + batch * (num_classes * num_boxes) + class_idx * num_boxes; @@ -137,11 +137,11 @@ void nms_rotated(const float* boxes_data, for (int64_t box_idx = 0; box_idx < num_boxes; box_idx++) { if (scoresPtr[box_idx] > score_threshold) { + candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); // Convert counterclockwise to clockwise if (!clockwise) { - r[box_idx].a *= -1; + candidate_boxes.back().box.a *= -1.f; } - candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp index b9614cd47258c0..a0c5c7138764bf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/non_max_suppression.hpp @@ -17,6 +17,12 @@ namespace cldnn { struct non_max_suppression : public primitive_base { CLDNN_DECLARE_PRIMITIVE(non_max_suppression) + enum Rotation { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE + }; + non_max_suppression() : primitive_base("", {}), selected_indices_num(0), center_point_box(false), @@ -68,6 +74,7 @@ struct non_max_suppression : public primitive_base { primitive_id soft_nms_sigma; primitive_id second_output; primitive_id third_output; + Rotation 
rotation{Rotation::NONE}; size_t hash() const override { size_t seed = primitive::hash(); @@ -79,6 +86,7 @@ struct non_max_suppression : public primitive_base { seed = hash_combine(seed, soft_nms_sigma.empty()); seed = hash_combine(seed, second_output.empty()); seed = hash_combine(seed, third_output.empty()); + seed = hash_combine(seed, rotation); return seed; } @@ -97,7 +105,8 @@ struct non_max_suppression : public primitive_base { cmp_fields(score_threshold.empty()) && cmp_fields(soft_nms_sigma.empty()) && cmp_fields(second_output.empty()) && - cmp_fields(third_output.empty()); + cmp_fields(third_output.empty()) && + cmp_fields(rotation); #undef cmp_fields } @@ -130,6 +139,7 @@ struct non_max_suppression : public primitive_base { ob << soft_nms_sigma; ob << second_output; ob << third_output; + ob << make_data(&rotation, sizeof(rotation)); } void load(BinaryInputBuffer& ib) override { @@ -143,6 +153,7 @@ struct non_max_suppression : public primitive_base { ib >> soft_nms_sigma; ib >> second_output; ib >> third_output; + ib >> make_data(&rotation, sizeof(rotation)); } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp index 7405729120bfbd..f89980a3f936d4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_max_suppression.cpp @@ -143,6 +143,17 @@ struct non_max_suppression_impl : typed_primitive_impl_ocl params.sort_result_descending = primitive->sort_result_descending; params.box_encoding = primitive->center_point_box ? 
kernel_selector::BoxEncodingType::BOX_ENCODING_CENTER : kernel_selector::BoxEncodingType::BOX_ENCODING_CORNER; + switch (primitive->rotation) { + case non_max_suppression::Rotation::CLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::CLOCKWISE; + break; + case non_max_suppression::Rotation::COUNTERCLOCKWISE: + params.rotation = kernel_selector::NMSRotationType::COUNTERCLOCKWISE; + break; + default: + params.rotation = kernel_selector::NMSRotationType::NONE; + } + if (impl_param.get_program().get_node(primitive->id).is_dynamic()) { params.reuse_internal_buffer = true; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 69b1e12fa3b4ae..ca4569a7df7099 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1484,17 +1484,21 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) { preferred_impl = impl_types::ocl; } else { - auto& nms_node = node.as(); - auto scores_layout = nms_node.input_scores().get_output_layout(); - if (scores_layout.is_dynamic()) { + const auto& nms_node = node.as(); + if (nms_node.get_primitive()->rotation != non_max_suppression::Rotation::NONE) { preferred_impl = impl_types::ocl; } else { - const size_t kBatchNum = scores_layout.batch(); - const size_t kClassNum = scores_layout.feature(); - const size_t kNStreams = - static_cast(node.get_program().get_config().get_property(ov::streams::num)); - const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; - preferred_impl = (kKeyValue > 64) ? 
impl_types::ocl : impl_types::cpu; + const auto scores_layout = nms_node.input_scores().get_output_layout(); + if (scores_layout.is_dynamic()) { + preferred_impl = impl_types::ocl; + } else { + const size_t kBatchNum = scores_layout.batch(); + const size_t kClassNum = scores_layout.feature(); + const size_t kNStreams = + static_cast(node.get_program().get_config().get_property(ov::streams::num)); + const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast(8)) * kNStreams; + preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu; + } } } } else if (node.is_type()) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl index 36651d8773fe6c..cf26d0cbc276c0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl @@ -62,7 +62,7 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const boxes[INPUT0_GET_INDEX(batch, boxId, 2, 0)], boxes[INPUT0_GET_INDEX(batch, boxId, 3, 0)]); -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 const COORD_TYPE ax1 = min(coords[1], coords[3]); const COORD_TYPE ax2 = max(coords[1], coords[3]); const COORD_TYPE ay1 = min(coords[0], coords[2]); @@ -76,9 +76,331 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const return coords; } +#ifdef ROTATION + +typedef struct { + float x, y; +} FUNC(Point2D); +#define POINT_2D FUNC(Point2D) + +inline void FUNC(getRotatedVertices)(const COORD_TYPE_4 box, const INPUT0_TYPE angle, POINT_2D* pts) { + const float theta = angle + #if ROTATION == 2 + * -1.0f + #endif + ; + float cosTheta2 = cos(theta) * 0.5f; + float sinTheta2 = sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + // Left-Down + pts[0].x = box[0]/*.x_ctr*/ - sinTheta2 * box[3]/*.h*/ 
- cosTheta2 * box[2]/*.w*/; + pts[0].y = box[1]/*.y_ctr*/ + cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Left-Top + pts[1].x = box[0]/*.x_ctr*/ + sinTheta2 * box[3]/*.h*/ - cosTheta2 * box[2]/*.w*/; + pts[1].y = box[1]/*.y_ctr*/ - cosTheta2 * box[3]/*.h*/ - sinTheta2 * box[2]/*.w*/; + // Right-Top + pts[2].x = 2 * box[0]/*.x_ctr*/ - pts[0].x; + pts[2].y = 2 * box[1]/*.y_ctr*/ - pts[0].y; + // Right-Down + pts[3].x = 2 * box[0]/*.x_ctr*/ - pts[1].x; + pts[3].y = 2 * box[1]/*.y_ctr*/ - pts[1].y; +} + +inline float FUNC(dot2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.x + A.y * B.y; +} + +inline float FUNC(cross2D)(const POINT_2D A, const POINT_2D B) { + return A.x * B.y - B.x * A.y; +} + +inline int FUNC(getIntersectionPoints)(const POINT_2D* pts1, const POINT_2D* pts2, POINT_2D* intersections) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + POINT_2D vec1[4], vec2[4]; + for (int i = 0; i < 4; i++) { + vec1[i].x = pts1[(i + 1) % 4].x - pts1[i].x; + vec1[i].y = pts1[(i + 1) % 4].y - pts1[i].y; + vec2[i].x = pts2[(i + 1) % 4].x - pts2[i].x; + vec2[i].y = pts2[(i + 1) % 4].y - pts2[i].y; + } + + // Line test - test all line combos for intersection + int num = 0; // number of intersections + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + // Solve for 2x2 Ax=b + float det = FUNC_CALL(cross2D)(vec2[j], vec1[i]); + // This takes care of parallel lines + if (fabs(det) <= 1e-14f) { + continue; + } + + POINT_2D vec12; + vec12.x= pts2[j].x - pts1[i].x; + vec12.y= pts2[j].y - pts1[i].y; + + float t1 = FUNC_CALL(cross2D)(vec2[j], vec12) / det; + float t2 = FUNC_CALL(cross2D)(vec1[i], vec12) / det; + + if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { + intersections[num].x = pts1[i].x + vec1[i].x * t1; + intersections[num].y = pts1[i].y + vec1[i].y * t1; + ++num; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const POINT_2D AB = vec2[0]; + const POINT_2D DA = vec2[3]; + float ABdotAB 
= FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + // assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD iff. P's projection on AB lies within AB + // and P's projection on AD lies within AD + + POINT_2D AP; + AP.x = pts1[i].x - pts2[0].x; + AP.y = pts1[i].y - pts2[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts1[i].x; + intersections[num].y = pts1[i].y; + ++num; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const POINT_2D AB = vec1[0]; + const POINT_2D DA = vec1[3]; + float ABdotAB = FUNC_CALL(dot2D)(AB, AB); + float ADdotAD = FUNC_CALL(dot2D)(DA, DA); + for (int i = 0; i < 4; i++) { + POINT_2D AP; + AP.x = pts2[i].x - pts1[0].x; + AP.y = pts2[i].y - pts1[0].y; + + float APdotAB = FUNC_CALL(dot2D)(AP, AB); + float APdotAD = -FUNC_CALL(dot2D)(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num].x = pts2[i].x; + intersections[num].y = pts2[i].y; + ++num; + } + } + } + + return num; +} + +inline void FUNC(swapPoints)(POINT_2D* a, POINT_2D* b) +{ + POINT_2D temp = *a; + *a = *b; + *b = temp; +} + +inline void FUNC(sortPoints)(POINT_2D* arr, int l, int h) +{ + for (int i = 0; i < h-l; i++) { + bool swapped = false; + + for (int j = l; j < h-i; j++) { + bool is_less = false; + const float temp = FUNC_CALL(cross2D)(arr[j], arr[j+1]); + if (fabs(temp) < 1e-6f) { + is_less = FUNC_CALL(dot2D)(arr[j], arr[j]) < FUNC_CALL(dot2D)(arr[j+1], arr[j+1]); + } else { + is_less = temp > 0; + } + + if (is_less) { + continue; + } + + FUNC_CALL(swapPoints)(&arr[j], &arr[j+1]); + swapped = true; + } + + if (!swapped) { + break; + } + } +} + +inline int FUNC(convex_hull_graham)(const POINT_2D* p, const int num_in, POINT_2D* q, 
bool shift_to_zero) { + if (num_in < 2) { + return -1; + } + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. + int t = 0; + for (int i = 1; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + const POINT_2D start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (int i = 0; i < num_in; i++) { + q[i].x = p[i].x - start.x; + q[i].y = p[i].y - start.y; + } + + // Swap the starting point to position 0 + FUNC_CALL(swapPoints)(&q[t], &q[0]); + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + float dist[24]; + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + FUNC_CALL(sortPoints)(q, 1, num_in - 1); + + // compute distance to origin after sort, since the points are now different. + for (int i = 0; i < num_in; i++) { + dist[i] = FUNC_CALL(dot2D)(q[i], q[i]); + } + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + int k; // index of the non-overlapped second point + for (k = 1; k < num_in; k++) { + if (dist[k] > 1e-8f) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0].x = p[t].x; + q[0].y = p[t].y; + return 1; + } + + q[1].x = q[k].x; + q[1].y = q[k].y; + int m = 2; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (int i = k + 1; i < num_in; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[m - 2].x; + diff1.y = q[i].y - q[m - 2].y; + diff2.x = q[m - 1].x - q[m - 2].x; + diff2.y = q[m - 1].y - q[m - 2].y; + + float cross2d_diff = FUNC_CALL(cross2D)(diff1, diff2); + + while (m > 1 && cross2d_diff >= 0) { + m--; + } + q[m].x = q[i].x; + q[m].y = q[i].y; + ++m; + } + + // Step 6 (Optional): + // In general sense we need the original coordinates, so we + // need to shift the points back (reverting Step 2) + // But if we're only interested in getting the area/perimeter of the shape + // We can simply return. + if (!shift_to_zero) { + for (int i = 0; i < m; i++) { + q[i].x += start.x; + q[i].y += start.y; + } + } + + return m; +} + +inline float FUNC(polygon_area)(const POINT_2D* q, const int m) { + if (m <= 2) { + return 0.f; + } + + float area = 0.f; + for (int i = 1; i < m - 1; i++) { + POINT_2D diff1, diff2; + diff1.x = q[i].x - q[0].x; + diff1.y = q[i].y - q[0].y; + diff2.x = q[i + 1].x - q[0].x; + diff2.y = q[i + 1].y - q[0].y; + float cross_result = FUNC_CALL(cross2D)(diff1, diff2); + + area += fabs(cross_result); + } + + return area / 2.0f; +} + +inline float FUNC(rotatedBoxesIntersection)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned + // from get_intersection_points + POINT_2D intersectPts[24], orderedPts[24]; + POINT_2D pts1[4]; + POINT_2D pts2[4]; + FUNC_CALL(getRotatedVertices)(boxA, angleA, pts1); + FUNC_CALL(getRotatedVertices)(boxB, angleB, pts2); + // Find points defining area of the boxes intersection + int num = FUNC_CALL(getIntersectionPoints)(pts1, 
pts2, intersectPts); + + if (num <= 2) { + return 0.f; + } + + // Convex Hull to order the intersection points in clockwise order and find + // the contour area. + int num_convex = FUNC_CALL(convex_hull_graham)(intersectPts, num, orderedPts, true); + return FUNC_CALL(polygon_area)(orderedPts, num_convex); +} + + +inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA, + const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) +{ + const float areaA = convert_float(boxA[3]) * convert_float(boxA[2]); + const float areaB = convert_float(boxB[3]) * convert_float(boxB[2]); + + if (areaA <= 0.0f || areaB <= 0.0f) + return 0.0f; + + const float intersection_area = FUNC_CALL(rotatedBoxesIntersection)(boxA, angleA, boxB, angleB); + const float union_area = areaA + areaB - intersection_area; + return intersection_area / union_area; +} + +#else + inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TYPE_4 boxB) { -#if BOX_ENCODING == 0 +#if !defined(ROTATION) && BOX_ENCODING == 0 /// CORNER const float areaA = convert_float(boxA[3] - boxA[1]) * convert_float(boxA[2] - boxA[0]); const float areaB = convert_float(boxB[3] - boxB[1]) * convert_float(boxB[2] - boxB[0]); @@ -110,6 +432,7 @@ inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TY const float union_area = areaA + areaB - intersection_area; return intersection_area / union_area; } +#endif // ROTATION inline float FUNC(scaleIOU)(float iou, float iou_threshold, float scale) { @@ -240,6 +563,16 @@ inline void FUNC(swap)(__global BOX_INFO* a, __global BOX_INFO* b) *b = temp; } +#ifdef ROTATION +inline void FUNC(reverseOutputBoxList)(__global BOX_INFO *outBoxes, int boxNum) +{ + for (int i = 0; i < boxNum / 2; ++i) { + FUNC_CALL(swap)(&outBoxes[i], &outBoxes[boxNum - 1 - i]); + } +} + +#else + inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNum) { for (int i = 0; i < boxNum - 1; ++i) { @@ -261,6 +594,7 @@ inline 
void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNu break; } } +#endif // ROTATION #ifdef NMS_STAGE_0 @@ -427,9 +761,11 @@ KERNEL (non_max_suppression_ref_stage_2)( const ushort classId = get_global_id(1); float scale = 0.0f; + #ifndef ROTATION if (SOFT_NMS_SIGMA_VAL > 0.0f) { scale = -0.5f / SOFT_NMS_SIGMA_VAL; } + #endif __global SBOX_INFO *sortedBoxList = (__global SBOX_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; const int kSortedBoxNum = buffer2[batchId * NUM_CLASSES + classId]; @@ -442,12 +778,22 @@ KERNEL (non_max_suppression_ref_stage_2)( SBOX_INFO next_candidate = sortedBoxList[i]; INPUT1_TYPE original_score = next_candidate.score; const COORD_TYPE_4 next_candidate_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, next_candidate.boxId); + #ifdef ROTATION + const INPUT0_TYPE next_candidate_angle = boxes[INPUT0_GET_INDEX(batchId, next_candidate.boxId, 4, 0)]; + #endif + ++i; bool should_hard_suppress = false; for (int j = selectedBoxNum - 1; j >= next_candidate.suppress_begin_index; --j) { const COORD_TYPE_4 selected_box_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, selectedBoxList[j].boxId); + #ifdef ROTATION + const INPUT0_TYPE selected_box_angle = boxes[INPUT0_GET_INDEX(batchId, selectedBoxList[j].boxId, 4, 0)]; + const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, next_candidate_angle, + selected_box_coord, selected_box_angle); + #else const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, selected_box_coord); + #endif next_candidate.score *= FUNC_CALL(scaleIOU)(iou, IOU_THRESHOLD_VAL, scale); if (iou >= IOU_THRESHOLD_VAL && !(SOFT_NMS_SIGMA_VAL > 0.0f)) { @@ -531,7 +877,11 @@ KERNEL (non_max_suppression_ref_stage_3)( } #if SORT_RESULT_DESCENDING == 1 +#ifdef ROTATION + FUNC_CALL(reverseOutputBoxList)(sortedBoxList, outputIdx); +#else FUNC_CALL(sortOutputBoxList)(sortedBoxList, outputIdx); +#endif #endif unroll_for (int i = 0; i < outputIdx; i++) { diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 8c841b6001f44d..1acc0aa89e6af6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -570,6 +570,15 @@ enum class BoxEncodingType { BOX_ENCODING_CENTER, }; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// NMSRotationType +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +enum class NMSRotationType { + NONE, + CLOCKWISE, + COUNTERCLOCKWISE +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // ConvertColor //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp index 28a6b2fa9e0bb6..fc85b23005ec84 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.cpp @@ -149,11 +149,17 @@ JitConstants NonMaxSuppressionKernelRef::GetJitConstants(const non_max_suppressi jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD_VAL", params.score_threshold)); } - if (params.soft_nms_sigma_type == base_params::ArgType::Input) { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + if (params.rotation == NMSRotationType::NONE) { + if (params.soft_nms_sigma_type == 
base_params::ArgType::Input) { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma()))); + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])")); + } else { + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + } } else { - jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma)); + jit.AddConstant(MakeJitConstant("ROTATION", static_cast(params.rotation))); + // for NMSRotated it is always zero + jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", 0.0f)); } if (params.has_second_output) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h index 5ace6fbebffac3..8fc2dc2724a9bd 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/non_max_suppression/non_max_suppression_kernel_ref.h @@ -36,6 +36,7 @@ struct non_max_suppression_params : public base_params { bool has_third_output; bool use_multiple_outputs; bool reuse_internal_buffer = false; + NMSRotationType rotation = NMSRotationType::NONE; uint32_t GetIndexNumSelectPerClass() const { uint32_t input_idx = 2; diff --git a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp index fd36533e2a5d47..6e91cc7db9fe2f 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp @@ -17,7 +17,18 @@ namespace ov { namespace intel_gpu { static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {2, 3, 4, 5, 6}); + cldnn::non_max_suppression::Rotation rotation = 
cldnn::non_max_suppression::Rotation::NONE; + const bool is_nms_rotated = op->m_rotation != ov::op::internal::NonMaxSuppressionIEInternal::Rotation_None; + if (is_nms_rotated) { + // For NMSRotated threshold inputs are mandatory, and soft_nms_sigma input is absent + validate_inputs_count(op, {5}); + + rotation = op->m_rotation == ov::op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise ? + cldnn::non_max_suppression::Rotation::CLOCKWISE + : cldnn::non_max_suppression::Rotation::COUNTERCLOCKWISE; + } else { + validate_inputs_count(op, {2, 3, 4, 5, 6}); + } auto inputs = p.GetInputInfo(op); std::vector reordered_inputs; reordered_inputs.resize(inputs.size()); @@ -75,6 +86,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh prim.output_paddings = get_output_paddings(); prim.output_data_types = get_output_data_types(); + prim.rotation = rotation; switch (reordered_inputs.size()) { case 6: prim.soft_nms_sigma = reordered_inputs[5].pid; @@ -142,6 +154,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh "", "", "", "", "", ""); prim.output_data_types = get_output_data_types(); + prim.rotation = rotation; switch (reordered_inputs.size()) { case 6: prim.soft_nms_sigma = reordered_inputs[5].pid; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index dfc24774fcd26b..ac567cd998f9a2 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -88,6 +88,7 @@ #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" #include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp" #include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp" +#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp" #include 
"transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" #include "transformations/op_conversions/convert_gather_downgrade.hpp" @@ -272,6 +273,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp new file mode 100644 index 00000000000000..80224b57ebcff7 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/nms_rotated.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/nms_rotated.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace InferenceEngine; +using namespace ngraph; + +const std::vector inShapeParams = { + InputShapeParams{2, 50, 50}, + InputShapeParams {9, 10, 10} +}; + +const std::vector maxOutBoxPerClass = {5, 20}; +const std::vector threshold = {0.3f, 0.7f}; +const std::vector sortResDesc = {true, false}; +const std::vector outType = {element::i32, element::i64}; +const std::vector clockwise = {true, false}; + +const std::vector inputPrecisions = {Precision::FP32, Precision::FP16}; + +INSTANTIATE_TEST_SUITE_P(smoke_NmsRotatedLayerTest, + NmsRotatedLayerTest, + ::testing::Combine(::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::ValuesIn(inputPrecisions), + ::testing::Values(Precision::I32), + ::testing::Values(Precision::FP32)), + ::testing::ValuesIn(maxOutBoxPerClass), + ::testing::ValuesIn(threshold), + ::testing::ValuesIn(threshold), + 
::testing::ValuesIn(sortResDesc), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(clockwise), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + NmsRotatedLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp index 909149b05e32fa..d14c0cab8d69a3 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/non_max_suppression_test.cpp @@ -709,3 +709,250 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma_cached) { TYPED_TEST(non_max_suppression_basic, multiple_outputs_cached) { this->test_multiple_outputs(true); } + +namespace { +template +struct NmsRotatedParams { + std::string test_name; + int num_batches; + int num_boxes; + int num_classes; + std::vector boxes; + std::vector scores; + int max_output_boxes_per_class; + float iou_threshold; + float score_threshold; + bool sort_result_descending; + bool clockwise; + std::vector expected_indices; + std::vector expected_scores; +}; + +template float getError(); + +template<> +float getError() { + return 0.001; +} + +template<> +float getError() { + return 0.1; +} + +template +struct nms_rotated_test : public ::testing::TestWithParam> { +public: + void test(bool is_caching_test = false + ) { + const NmsRotatedParams param = testing::TestWithParam>::GetParam(); + const auto data_type = ov::element::from(); + + auto& engine = tests::get_test_engine(); + + const auto boxes_layout = layout(ov::PartialShape{param.num_batches, param.num_boxes, 5}, data_type, + format::bfyx); + const auto scores_layout = layout(ov::PartialShape{param.num_batches, param.num_classes, param.num_boxes}, + data_type, format::bfyx); + + const int selected_indices_num = param.num_batches * param.num_classes * param.num_boxes; + const auto selected_scores_layout = 
layout(ov::PartialShape{selected_indices_num/*expected_indices_count*/, 3}, + data_type, format::bfyx); + const auto valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, format::bfyx); + + const auto boxes_mem = engine.allocate_memory(boxes_layout); + tests::set_values(boxes_mem, param.boxes); + + const auto scores_mem = engine.allocate_memory(scores_layout); + tests::set_values(scores_mem, param.scores); + + const auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); + tests::set_values(num_per_class_mem, {1.f * param.max_output_boxes_per_class}); + + const auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); + tests::set_values(iou_threshold_mem, {param.iou_threshold}); + + const auto score_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); + tests::set_values(score_threshold_mem, {param.score_threshold}); + + const auto selected_scores_mem = engine.allocate_memory(selected_scores_layout); + const auto valid_outputs_mem = engine.allocate_memory(valid_outputs_layout); + + topology topo; + topo.add(input_layout("boxes", boxes_layout)); + topo.add(input_layout("scores", scores_layout)); + topo.add(data("num_per_class", num_per_class_mem)); + topo.add(data("iou_threshold", iou_threshold_mem)); + topo.add(data("score_threshold", score_threshold_mem)); + topo.add(mutable_data("selected_scores", selected_scores_mem)); + topo.add(mutable_data("valid_outputs", valid_outputs_mem)); + auto nms = non_max_suppression("nms", + input_info("boxes"), + input_info("scores"), + selected_indices_num, + false, + param.sort_result_descending, + "num_per_class", + "iou_threshold", + "score_threshold", + "", + "selected_scores", + "valid_outputs"); + nms.rotation = param.clockwise ? 
non_max_suppression::Rotation::CLOCKWISE : + non_max_suppression::Rotation::COUNTERCLOCKWISE; + + topo.add(nms); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + + cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test); + net->set_input_data("boxes", boxes_mem); + net->set_input_data("scores", scores_mem); + const auto result = net->execute(); + const auto indices_mem = result.at("nms").get_memory(); + const cldnn::mem_lock indices_ptr(indices_mem, get_test_stream()); + const cldnn::mem_lock selected_scores_ptr(selected_scores_mem, get_test_stream()); + const cldnn::mem_lock valid_outputs_ptr(valid_outputs_mem, get_test_stream()); + + const auto expected_valid_outputs = param.expected_indices.size() / 3; + const size_t num_valid_outputs = static_cast(valid_outputs_ptr[0]); + + EXPECT_EQ(num_valid_outputs, expected_valid_outputs); + ASSERT_GE(indices_ptr.size(), param.expected_indices.size()); + ASSERT_GE(selected_scores_ptr.size(), param.expected_scores.size()); + + for (size_t i = 0; i < indices_ptr.size(); ++i) { + if (i < num_valid_outputs * 3) { + EXPECT_EQ(param.expected_indices[i], indices_ptr[i]) << "at i = " << i; + EXPECT_NEAR(param.expected_scores[i], selected_scores_ptr[i], getError()) << "at i = " << i; + } else { + EXPECT_EQ(indices_ptr[i], -1) << "at i = " << i; + EXPECT_NEAR(selected_scores_ptr[i], -1, getError()) << "at i = " << i; + } + } + } +}; + + +struct PrintToStringParamName { + template + std::string operator()(const testing::TestParamInfo>& info) { + const auto& p = info.param; + std::ostringstream result; + result << p.test_name << "_"; + result << "DataType=" << ov::element::Type(ov::element::from()); + result << "_IndexType=" << ov::element::Type(ov::element::from()); + return result.str(); + } +}; + + +using nms_rotated_test_f32_i32 = nms_rotated_test; +using nms_rotated_test_f16_i32 = nms_rotated_test; + 
+TEST_P(nms_rotated_test_f32_i32, basic) { + ASSERT_NO_FATAL_FAILURE(test()); +} + +TEST_P(nms_rotated_test_f16_i32, basic) { + ASSERT_NO_FATAL_FAILURE(test()); +} + +template +std::vector> getNmsRotatedParams() { + const std::vector> params = { + {"basic", + 1, 4, 1, + std::vector{ + 7.0, 4.0, 8.0, 7.0, 0.5, + 4.0, 7.0, 9.0, 11.0, 0.6, + 4.0, 8.0, 10.0, 12.0, 0.3, + 2.0, 5.0, 13.0, 7.0, 0.6}, + std::vector{0.65, 0.7, 0.55, 0.96}, + 5000, 0.5f, 0.0f, false, true, + std::vector{0, 0, 3, 0, 0, 1, 0, 0, 0}, + std::vector{0.0, 0.0, 0.96, 0.0, 0.0, 0.7, 0.0, 0.0, 0.65}, + }, + {"max_out_2", + 1, 4, 1, + std::vector{ + 7.0, 4.0, 8.0, 7.0, 0.5, + 4.0, 7.0, 9.0, 11.0, 0.6, + 4.0, 8.0, 10.0, 12.0, 0.3, + 2.0, 5.0, 13.0, 7.0, 0.6}, + std::vector{0.65, 0.7, 0.55, 0.96}, + 2, 0.5f, 0.0f, false, true, + std::vector{0, 0, 3, 0, 0, 1}, + std::vector{0.0, 0.0, 0.96, 0.0, 0.0, 0.7}, + }, + {"score_thresold", + 1, 4, 1, + std::vector{ + 7.0, 4.0, 8.0, 7.0, 0.5, + 4.0, 7.0, 9.0, 11.0, 0.6, + 4.0, 8.0, 10.0, 12.0, 0.3, + 2.0, 5.0, 13.0, 7.0, 0.6}, + std::vector{0.65, 0.7, 0.55, 0.96}, + 5000, 0.5f, 0.67f, false, true, + std::vector{0, 0, 3, 0, 0, 1}, + std::vector{0.0, 0.0, 0.96, 0.0, 0.0, 0.7}, + }, + {"iou_thresold_2", + 1, 4, 1, + std::vector{ + 7.0, 4.0, 8.0, 7.0, 0.5, + 4.0, 7.0, 9.0, 11.0, 0.6, + 4.0, 8.0, 10.0, 12.0, 0.3, + 2.0, 5.0, 13.0, 7.0, 0.6}, + std::vector{0.65, 0.7, 0.55, 0.96}, + 5000, 0.3f, 0.0f, false, true, + std::vector{0, 0, 3, 0, 0, 0}, + std::vector{0.0, 0.0, 0.96, 0.0, 0.0, 0.65}, + }, + {"negative_cw", + 1, 2, 1, + std::vector{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0}, + std::vector{0.8, 0.7}, + 5000, 0.1f, 0.0f, false, true, + std::vector{0, 0, 0, 0, 0, 1}, + std::vector{0.0, 0.0, 0.8, 0.0, 0.0, 0.7} + }, + {"negative_ccw", + 1, 2, 1, + std::vector{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0}, + std::vector{0.8, 0.7}, + 5000, 0.1f, 0.0f, false, false, + std::vector{0, 0, 0}, + std::vector{0.0, 0.0, 0.8} + }, + {"positive_ccw", + 1, 2, 1, 
+ std::vector{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0}, + std::vector{0.8, 0.7}, + 5000, 0.1f, 0.0f, false, false, + std::vector{0, 0, 0, 0, 0, 1}, + std::vector{0.0, 0.0, 0.8, 0.0, 0.0, 0.7} + }, + {"positive_cw", + 1, 2, 1, + std::vector{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0}, + std::vector{0.8, 0.7}, + 5000, 0.1f, 0.0f, false, true, + std::vector{0, 0, 0}, + std::vector{0.0, 0.0, 0.8} + } + }; + + return params; +} +INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test, + nms_rotated_test_f32_i32, + ::testing::ValuesIn(getNmsRotatedParams()), + PrintToStringParamName()); + +INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test, + nms_rotated_test_f16_i32, + ::testing::ValuesIn(getNmsRotatedParams()), + PrintToStringParamName()); +} // namespace diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp new file mode 100644 index 00000000000000..d02a115acaeb18 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/nms_rotated.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/nms_rotated.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(NmsRotatedLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp new file mode 100644 index 00000000000000..3d36cf3a2e0439 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/nms_rotated.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include 
"shared_test_classes/base/layer_test_utils.hpp" + + +namespace LayerTestsDefinitions { + +using InputShapeParams = std::tuple; // Number of classes + +using InputPrecisions = + std::tuple; // iou_threshold, score_threshold, soft_nms_sigma precisions + +using NmsRotatedParams = std::tuple; // Device name + +class NmsRotatedLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + void GenerateInputs() override; + void Compare(const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) override; + +protected: + void SetUp() override; + InputShapeParams inShapeParams; +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp new file mode 100644 index 00000000000000..80e6cc98db203f --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_layer/nms_rotated.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/nms_rotated.hpp" +#include "openvino/op/nms_rotated.hpp" + +#include + +namespace LayerTestsDefinitions { + +using namespace InferenceEngine; +using namespace FuncTestUtils::PrecisionUtils; + +std::string NmsRotatedLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + int32_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::string targetDevice; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = obj.param; + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision 
inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, thrPrec) = inPrecisions; + + std::ostringstream result; + result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_"; + result << "inputPrec=" << inputPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_"; + result << "maxOutBoxesPerClass=" << maxOutBoxesPerClass << "_"; + result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_"; + result << "sortResDescend=" << sortResDescend << "_outType=" << outType << "_"; + result << "clockwise=" << clockwise << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void NmsRotatedLayerTest::GenerateInputs() { + size_t it = 0; + for (const auto& input : cnnNetwork.getInputsInfo()) { + const auto& info = input.second; + Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + if (info->getTensorDesc().getPrecision() == Precision::FP32) { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } else { + ov::test::utils::fill_data_random_float(blob, 1, 0, 1000); + } + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } +} + +void NmsRotatedLayerTest::Compare( + const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) { + size_t num_batches, num_boxes, num_classes; + std::tie(num_batches, num_boxes, num_classes) = inShapeParams; + + struct OutBox { + OutBox() = default; + + OutBox(int32_t batchId, int32_t classId, int32_t boxId, float score) { + this->batchId = batchId; + this->classId = classId; + this->boxId = boxId; + this->score = score; + } + + bool operator==(const OutBox& rhs) const { + return batchId == rhs.batchId && classId == rhs.classId && boxId == rhs.boxId; + } + + int32_t batchId; + int32_t classId; + int32_t boxId; + float score; + }; + + std::vector expected; + { + const auto selected_indices_size = expectedOutputs[0].second.size() / 
expectedOutputs[0].first.size(); + const auto selected_scores_size = expectedOutputs[1].second.size() / expectedOutputs[1].first.size(); + + ASSERT_EQ(selected_indices_size, selected_scores_size); + + const auto boxes_count = selected_indices_size / 3; + expected.resize(boxes_count); + + if (expectedOutputs[0].first.size() == 4) { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = selected_indices_data[i + 0]; + expected[i / 3].classId = selected_indices_data[i + 1]; + expected[i / 3].boxId = selected_indices_data[i + 2]; + } + } else { + auto selected_indices_data = reinterpret_cast(expectedOutputs[0].second.data()); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + expected[i / 3].batchId = static_cast(selected_indices_data[i + 0]); + expected[i / 3].classId = static_cast(selected_indices_data[i + 1]); + expected[i / 3].boxId = static_cast(selected_indices_data[i + 2]); + } + } + + if (expectedOutputs[1].first.size() == 4) { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = selected_scores_data[i + 2]; + } + } else { + auto selected_scores_data = reinterpret_cast(expectedOutputs[1].second.data()); + for (size_t i = 0; i < selected_scores_size; i += 3) { + expected[i / 3].score = static_cast(selected_scores_data[i + 2]); + } + } + } + + std::vector actual; + { + const auto selected_indices_size = actualOutputs[0]->byteSize() / sizeof(float); + const auto selected_indices_memory = as(actualOutputs[0]); + IE_ASSERT(selected_indices_memory); + const auto selected_indices_lockedMemory = selected_indices_memory->rmap(); + const auto selected_indices_data = selected_indices_lockedMemory.as(); + + const auto selected_scores_memory = as(actualOutputs[1]); + IE_ASSERT(selected_scores_memory); + const auto 
selected_scores_lockedMemory = selected_scores_memory->rmap(); + const auto selected_scores_data = selected_scores_lockedMemory.as(); + + for (size_t i = 0; i < selected_indices_size; i += 3) { + const int32_t batchId = selected_indices_data[i + 0]; + const int32_t classId = selected_indices_data[i + 1]; + const int32_t boxId = selected_indices_data[i + 2]; + const float score = selected_scores_data[i + 2]; + if (batchId == -1 || classId == -1 || boxId == -1) + break; + + actual.emplace_back(batchId, classId, boxId, score); + } + } + + ASSERT_EQ(expected.size(), actual.size()); + for (size_t i = 0; i < expected.size(); ++i) { + ASSERT_EQ(expected[i], actual[i]) << ", i=" << i; + ASSERT_NEAR(expected[i].score, actual[i].score, abs_threshold) << ", i=" << i; + } +} + +void NmsRotatedLayerTest::SetUp() { + InputPrecisions inPrecisions; + size_t maxOutBoxesPerClass; + float iouThr, scoreThr; + bool sortResDescend, clockwise; + ov::element::Type outType; + std::tie(inShapeParams, + inPrecisions, + maxOutBoxesPerClass, + iouThr, + scoreThr, + sortResDescend, + outType, + clockwise, + targetDevice) = this->GetParam(); + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision inputPrec, maxBoxPrec, thrPrec; + std::tie(inputPrec, maxBoxPrec, thrPrec) = inPrecisions; + + if (inputPrec == Precision::FP16) { + abs_threshold = 0.1; + } else { + abs_threshold = std::numeric_limits::epsilon(); + } + + ov::ParameterVector params; + + const std::vector boxesShape{numBatches, numBoxes, 5}, scoresShape{numBatches, numClasses, numBoxes}; + const auto ngPrc = convertIE2nGraphPrc(inputPrec); + + const auto boxesNode = std::make_shared(ngPrc, ov::Shape(boxesShape)); + params.push_back(boxesNode); + const auto scoresNode = std::make_shared(ngPrc, ov::Shape(scoresShape)); + params.push_back(scoresNode); + + const auto maxOutputBoxesPerClassNode = std::make_shared(ov::element::Type_t::u32, + ov::Shape{}, + 
std::vector{maxOutBoxesPerClass}); + const auto iouThresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{iouThr}); + const auto scoreTresholdNode = std::make_shared(ov::element::Type_t::f32, + ov::Shape{}, + std::vector{scoreThr}); + + const auto nmsNode = std::make_shared(params[0], + params[1], + maxOutputBoxesPerClassNode, + iouThresholdNode, + scoreTresholdNode, + sortResDescend, + outType, + clockwise); + + function = std::make_shared(nmsNode, params, "NMS"); +} +} // namespace LayerTestsDefinitions From ee6263a1411fdb393a019fe5a05e713edf51678f Mon Sep 17 00:00:00 2001 From: Andrey Babushkin Date: Tue, 24 Oct 2023 20:52:56 +0100 Subject: [PATCH 035/275] [workflows/linux] Switch to sccache and Azure Blob Storage (#20484) * [workflows/linux] Switch to sccache and Azure Blob Storage * Install curl * Remove --show-config * Add sccache to other Linux workflows * sccache to Android, curl to riscv and CC * Use sccache action instead of manual install * Oops, missed sccache manual installation in two places * Use env vars instead of hardcoded CMAKE_C(XX)_COMPILER_LAUNCHER * Forgot one more stage in Linux CC pipeline * Temporarily disable Blob Storage for RISC-V For some reason sccache has no effect on build time and show 0 hits and 0 compilation requests despite being in CMake calls * forgot to add sccache installation to Linux CC * Revert "Temporarily disable Blob Storage for RISC-V" This reverts commit b528f41dad583a38b9ef93121e38044b9dccb71b. 
* Missing container option for CC build * Remove curl installation * Remove CCACHE* variables which have no effect on sccache * Revert sccache changes for Linux RISC-V workflow --- .github/workflows/android_arm64.yml | 19 +++--- .github/workflows/fedora.yml | 18 +++--- .github/workflows/linux.yml | 59 ++++++++++++------- .../linux_conditional_compilation.yml | 33 +++++++---- .github/workflows/webassembly.yml | 21 +++---- 5 files changed, 90 insertions(+), 60 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index e4360eb08d3850..fb4b36c69f5a55 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -35,11 +35,12 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_GENERATOR: 'Ninja' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache OPENVINO_REPO: '/__w/openvino/openvino/openvino' VCPKG_ROOT: '/__w/openvino/openvino/vcpkg' BUILD_DIR: '/__w/openvino/openvino/build' @@ -49,10 +50,7 @@ jobs: ANDROID_ABI_CONFIG: arm64-v8a VCPKG_DEFAULT_BINARY_CACHE: '/mount/caches/ccache/android_arm64/vcpkg_cache' VCPKG_FORCE_SYSTEM_BINARIES: '1' - CCACHE_DIR: '/mount/caches/ccache/android_arm64' - CCACHE_TEMPDIR: '/__w/openvino/openvino/ccache_temp' - CCACHE_COMPILERCHECK: 'content' - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: android_arm64 steps: - name: Install git run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates @@ -102,6 +100,11 @@ jobs: unzip commandlinetools-linux-7583922_latest.zip echo "yes" | ./cmdline-tools/bin/sdkmanager --sdk_root=${ANDROID_TOOLS} --install "ndk-bundle" "platform-tools" "platforms;android-${{ 
env.ANDROID_SDK_VERSION }}" + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" + # # Build # @@ -138,10 +141,10 @@ jobs: -B ${BUILD_DIR} - name: Clean ccache stats - run: ccache --zero-stats --show-config + run: sccache --zero-stats - name: Cmake - build run: cmake --build ${BUILD_DIR} --parallel - name: Show ccache stats - run: ccache --show-stats + run: ${SCCACHE_PATH} --show-stats diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index a554dfa98b462b..f398b1a3623fc1 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -36,19 +36,18 @@ jobs: image: fedora:33 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache GITHUB_WORKSPACE: '/__w/openvino/openvino' OPENVINO_REPO: /__w/openvino/openvino/openvino INSTALL_DIR: /__w/openvino/openvino/openvino_install INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install BUILD_DIR: /__w/openvino/openvino/openvino_build - CCACHE_DIR: /mount/caches/ccache/fedora33_x86_64_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: fedora33_x86_64_Release steps: - name: Install git run: yum update -y && yum install -y git @@ -66,6 +65,11 @@ jobs: - name: Install build dependencies run: bash ${OPENVINO_REPO}/install_build_dependencies.sh + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" + - name: Install python dependencies run: | python3 -m pip install -U pip @@ -112,8 +116,8 @@ jobs: - name: Cmake build - OpenVINO run: cmake --build ${BUILD_DIR} --parallel --verbose - - name: Show ccache stats - run: ccache --show-stats + - name: Show sccache stats + run: 
${SCCACHE_PATH} --show-stats - name: Cmake install - OpenVINO run: | diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 639eca9957928d..9050ab3d161509 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -43,12 +43,13 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja Multi-Config' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache GITHUB_WORKSPACE: '/__w/openvino/openvino' OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib @@ -56,9 +57,7 @@ jobs: INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install BUILD_DIR: /__w/openvino/openvino/openvino_build - CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/.ci/azure/ci_utils/onnxruntime steps: @@ -92,6 +91,11 @@ jobs: # libssl1.1 - 'python3 -m pip' in self-hosted runner apt install --assume-yes --no-install-recommends default-jdk libssl1.1 + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} @@ -143,14 +147,14 @@ jobs: -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} - - name: Clean ccache stats - run: ccache --zero-stats --show-config + - name: Clean sccache stats + run: sccache --zero-stats - name: Cmake build - OpenVINO run: cmake --build ${BUILD_DIR} --parallel --config ${{ 
env.CMAKE_BUILD_TYPE }} - - name: Show ccache stats - run: ccache --show-stats + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats - name: Cmake install - OpenVINO run: | @@ -497,16 +501,15 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_GENERATOR: 'Ninja Multi-Config' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache OPENVINO_REPO: /__w/openvino/openvino/openvino INSTALL_DIR: /__w/openvino/openvino/install - CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_onnxruntime - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_onnxruntime ONNX_RUNTIME_REPO: /__w/openvino/openvino/onnxruntime ONNX_RUNTIME_UTILS: /__w/openvino/openvino/install/onnxruntime ONNX_RUNTIME_BUILD_DIR: /__w/openvino/openvino/onnxruntime/build @@ -561,6 +564,11 @@ jobs: - name: Install Build Dependencies run: bash ${OPENVINO_REPO}/install_build_dependencies.sh + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" + - name: Build Lin ONNX Runtime run: | source ${INSTALL_DIR}/setupvars.sh @@ -576,6 +584,9 @@ jobs: env: CXXFLAGS: "-Wno-error=deprecated-declarations" + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats + - name: Run onnxruntime_test_all run: | source ${INSTALL_DIR}/setupvars.sh @@ -1346,21 +1357,20 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/nvidia/cuda:11.8.0-runtime-ubuntu20.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja Multi-Config' - CMAKE_CUDA_COMPILER_LAUNCHER: ccache - 
CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CUDA_COMPILER_LAUNCHER: sccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache INSTALL_DIR: /__w/openvino/openvino/install OPENVINO_DEVELOPER_PACKAGE: /__w/openvino/openvino/install/developer_package OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_CONTRIB_REPO: /__w/openvino/openvino/openvino_contrib NVIDIA_BUILD_DIR: /__w/openvino/openvino/nvidia_plugin_build DEBIAN_FRONTEND: 'noninteractive' - CCACHE_DIR: /mount/caches/ccache/ubuntu20_x86_64_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release steps: - name: Install Prerequisites @@ -1417,7 +1427,12 @@ jobs: - name: Install build dependencies run: | ${OPENVINO_REPO}/install_build_dependencies.sh - apt -y --no-install-recommends install software-properties-common + apt -y --no-install-recommends install software-properties-common curl + + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" - name: Install CUDA run: | @@ -1452,4 +1467,4 @@ jobs: cmake --build ${NVIDIA_BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --verbose -- ov_nvidia_func_tests ov_nvidia_unit_tests - name: Show ccache stats - run: ccache --show-stats + run: ${SCCACHE_PATH} --show-stats diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index cb4470ef496606..15acba9d441696 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -39,21 +39,20 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja 
Multi-Config' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache GITHUB_WORKSPACE: '/__w/openvino/openvino' OPENVINO_REPO: /__w/openvino/openvino/openvino INSTALL_DIR: /__w/openvino/openvino/openvino_install BUILD_DIR: /__w/openvino/openvino/openvino_build SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat MODELS_PATH: /__w/openvino/openvino/testdata - CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_itt_clang_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 20G + SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_itt_clang_Release steps: - name: Install git @@ -88,6 +87,11 @@ jobs: update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100 update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100 + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} @@ -141,8 +145,8 @@ jobs: cmake --build ${BUILD_DIR} --parallel 8 --config ${{ env.CMAKE_BUILD_TYPE }} cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib - - name: Show ccache stats - run: ccache --show-stats + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats - name: Cmake install - OpenVINO run: cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake @@ -205,18 +209,17 @@ jobs: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04 volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache GITHUB_WORKSPACE: '/__w/openvino/openvino' OPENVINO_REPO: 
/__w/openvino/openvino/openvino BUILD_DIR: /__w/openvino/openvino/openvino_build SELECTIVE_BUILD_STAT_DIR: /__w/openvino/openvino/selective_build_stat MODELS_PATH: /__w/openvino/openvino/testdata - CCACHE_DIR: /mount/caches/ccache/ubuntu22_x86_64_cc_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 20G + SCCACHE_AZURE_KEY_PREFIX: ubuntu22_x86_64_cc_Release steps: - name: Install git @@ -252,6 +255,10 @@ jobs: - name: Install build dependencies run: bash ${OPENVINO_REPO}/install_build_dependencies.sh + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" # # Build # @@ -281,7 +288,7 @@ jobs: run: cmake --build ${BUILD_DIR} --parallel 8 --target benchmark_app - name: Show ccache stats - run: ccache --show-stats + run: ${SCCACHE_PATH} --show-stats - name: Run with CC-ed runtime run: ${OPENVINO_REPO}/bin/intel64/Release/benchmark_app -niter 1 -nireq 1 -m ${MODELS_PATH}/models/test_model/test_model_fp32.xml -d CPU diff --git a/.github/workflows/webassembly.yml b/.github/workflows/webassembly.yml index 1cedaa107bf63d..0dc685275d2f84 100644 --- a/.github/workflows/webassembly.yml +++ b/.github/workflows/webassembly.yml @@ -36,15 +36,14 @@ jobs: image: emscripten/emsdk volumes: - /mount/caches:/mount/caches + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING env: CMAKE_BUILD_TYPE: 'Release' - CMAKE_CXX_COMPILER_LAUNCHER: ccache - CMAKE_C_COMPILER_LAUNCHER: ccache + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache OPENVINO_REPO: /__w/openvino/openvino/openvino OPENVINO_BUILD_DIR: /__w/openvino/openvino/openvino_build - CCACHE_DIR: /mount/caches/ccache/webassembly_Release - CCACHE_TEMPDIR: /__w/openvino/openvino/ccache_temp - CCACHE_MAXSIZE: 50G + SCCACHE_AZURE_KEY_PREFIX: webassembly_Release steps: - name: Install git run: apt-get update && apt-get install --assume-yes --no-install-recommends git ca-certificates @@ -55,8 +54,10 @@ jobs: path: 
'openvino' submodules: 'true' - - name: Install ccache - run: apt-get install --assume-yes --no-install-recommends ccache + - name: Install sccache + uses: mozilla-actions/sccache-action@v0.0.3 + with: + version: "v0.5.4" - name: emcmake cmake - configure run: | @@ -64,8 +65,8 @@ jobs: -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" \ -DCMAKE_C_FLAGS="-Wno-deprecated-declarations" \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ + -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -S ${OPENVINO_REPO} \ -B ${OPENVINO_BUILD_DIR} @@ -74,4 +75,4 @@ jobs: run: emmake make -j$(nproc) hello_query_device -C ${OPENVINO_BUILD_DIR} - name: Show ccache stats - run: ccache --show-stats + run: ${SCCACHE_PATH} --show-stats From 2b6585579357c91902f319024e7147e0b4a0ec41 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 25 Oct 2023 05:25:05 +0200 Subject: [PATCH 036/275] [core]Migrate Less and Greater operators to new API (#20628) * Migrate Less operator to new API * Migrate Greater operator to new API - use less implementation in greater to reduce bin size --------- Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/greater.hpp | 4 +- src/core/include/openvino/op/less.hpp | 4 +- .../include/openvino/reference/greater.hpp | 31 +++-- .../include/openvino/reference/less.hpp | 35 ++++-- src/core/src/op/greater.cpp | 109 +++++++++--------- src/core/src/op/less.cpp | 107 ++++++++--------- 6 files changed, 143 insertions(+), 147 deletions(-) diff --git a/src/core/include/openvino/op/greater.hpp b/src/core/include/openvino/op/greater.hpp index de889a0acae370..daf38ed8c9d16a 100644 --- a/src/core/include/openvino/op/greater.hpp +++ b/src/core/include/openvino/op/greater.hpp @@ -26,9 +26,7 @@ class OPENVINO_API Greater : public util::BinaryElementwiseComparison { const 
AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/less.hpp b/src/core/include/openvino/op/less.hpp index 3d87ab9b6ffbe3..f57ebbc07bf2e8 100644 --- a/src/core/include/openvino/op/less.hpp +++ b/src/core/include/openvino/op/less.hpp @@ -26,9 +26,7 @@ class OPENVINO_API Less : public util::BinaryElementwiseComparison { const AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/greater.hpp b/src/core/reference/include/openvino/reference/greater.hpp index 2dff5e6c4899b8..98f1ecfff5186a 100644 --- a/src/core/reference/include/openvino/reference/greater.hpp +++ b/src/core/reference/include/openvino/reference/greater.hpp @@ -4,25 +4,26 @@ #pragma once -#include - -#include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" -#include "openvino/reference/autobroadcast_binop.hpp" +#include "openvino/reference/less.hpp" namespace ov { namespace reference { + template -void greater(const T* arg0, - const T* arg1, - char* out, - size_t count) // TODO: using char for bool, is this right? 
-{ - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] > arg1[i]; - } +void greater(const T* arg0, const T* arg1, char* out, size_t count) { + less(arg1, arg0, out, count); } +/** + * @brief Reference implementation of binary elementwise Greater operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Input 0 shape. + * @param arg1_shape Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ template void greater(const T* arg0, const T* arg1, @@ -30,9 +31,7 @@ void greater(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x > y); - }); + less(arg1, arg0, out, arg1_shape, arg0_shape, broadcast_spec); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/less.hpp b/src/core/reference/include/openvino/reference/less.hpp index 21d2321f5664f6..6ab5f41f6d06a5 100644 --- a/src/core/reference/include/openvino/reference/less.hpp +++ b/src/core/reference/include/openvino/reference/less.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include "openvino/core/shape.hpp" #include "openvino/op/util/attr_types.hpp" @@ -12,17 +12,30 @@ namespace ov { namespace reference { +namespace func { +// Use custom implementation as function instead std::less functor, gives smaller binary size. +// If removed or replace check impact on library binary size. +template +constexpr bool less(const T lhs, const T rhs) { + return lhs < rhs; +} +} // namespace func + template -void less(const T* arg0, - const T* arg1, - char* out, - size_t count) // TODO: using char for bool, is this right? 
-{ - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] < arg1[i]; - } +void less(const T* arg0, const T* arg1, char* out, const size_t count) { + std::transform(arg0, std::next(arg0, count), arg1, out, func::less); } +/** + * @brief Reference implementation of binary elementwise Less operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Input 0 shape. + * @param arg1_shape Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ template void less(const T* arg0, const T* arg1, @@ -30,9 +43,7 @@ void less(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x < y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::less); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/greater.cpp b/src/core/src/op/greater.cpp index dfc838c2f9c795..76715745a5fba7 100644 --- a/src/core/src/op/greater.cpp +++ b/src/core/src/op/greater.cpp @@ -2,86 +2,81 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/greater.hpp" +#include "openvino/op/greater.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/greater.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace greater { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace greaterop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::greater(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} 
+struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_greater(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_greater, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::greater(in0.data(), + in1.data(), + out.data>(), + shape0, + shape1, + broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace greaterop +}; +} // namespace greater //-------------------------------------- v1 ------------------------------------ - -op::v1::Greater::Greater(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Greater::Greater(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Greater::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Greater::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Greater_clone_with_new_inputs); check_new_args_count(this, new_args); - 
return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Greater::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Greater::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Greater_evaluate); - return greaterop::evaluate_greater(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Greater::has_evaluate() const { +bool Greater::has_evaluate() const { OV_OP_SCOPE(v1_Greater_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/less.cpp b/src/core/src/op/less.cpp index aada1ff872d481..910876c3a58853 100644 --- a/src/core/src/op/less.cpp +++ b/src/core/src/op/less.cpp @@ -2,85 +2,80 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/less.hpp" +#include "openvino/op/less.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/less.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace less { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace lessop { -namespace { -template -bool 
evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::less(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_less(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_less, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::less(in0.data(), + in1.data(), + out.data>(), + shape0, + shape1, + broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace lessop +}; +} // namespace less // ----------------------------- v1 -------------------------------------------- -op::v1::Less::Less(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Less::Less(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } 
-shared_ptr op::v1::Less::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Less::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Less_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Less::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Less::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Less_evaluate); - return lessop::evaluate_less(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Less::has_evaluate() const { +bool Less::has_evaluate() const { OV_OP_SCOPE(v1_Less_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov From 5fee2ef67eefea29953837ef149dd281485f3adb Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 25 Oct 2023 05:26:31 +0200 Subject: [PATCH 037/275] Migrate LessEqual and GreaterEqual to new API (#20645) Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/greater_eq.hpp | 4 +- src/core/include/openvino/op/less_eq.hpp | 4 +- 
.../include/openvino/reference/greater_eq.hpp | 20 +-- .../include/openvino/reference/less_eq.hpp | 35 ++++-- src/core/src/op/greater_eq.cpp | 117 ++++++++---------- src/core/src/op/less_eq.cpp | 117 ++++++++---------- 6 files changed, 137 insertions(+), 160 deletions(-) diff --git a/src/core/include/openvino/op/greater_eq.hpp b/src/core/include/openvino/op/greater_eq.hpp index 1f5fe1f984c95d..2eddf32444ffde 100644 --- a/src/core/include/openvino/op/greater_eq.hpp +++ b/src/core/include/openvino/op/greater_eq.hpp @@ -26,9 +26,7 @@ class OPENVINO_API GreaterEqual : public util::BinaryElementwiseComparison { const AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/include/openvino/op/less_eq.hpp b/src/core/include/openvino/op/less_eq.hpp index 111f4c07140af5..f11fe2d7a77232 100644 --- a/src/core/include/openvino/op/less_eq.hpp +++ b/src/core/include/openvino/op/less_eq.hpp @@ -27,9 +27,7 @@ class OPENVINO_API LessEqual : public util::BinaryElementwiseComparison { const AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/greater_eq.hpp 
b/src/core/reference/include/openvino/reference/greater_eq.hpp index 79f66e3280fdd5..6b8f5759c41f60 100644 --- a/src/core/reference/include/openvino/reference/greater_eq.hpp +++ b/src/core/reference/include/openvino/reference/greater_eq.hpp @@ -4,23 +4,13 @@ #pragma once -#include - -#include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" -#include "openvino/reference/autobroadcast_binop.hpp" +#include "openvino/reference/less_eq.hpp" namespace ov { namespace reference { template -void greater_eq(const T* arg0, - const T* arg1, - char* out, - size_t count) // TODO: using char for bool, is this right? -{ - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] >= arg1[i]; - } +void greater_eq(const T* arg0, const T* arg1, char* out, size_t count) { + less_eq(arg1, arg0, out, count); } template @@ -30,9 +20,7 @@ void greater_eq(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x >= y); - }); + less_eq(arg1, arg0, out, arg1_shape, arg0_shape, broadcast_spec); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/less_eq.hpp b/src/core/reference/include/openvino/reference/less_eq.hpp index d4ab3c2775bea6..d5442620c45baf 100644 --- a/src/core/reference/include/openvino/reference/less_eq.hpp +++ b/src/core/reference/include/openvino/reference/less_eq.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include "openvino/core/shape.hpp" #include "openvino/op/util/attr_types.hpp" @@ -12,17 +12,30 @@ namespace ov { namespace reference { +namespace func { +// Use custom implementation as function instead std::less_equal functor, gives smaller binary size. +// If removed or replace check impact on library binary size. 
+template +constexpr bool less_eq(const T lhs, const T rhs) { + return lhs <= rhs; +} +} // namespace func + template -void less_eq(const T* arg0, - const T* arg1, - char* out, - size_t count) // TODO: using char for bool, is this right? -{ - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] <= arg1[i]; - } +void less_eq(const T* arg0, const T* arg1, char* out, const size_t count) { + std::transform(arg0, std::next(arg0, count), arg1, out, func::less_eq); } +/** + * @brief Reference implementation of binary elementwise LessEqual operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Input 0 shape. + * @param arg1_shape Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ template void less_eq(const T* arg0, const T* arg1, @@ -30,9 +43,7 @@ void less_eq(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x <= y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::less_eq); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/greater_eq.cpp b/src/core/src/op/greater_eq.cpp index 0eb36149de2083..a3bd099262a6b3 100644 --- a/src/core/src/op/greater_eq.cpp +++ b/src/core/src/op/greater_eq.cpp @@ -2,92 +2,81 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/greater_eq.hpp" +#include "openvino/op/greater_eq.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/reference/greater_eq.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace greater_equal { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace greater_equalop { -namespace { 
-template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::greater_eq(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_greater_equal(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_greater_equal, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_greater_equal, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::greater_eq(in0.data(), + in1.data(), + out.data>(), + shape0, + shape1, + broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace greater_equalop +}; +} // namespace greater_equal -//---------------------------------- v1 ---------------------------------------- - -op::v1::GreaterEqual::GreaterEqual(const Output& arg0, - const Output& arg1, - const AutoBroadcastSpec& auto_broadcast) +//-------------------------------------- v1 
------------------------------------ +namespace v1 { +GreaterEqual::GreaterEqual(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::GreaterEqual::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr GreaterEqual::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_GreaterEqual_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::GreaterEqual::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool GreaterEqual::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_GreaterEqual_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 2)); - OPENVINO_SUPPRESS_DEPRECATED_END - return greater_equalop::evaluate_greater_equal(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::GreaterEqual::has_evaluate() const { +bool GreaterEqual::has_evaluate() const { OV_OP_SCOPE(v1_GreaterEqual_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case 
element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/less_eq.cpp b/src/core/src/op/less_eq.cpp index ff15661fd88d6d..76c94ad91cba65 100644 --- a/src/core/src/op/less_eq.cpp +++ b/src/core/src/op/less_eq.cpp @@ -2,88 +2,81 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/less_eq.hpp" +#include "openvino/op/less_eq.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/less_eq.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace less_equal { -// ---------------------------------- v1 --------------------------------------- +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::v1::LessEqual::LessEqual(const Output& arg0, - const Output& arg1, - const AutoBroadcastSpec& auto_broadcast) + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::less_eq(in0.data(), + in1.data(), + out.data>(), + shape0, + shape1, + broadcast_spec); + return true; + } +}; +} // namespace less_equal + +// ----------------------------- v1 -------------------------------------------- +namespace v1 { +LessEqual::LessEqual(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::LessEqual::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr LessEqual::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_LessEqual_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); -} - 
-OPENVINO_SUPPRESS_DEPRECATED_START -namespace less_equalop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::less_eq(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} - -bool evaluate_less_equal(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_less_equal, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_less_equal, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; - } - return rc; + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -} // namespace -} // namespace less_equalop -bool op::v1::LessEqual::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool LessEqual::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_LessEqual_evaluate); - return less_equalop::evaluate_less_equal(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + 
inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::LessEqual::has_evaluate() const { +bool LessEqual::has_evaluate() const { OV_OP_SCOPE(v1_LessEqual_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov From 6fa4f9fd78bdc4a19ee733ca3f6924c60da43080 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Wed, 25 Oct 2023 12:26:36 +0900 Subject: [PATCH 038/275] [GPU] fixed to create Graphs with different stream_ids (#20626) * fixed to create Graphs with different stream_ids * added num_streams config for auto batch test --- src/plugins/intel_gpu/src/plugin/compiled_model.cpp | 2 +- .../behavior/plugin/configuration_tests.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index fc04e35748fe6c..60d9a66bca3122 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -169,7 +169,7 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib, auto pos = ib.tellg(); for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { ib.seekg(pos); - auto graph = std::make_shared(ib, context, m_config, 0); + auto graph = std::make_shared(ib, context, m_config, n); m_graphs.push_back(graph); } } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp index 623246fafe0b1d..64af95f132bb51 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp @@ -68,6 +68,8 @@ namespace { {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), ov::test::utils::DEVICE_GPU}}, {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), ov::test::utils::DEVICE_GPU}, {CONFIG_KEY(AUTO_BATCH_TIMEOUT), "1"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), ov::test::utils::DEVICE_GPU}, + {ov::num_streams.name(), "AUTO"}}, }; }; From 84732515b42c28fcb3b4986fe89bf10333f4afdb Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 25 Oct 2023 05:27:21 +0200 Subject: [PATCH 039/275] Fix check repeats in values in Tile (#20654) - no action if any of repeats is zero Co-authored-by: Michal Lukaszewski --- src/core/reference/src/op/tile.cpp | 2 +- .../template/tests/functional/op_reference/tile.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/reference/src/op/tile.cpp b/src/core/reference/src/op/tile.cpp index 0a3132e32b4807..95162f62e7e1d4 100644 --- a/src/core/reference/src/op/tile.cpp +++ b/src/core/reference/src/op/tile.cpp @@ -26,7 +26,7 @@ void tile(const char* arg, const Shape& out_shape, const size_t elem_size, const std::vector& repeats) { - if (std::all_of(repeats.begin(), repeats.end(), [](int64_t repeat) { + if (std::any_of(repeats.begin(), repeats.end(), [](int64_t repeat) { return repeat == 0; })) { return; diff --git a/src/plugins/template/tests/functional/op_reference/tile.cpp b/src/plugins/template/tests/functional/op_reference/tile.cpp index 6caa3d64154e95..4cb0a002327696 100644 --- a/src/plugins/template/tests/functional/op_reference/tile.cpp +++ b/src/plugins/template/tests/functional/op_reference/tile.cpp @@ -102,6 +102,14 @@ std::vector generateParams() { 
reference_tests::Tensor(ET_INT, {2}, std::vector{2, 1}), reference_tests::Tensor(ET, {2, 2, 3}, std::vector{1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6}), "tile_3d_to_3d_repeats_broadcast"), + TileParams(reference_tests::Tensor(ET, {1}, std::vector{1}), + reference_tests::Tensor(ET_INT, {3}, std::vector{0, 2, 3}), + reference_tests::Tensor(ET, {0}, std::vector{}), + "tile_1d_to_3d_with_zero_on_axis_0"), + TileParams(reference_tests::Tensor(ET, {3}, std::vector{1, 2, 3}), + reference_tests::Tensor(ET_INT, {3}, std::vector{2, 0, 3}), + reference_tests::Tensor(ET, {0}, std::vector{}), + "tile_1d_to_3d_with_zero_on_axis_1"), }; return params; } From 973b194776ceec9e15d29135feb21eeacb9ae817 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Wed, 25 Oct 2023 07:10:54 +0200 Subject: [PATCH 040/275] [core] Migrate DepthToSpace operator to new API (#20515) * Move into ov namespace * Use ov::Tensor in place of HostTensor --------- Co-authored-by: Michal Lukaszewski --- .../include/openvino/op/depth_to_space.hpp | 4 +- src/core/src/op/depth_to_space.cpp | 85 ++++++++----------- 2 files changed, 37 insertions(+), 52 deletions(-) diff --git a/src/core/include/openvino/op/depth_to_space.hpp b/src/core/include/openvino/op/depth_to_space.hpp index 802eddbd665d4c..c7b946e71e998a 100644 --- a/src/core/include/openvino/op/depth_to_space.hpp +++ b/src/core/include/openvino/op/depth_to_space.hpp @@ -55,9 +55,7 @@ class OPENVINO_API DepthToSpace : public Op { } std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; void validate_and_infer_types() override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; protected: diff --git a/src/core/src/op/depth_to_space.cpp b/src/core/src/op/depth_to_space.cpp index 
ce3003ed83f600..e914f83d4a88d4 100644 --- a/src/core/src/op/depth_to_space.cpp +++ b/src/core/src/op/depth_to_space.cpp @@ -2,46 +2,45 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/depth_to_space.hpp" +#include "openvino/op/depth_to_space.hpp" #include #include -#include #include -#include -#include +#include "depth_to_space_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/shape.hpp" +#include "openvino/core/shape.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/reference/depth_to_space.hpp" -using namespace ngraph; - -op::DepthToSpace::DepthToSpace(const Output& data, const DepthToSpaceMode& mode, const size_t block_size) +namespace ov { +namespace op { +namespace v0 { +DepthToSpace::DepthToSpace(const Output& data, const DepthToSpaceMode& mode, const size_t block_size) : Op({data}), m_blocksize(block_size), m_mode(mode) { constructor_validate_and_infer_types(); } -op::DepthToSpace::DepthToSpace(const Output& data, const std::string& mode, const size_t block_size) +DepthToSpace::DepthToSpace(const Output& data, const std::string& mode, const size_t block_size) : DepthToSpace(data, as_enum(mode), block_size) {} -bool op::DepthToSpace::visit_attributes(AttributeVisitor& visitor) { +bool DepthToSpace::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_DepthToSpace_visit_attributes); visitor.on_attribute("block_size", m_blocksize); visitor.on_attribute("mode", m_mode); return true; } -std::shared_ptr op::DepthToSpace::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr DepthToSpace::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_DepthToSpace_clone_with_new_inputs); check_new_args_count(this, new_args); return std::make_shared(new_args.at(0), m_mode, m_blocksize); } -void op::DepthToSpace::validate_and_infer_types() { +void DepthToSpace::validate_and_infer_types() { OV_OP_SCOPE(v0_DepthToSpace_validate_and_infer_types); OPENVINO_SUPPRESS_DEPRECATED_START @@ 
-50,60 +49,48 @@ void op::DepthToSpace::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), output_shape); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace { -bool evaluate_depth_to_space(const HostTensorVector& outputs, - const HostTensorVector& inputs, - const std::size_t block_size, - const op::DepthToSpace::DepthToSpaceMode mode) { +bool DepthToSpace::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_DepthToSpace_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + const auto& in = inputs[0]; const auto& out = outputs[0]; - const size_t elem_size = in->get_element_type().size(); - if (in->get_partial_shape().is_dynamic()) { - return false; - } - ov::reference::depth_to_space(in->get_data_ptr(), - in->get_shape(), - out->get_data_ptr(), - out->get_shape(), - block_size, - mode, - elem_size); + reference::depth_to_space(static_cast(in.data()), + in.get_shape(), + static_cast(out.data()), + out.get_shape(), + m_blocksize, + m_mode, + in.get_element_type().size()); return true; } -} // namespace - -bool op::DepthToSpace::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_DepthToSpace_evaluate); - return evaluate_depth_to_space(outputs, inputs, m_blocksize, m_mode); -} -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::DepthToSpace::has_evaluate() const { +bool DepthToSpace::has_evaluate() const { OV_OP_SCOPE(v0_DepthToSpace_has_evaluate); return !get_input_partial_shape(0).is_dynamic(); } -std::ostream& ov::operator<<(std::ostream& s, const ov::op::v0::DepthToSpace::DepthToSpaceMode& type) { - return s << as_string(type); -} - -void op::v0::DepthToSpace::set_block_size(size_t block_size) { +void DepthToSpace::set_block_size(size_t block_size) { m_blocksize = block_size; } -void op::v0::DepthToSpace::set_mode(DepthToSpaceMode mode) { +void DepthToSpace::set_mode(DepthToSpaceMode mode) { m_mode = mode; } +} // namespace v0 +} // namespace op + +std::ostream& 
operator<<(std::ostream& s, const op::v0::DepthToSpace::DepthToSpaceMode& type) { + return s << as_string(type); +} -namespace ov { template <> -NGRAPH_API EnumNames& -EnumNames::get() { - static auto enum_names = EnumNames( +OPENVINO_API EnumNames& +EnumNames::get() { + static auto enum_names = EnumNames( "op::DepthToSpace::DepthToSpaceMode", - {{"blocks_first", ngraph::op::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST}, - {"depth_first", ngraph::op::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST}}); + {{"blocks_first", op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST}, + {"depth_first", op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST}}); return enum_names; } } // namespace ov From 30260e3c7c10aff56c6f311b4dc8b2e8a2842c27 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Wed, 25 Oct 2023 07:37:29 +0200 Subject: [PATCH 041/275] =?UTF-8?q?Refa=D1=81tor=20ActivationLayerTest=20(?= =?UTF-8?q?#20180)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Refator ActivationLayerTest --- .../single_layer_tests/activation.cpp | 197 ++++++------- .../skip_tests_config.cpp | 2 + .../include/single_op_tests/activation.hpp | 29 ++ .../shared_test_classes/base/utils/ranges.hpp | 89 +++++- .../single_op/activation.hpp | 93 ++++++ .../src/base/utils/generate_inputs.cpp | 269 +++--------------- .../src/single_op/activation.cpp | 139 +++++++++ 7 files changed, 482 insertions(+), 336 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/activation.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/activation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp index 614ec6c66d60ec..fcf598f54cd400 100644 --- 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp @@ -5,144 +5,129 @@ #include #include "common_test_utils/test_enums.hpp" -#include "single_layer_tests/activation.hpp" +#include "single_op_tests/activation.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ov::test::utils; namespace { -// Common params -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32 - // TODO: Fix Issue-27390 - // InferenceEngine::Precision::I16, - // InferenceEngine::Precision::U8 -}; - -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; +using ov::test::ActivationLayerTest; +using ov::test::ActivationParamLayerTest; +using ov::test::utils::ActivationTypes; -const std::vector intPrecisions = { - InferenceEngine::Precision::I32, +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; -const std::map>> activationTypes = { - {Sigmoid, {}}, - {Tan, {}}, - {Tanh, {}}, - {Relu, {}}, - {Exp, {}}, - {Log, {}}, - {Sign, {}}, - {Abs, {}}, - {Clamp, {{-2.0f, 2.0f}}}, - {Negative, {}}, - {Acos, {}}, - {Acosh, {}}, - {Asin, {}}, - {Asinh, {}}, - {Atan, {}}, - {Atanh, {}}, - {Cos, {}}, - {Cosh, {}}, - {Floor, {}}, - {Sin, {}}, - {Sinh, {}}, - {Sqrt, {}}, - {Elu, {{0.1f}}}, - {Erf, {}}, - {HardSigmoid, {{0.2f, 0.5f}}}, - {Selu, {{1.6732f, 1.0507f}}}, - {Ceiling, {}}, - {Mish, {}}, - {HSwish, {}}, - {SoftPlus, {}}, - {HSigmoid, {}}, - {RoundHalfToEven, {}}, - {RoundHalfAwayFromZero, {}}, - {GeluErf, {}}, - {GeluTanh, {}}, - {Swish, {{0.4f}}} +const std::map>> activationTypes = { + {ActivationTypes::Sigmoid, {}}, + {ActivationTypes::Tan, {}}, + {ActivationTypes::Tanh, {}}, + {ActivationTypes::Relu, {}}, + {ActivationTypes::Exp, {}}, + {ActivationTypes::Log, {}}, + {ActivationTypes::Sign, {}}, + 
{ActivationTypes::Abs, {}}, + {ActivationTypes::Clamp, {{-2.0f, 2.0f}}}, + {ActivationTypes::Negative, {}}, + {ActivationTypes::Acos, {}}, + {ActivationTypes::Acosh, {}}, + {ActivationTypes::Asin, {}}, + {ActivationTypes::Asinh, {}}, + {ActivationTypes::Atan, {}}, + {ActivationTypes::Atanh, {}}, + {ActivationTypes::Cos, {}}, + {ActivationTypes::Cosh, {}}, + {ActivationTypes::Floor, {}}, + {ActivationTypes::Sin, {}}, + {ActivationTypes::Sinh, {}}, + {ActivationTypes::Sqrt, {}}, + {ActivationTypes::Elu, {{0.1f}}}, + {ActivationTypes::Erf, {}}, + {ActivationTypes::HardSigmoid, {{0.2f, 0.5f}}}, + {ActivationTypes::Selu, {{1.6732f, 1.0507f}}}, + {ActivationTypes::Ceiling, {}}, + {ActivationTypes::Mish, {}}, + {ActivationTypes::HSwish, {}}, + {ActivationTypes::SoftPlus, {}}, + {ActivationTypes::HSigmoid, {}}, + {ActivationTypes::RoundHalfToEven, {}}, + {ActivationTypes::RoundHalfAwayFromZero, {}}, + {ActivationTypes::GeluErf, {}}, + {ActivationTypes::GeluTanh, {}}, + {ActivationTypes::Swish, {{0.4f}}} }; // List of operations that should be tested also with integer precision const std::map>> intActivationTypes = { - {Acosh, {}}, - {Asinh, {}}, - {Atan, {}}, - {Negative, {}}, - {Ceiling, {}}, - {Cos, {}}, - {Cosh, {}}, - {Sign, {}}, - {Sinh, {}}, - {Sqrt, {}}, - {Tan, {}}, - {Tanh, {}}, + {ActivationTypes::Acosh, {}}, + {ActivationTypes::Asinh, {}}, + {ActivationTypes::Atan, {}}, + {ActivationTypes::Negative, {}}, + {ActivationTypes::Ceiling, {}}, + {ActivationTypes::Cos, {}}, + {ActivationTypes::Cosh, {}}, + {ActivationTypes::Sign, {}}, + {ActivationTypes::Sinh, {}}, + {ActivationTypes::Sqrt, {}}, + {ActivationTypes::Tan, {}}, + {ActivationTypes::Tanh, {}}, }; const std::map>> activationParamTypes = { - {PReLu, {{}}}, // Slope will be filled with increasing values from -10 to match slope input shape - {LeakyRelu, {{0.01f}}} + {ActivationTypes::PReLu, {{}}}, // Slope will be filled with increasing values from -10 to match slope input shape + {ActivationTypes::LeakyRelu, 
{{0.01f}}} }; -std::map, std::vector>> basic = { - {{1, 50}, {{}}}, - {{5, 128}, {{}}}, - {{2, 2, 2, 2, 2, 2, 2, 2}, {{}}}, +std::map, std::vector> basic_input_shapes_static = { + {{{1, 50}}, {}}, + {{{5, 128}}, {}}, + {{{2, 2, 2, 2, 2, 2, 2, 2}}, {}}, }; -std::map, std::vector>> preluBasic = { - {{1, 50}, {{1}, {50}}}, - {{1, 128}, {{1}, {128}}}, +std::map, std::vector> prelu_basic_input_shapes_static = { + {{{1, 50}}, {{1}, {50}}}, + {{{1, 128}}, {{1}, {128}}}, // Broadcast check - {{3, 2}, {{1}, {2}, {3, 2}}}, - {{3, 2, 5}, {{1}, {2}, {5}, {2, 5}, {3, 1, 5}, {1, 2, 1}, {1, 1, 5}, {3, 1, 1}, {3, 2, 5}}}, - {{2, 1, 2}, {{2}, {2, 1, 1}}}, - {{3, 2, 5, 7}, {{1}, {7}, {2}, {5, 7}, {2, 5, 7}, {2, 1, 1}, {1, 2, 1, 1}, {3, 2, 1, 1}, {3, 2, 5, 7}}}, - {{2, 2, 2, 2, 2, 2, 2, 2}, {{2}, {2, 2}, {2, 1, 1, 2}}}, + {{{3, 2}}, {{1}, {2}, {3, 2}}}, + {{{3, 2, 5}}, {{1}, {2}, {5}, {2, 5}, {3, 1, 5}, {1, 2, 1}, {1, 1, 5}, {3, 1, 1}, {3, 2, 5}}}, + {{{2, 1, 2}}, {{2}, {2, 1, 1}}}, + {{{3, 2, 5, 7}}, {{1}, {7}, {2}, {5, 7}, {2, 5, 7}, {2, 1, 1}, {1, 2, 1, 1}, {3, 2, 1, 1}, {3, 2, 5, 7}}}, + {{{2, 2, 2, 2, 2, 2, 2, 2}}, {{2}, {2, 2}, {2, 1, 1, 2}}}, +}; + +auto static_shapes_param_transform = [](const std::vector, ov::Shape>>& original_shapes) { + std::vector, ov::Shape>> new_shapes; + for (const auto& shape_element : original_shapes) { + new_shapes.emplace_back(ov::test::static_shapes_to_test_representation(shape_element.first), shape_element.second); + } + return new_shapes; }; -const auto basicCases = ::testing::Combine( +const auto basic_case_params = ::testing::Combine( ::testing::ValuesIn(ov::test::utils::combineParams(activationTypes)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(ov::test::utils::combineParams(basic)), + 
::testing::ValuesIn(model_types), + ::testing::ValuesIn(static_shapes_param_transform(ov::test::utils::combineParams(basic_input_shapes_static))), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto basicPreluCases = ::testing::Combine( +const auto basic_prelu_cases_params = ::testing::Combine( ::testing::ValuesIn(ov::test::utils::combineParams(activationParamTypes)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(ov::test::utils::combineParams(preluBasic)), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(static_shapes_param_transform(ov::test::utils::combineParams(prelu_basic_input_shapes_static))), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto basicIntegerOperations = ::testing::Combine( +const auto basic_integer_operations_params = ::testing::Combine( ::testing::ValuesIn(ov::test::utils::combineParams(intActivationTypes)), - ::testing::ValuesIn(intPrecisions), - ::testing::ValuesIn(intPrecisions), - ::testing::ValuesIn(intPrecisions), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(ov::test::utils::combineParams(basic)), + ::testing::Values(ov::element::i32), + ::testing::ValuesIn(static_shapes_param_transform(ov::test::utils::combineParams(basic_input_shapes_static))), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic, ActivationLayerTest, basicCases, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic, ActivationDynamicLayerTest, basicCases, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basicIntegerOperations, ActivationLayerTest::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic, ActivationLayerTest, basic_case_params, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basic_integer_operations_params, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic_Prelu_Const, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic_Prelu_Param, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic_Prelu_Const, ActivationLayerTest, basic_prelu_cases_params, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Activation_Basic_Prelu_Param, ActivationParamLayerTest, basic_prelu_cases_params, ActivationLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index e942043dd3fbf2..508e766728b24b 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -187,6 +187,8 @@ std::vector disabledTestPatterns() { R"(smoke_GroupConvBackpropData.*paddingDefined/GroupConvBackpropLayerTest.Inference.*f32.*)", // Issue: 122177 R"(smoke_LSTMSequenceCommon.*LSTMSequenceTest.Inference.*CONVERT_TO_TI.*)", + // Issue: 122081 + R"(smoke_Activation_Basic_Prelu_Const/ActivationLayerTest.Inference/.*_TS=\(3.2.5.7\).*)", // Issue: 122094 R"(smoke_Interpolate_Basic_Down_Sample_Tail/InterpolateLayerTest.Inference.*(asymmetric|align_corners).*f16.*)", // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. 
diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/activation.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/activation.hpp new file mode 100644 index 00000000000000..9b1668facb3971 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/activation.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/activation.hpp" + +namespace ov { +namespace test { + +TEST_P(ActivationLayerTest, Inference) { + run(); +} + +TEST_P(ActivationParamLayerTest, Inference) { + run(); +} + +TEST_P(ActivationLayerTest, QueryModel) { + query_model(); +} + +TEST_P(ActivationParamLayerTest, QueryModel) { + query_model(); +} + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp index 4315fd9ef5d5a6..339c2a98845d54 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp @@ -4,6 +4,9 @@ #pragma once +#include +#include + #include "ngraph/node.hpp" #include "ngraph/op/proposal.hpp" #include "ngraph/op/power.hpp" @@ -26,9 +29,44 @@ #include "openvino/op/logical_or.hpp" #include "openvino/op/logical_xor.hpp" #include "openvino/op/logical_not.hpp" - -#include -#include +#include "openvino/op/abs.hpp" +#include "openvino/op/acos.hpp" +#include "openvino/op/acosh.hpp" +#include "openvino/op/asin.hpp" +#include "openvino/op/asinh.hpp" +#include "openvino/op/atan.hpp" +#include "openvino/op/atanh.hpp" +#include "openvino/op/ceiling.hpp" +#include "openvino/op/clamp.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/cos.hpp" +#include "openvino/op/cosh.hpp" +#include "openvino/op/elu.hpp" 
+#include "openvino/op/erf.hpp" +#include "openvino/op/exp.hpp" +#include "openvino/op/floor.hpp" +#include "openvino/op/gelu.hpp" +#include "openvino/op/hard_sigmoid.hpp" +#include "openvino/op/hsigmoid.hpp" +#include "openvino/op/hswish.hpp" +#include "openvino/op/log.hpp" +#include "openvino/op/mish.hpp" +#include "openvino/op/negative.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/prelu.hpp" +#include "openvino/op/relu.hpp" +#include "openvino/op/round.hpp" +#include "openvino/op/selu.hpp" +#include "openvino/op/sigmoid.hpp" +#include "openvino/op/sign.hpp" +#include "openvino/op/sin.hpp" +#include "openvino/op/sinh.hpp" +#include "openvino/op/softplus.hpp" +#include "openvino/op/softsign.hpp" +#include "openvino/op/sqrt.hpp" +#include "openvino/op/swish.hpp" +#include "openvino/op/tan.hpp" +#include "openvino/op/tanh.hpp" namespace ov { namespace test { @@ -94,6 +132,51 @@ static std::map>> i { ov::op::v1::LogicalOr::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, { ov::op::v1::LogicalNot::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, { ov::op::v1::LogicalXor::get_type_info_static(), {{{0, 2}}, {{0, 2, 1}}} }, + { ov::op::v7::IDFT::get_type_info_static(), {{{0, 1}}, {{0, 1, 1000000}}} }, + { ov::op::v0::Sigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Tanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Relu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::PRelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Exp::get_type_info_static(), {{{0, 15}}, {{-10, 20, 32768}}} }, + { ov::op::v0::Log::get_type_info_static(), {{{0, 15}}, {{1, 20, 32768}}} }, + { ov::op::v0::Sign::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Abs::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Clamp::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { 
ov::op::v0::Negative::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Acos::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v3::Acosh::get_type_info_static(), {{{1, 15}}, {{1, 200, 32768}}} }, + { ov::op::v0::Asin::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v3::Asinh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Atan::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v3::Atanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Cos::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Cosh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Floor::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Sin::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Sinh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Sqrt::get_type_info_static(), {{{0, 15}}, {{1, 20, 32768}}} }, + { ov::op::v0::Tan::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Elu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Erf::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::HardSigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Selu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Sigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Tanh::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Relu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Exp::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Log::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Sign::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Abs::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { 
ov::op::v0::Gelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v0::Ceiling::get_type_info_static(), {{{0, 15}}, {{-1000, 2000, 32768}}} }, + { ov::op::v4::Mish::get_type_info_static(), {{{0, 15}}, {{-10, 60, 32768}}} }, + { ov::op::v4::HSwish::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v4::SoftPlus::get_type_info_static(), {{{0, 15}}, {{-100, 200, 32768}}} }, + { ov::op::v4::Swish::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v5::HSigmoid::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v5::Round::get_type_info_static(), {{{0, 15}}, {{-10, 20, 4}}} }, + { ov::op::v7::Gelu::get_type_info_static(), {{{0, 15}}, {{-1, 2, 32768}}} }, + { ov::op::v9::SoftSign::get_type_info_static(), {{{0, 15}}, {{-100, 200, 32768}}} }, }; } // namespace utils diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp new file mode 100644 index 00000000000000..5dd149e0aff98d --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + + +#include "shared_test_classes/base/ov_subgraph.hpp" + +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { +using ov::test::utils::ActivationTypes; + +static std::map activationNames = { + {ActivationTypes::Sigmoid, "Sigmoid"}, + {ActivationTypes::Tanh, "Tanh"}, + {ActivationTypes::Relu, "Relu"}, + {ActivationTypes::LeakyRelu, "LeakyRelu"}, + {ActivationTypes::Exp, "Exp"}, + {ActivationTypes::Log, "Log"}, + {ActivationTypes::Sign, "Sign"}, + {ActivationTypes::Abs, "Abs"}, + {ActivationTypes::Clamp, "Clamp"}, + {ActivationTypes::Negative, "Negative"}, + 
{ActivationTypes::Acos, "Acos"}, + {ActivationTypes::Acosh, "Acosh"}, + {ActivationTypes::Asin, "Asin"}, + {ActivationTypes::Asinh, "Asinh"}, + {ActivationTypes::Atan, "Atan"}, + {ActivationTypes::Atanh, "Atanh"}, + {ActivationTypes::Cos, "Cos"}, + {ActivationTypes::Cosh, "Cosh"}, + {ActivationTypes::Floor, "Floor"}, + {ActivationTypes::Sin, "Sin"}, + {ActivationTypes::Sinh, "Sinh"}, + {ActivationTypes::Sqrt, "Sqrt"}, + {ActivationTypes::Tan, "Tan"}, + {ActivationTypes::Elu, "Elu"}, + {ActivationTypes::Erf, "Erf"}, + {ActivationTypes::HardSigmoid, "HardSigmoid"}, + {ActivationTypes::Selu, "Selu"}, + {ActivationTypes::Sigmoid, "Sigmoid"}, + {ActivationTypes::Tanh, "Tanh"}, + {ActivationTypes::Relu, "Relu"}, + {ActivationTypes::Exp, "Exp"}, + {ActivationTypes::Log, "Log"}, + {ActivationTypes::Sign, "Sign"}, + {ActivationTypes::Abs, "Abs"}, + {ActivationTypes::Gelu, "Gelu"}, + {ActivationTypes::Ceiling, "Ceiling"}, + {ActivationTypes::PReLu, "PReLu"}, + {ActivationTypes::Mish, "Mish"}, + {ActivationTypes::HSwish, "HSwish"}, + {ActivationTypes::SoftPlus, "SoftPlus"}, + {ActivationTypes::Swish, "Swish"}, + {ActivationTypes::HSigmoid, "HSigmoid"}, + {ActivationTypes::RoundHalfToEven, "RoundHalfToEven"}, + {ActivationTypes::RoundHalfAwayFromZero, "RoundHalfAwayFromZero"}, + {ActivationTypes::GeluErf, "GeluErf"}, + {ActivationTypes::GeluTanh, "GeluTanh"}, + {ActivationTypes::SoftSign, "SoftSign"}, +}; + +typedef std::tuple< + std::pair>, // Activation type and constant value + ov::element::Type, // Model type + std::pair, // Input shapes + ov::Shape>, // 2nd input const shape + std::string> activationParams; + +class ActivationLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; + +class ActivationParamLayerTest : public ActivationLayerTest { +protected: + void SetUp() override; +}; +} // namespace test +} // 
namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index 0d8909c46581e0..3917291fc61a44 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -83,60 +83,63 @@ ov::runtime::Tensor generate(const ov::element::Type& elemType, } } // namespace Activation -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-1, 2, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-1, 2, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-1, 2, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, +ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-1000, 2000, 32768, 1)); -} + switch (port) { + case 1: { + return ov::test::utils::create_and_fill_tensor(elemType, targetShape, 0, 0.2f); + } + case 2: { + return ov::test::utils::create_and_fill_tensor(elemType, targetShape, 0, 0.5f); + } + default: { + return Activation::generate(elemType, 
targetShape); + } + } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { return Activation::generate(elemType, targetShape); } -ov::runtime::Tensor generate(const std::shared_ptr& node, +ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); + switch (port) { + case 1: { + auto name = node->input(1).get_node()->get_friendly_name(); + if (0 == name.compare("leakySlope")) { + return ov::test::utils::create_and_fill_tensor(elemType, targetShape, 0, 0.01f, 100); + } else if (0 == name.compare("negativeSlope")) { + return ov::test::utils::create_and_fill_tensor(elemType, targetShape, 0, -0.01f, 100); + } else { + return Activation::generate(elemType, targetShape); + } + } + default: { + return Activation::generate(elemType, targetShape); + } + } } -ov::runtime::Tensor generate(const std::shared_ptr& node, +ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); + switch (port) { + case 1: { + std::vector alpha(node->get_input_shape(1).size(), 1.6732f); + return ov::test::utils::create_tensor(elemType, targetShape, alpha, alpha.size()); + } + case 2: { + std::vector lambda(node->get_input_shape(2).size(), 1.0507f); + return ov::test::utils::create_tensor(elemType, targetShape, lambda, lambda.size()); + } + default: { + return Activation::generate(elemType, targetShape); + } + } } ov::runtime::Tensor generate(const std::shared_ptr& node, @@ -166,56 +169,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& 
node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-10, 20, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - switch (port) { - case 1: { - std::vector alpha(node->get_input_shape(1).size(), 0.2f); - return ov::test::utils::create_tensor(elemType, targetShape, alpha, alpha.size()); - } - case 2: { - std::vector beta(node->get_input_shape(2).size(), 0.5f); - return ov::test::utils::create_tensor(elemType, targetShape, beta, beta.size()); - } - default: { - return Activation::generate(elemType, targetShape); - } - } - - return Activation::generate(elemType, targetShape); -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, @@ -279,35 +232,6 @@ ov::runtime::Tensor generate(const std::shared_ptr } } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(1, 20, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - switch 
(port) { - case 1: { - std::vector negativeSlope(node->get_input_shape(1).size(), -0.01f); - return ov::test::utils::create_tensor(elemType, targetShape, negativeSlope, negativeSlope.size()); - } - default: { - return Activation::generate(elemType, targetShape); - } - } -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, @@ -371,73 +295,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& node return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - switch (port) { - case 1: { - std::vector alpha(node->get_input_shape(1).size(), 1.6732f); - return ov::test::utils::create_tensor(elemType, targetShape, alpha, alpha.size()); - } - case 2: { - std::vector lambda(node->get_input_shape(2).size(), 1.0507f); - return ov::test::utils::create_tensor(elemType, targetShape, lambda, lambda.size()); - } - default: { - return Activation::generate(elemType, targetShape); - } - } -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t 
port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(1, 20, 32768, 1)); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, @@ -559,20 +416,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& node, } } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, @@ -593,20 +436,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& no return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, @@ -625,13 +454,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& return 
generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape); -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, @@ -833,13 +655,6 @@ ov::runtime::Tensor generate(const std::shared_ptr& return generate(std::dynamic_pointer_cast(node), port, elemType, targetShape); } -ov::runtime::Tensor generate(const std::shared_ptr& node, - size_t port, - const ov::element::Type& elemType, - const ov::Shape& targetShape) { - return Activation::generate(elemType, targetShape, InputGenerateData(-10, 20, 4)); -} - ov::runtime::Tensor generate(const std::shared_ptr& node, size_t port, const ov::element::Type& elemType, diff --git a/src/tests/functional/shared_test_classes/src/single_op/activation.cpp b/src/tests/functional/shared_test_classes/src/single_op/activation.cpp new file mode 100644 index 00000000000000..c30c1f7d46af7f --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/activation.cpp @@ -0,0 +1,139 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/activation.hpp" + +#include "ov_models/builders.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/result.hpp" + +namespace ov { +namespace test { +using ov::test::utils::ActivationTypes; + +std::string ActivationLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + ov::element::Type model_type; + std::pair, ov::Shape> input_shapes; + std::string target_device; + std::pair> activationDecl; + std::tie(activationDecl, model_type, input_shapes, target_device) = obj.param; + + auto shapes = input_shapes.first; + auto const_shape = input_shapes.second; + + std::ostringstream result; + const char 
separator = '_'; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "TS=" << ov::test::utils::vec2str(const_shape) << separator; + result << activationNames[activationDecl.first] << separator; + result << "constants_value=" << ov::test::utils::vec2str(activationDecl.second) << separator; + result << "netPRC=" << model_type.get_type_name() << separator; + result << "trgDev=" << target_device; + return result.str(); +} + +void ActivationLayerTest::SetUp() { + ov::element::Type model_type; + std::pair, ov::Shape> input_shapes; + std::pair> activationDecl; + std::tie(activationDecl, model_type, input_shapes, targetDevice) = GetParam(); + init_input_shapes(input_shapes.first); + auto const_shape = input_shapes.second; + + auto activationType = activationDecl.first; + auto constants_value = activationDecl.second; + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + param->set_friendly_name("Input"); + + if (activationType == ActivationTypes::PReLu && constants_value.empty()) { + auto elemnts_count = ov::shape_size(const_shape); + constants_value.resize(elemnts_count); + std::iota(constants_value.begin(), constants_value.end(), -10); + } + + auto activation = ngraph::builder::makeActivation(param, model_type, activationType, const_shape, constants_value); + + auto result = std::make_shared(activation); + + function = std::make_shared(result, ov::ParameterVector{param}, "Activation"); +} + +void ActivationParamLayerTest::SetUp() { + ov::element::Type model_type; + std::pair, ov::Shape> input_shapes; + std::pair> activationDecl; + 
std::tie(activationDecl, model_type, input_shapes, targetDevice) = GetParam(); + auto shapes = input_shapes.first; + auto const_shape = input_shapes.second; + + auto activationType = activationDecl.first; + auto constants_value = activationDecl.second; + + switch (activationType) { + case ActivationTypes::PReLu: + case ActivationTypes::LeakyRelu: { + shapes.push_back(ov::test::static_shapes_to_test_representation({const_shape}).front()); + break; + } + case ActivationTypes::HardSigmoid: + case ActivationTypes::Selu: { + shapes.push_back(ov::test::static_shapes_to_test_representation({const_shape}).front()); + shapes.push_back(ov::test::static_shapes_to_test_representation({const_shape}).front()); + break; + } + default: + OPENVINO_THROW("Unsupported activation type for Params test type"); + } + + init_input_shapes(shapes); + + ov::ParameterVector params; + for (const auto& shape : inputDynamicShapes) { + params.push_back(std::make_shared(model_type, shape)); + } + + switch (activationType) { + case ActivationTypes::PReLu: { + params[1]->set_friendly_name("negativeSlope"); + break; + } + case ActivationTypes::LeakyRelu: { + params[1]->set_friendly_name("leakySlope"); + break; + } + case ActivationTypes::HardSigmoid: { + params[1]->set_friendly_name("alpha"); + params[2]->set_friendly_name("beta"); + break; + } + case ActivationTypes::Selu: { + params[1]->set_friendly_name("alpha"); + params[2]->set_friendly_name("lambda"); + break; + } + default: + OPENVINO_THROW("Unsupported activation type for Params test type"); + } + + params[0]->set_friendly_name("Input"); + + auto activation = ngraph::builder::makeActivation(params, model_type, activationType); + auto result = std::make_shared(activation); + function = std::make_shared(result, params); +} +} // namespace test +} // namespace ov From 46f46c6cc65c6029a4212b27e98cec2449cabc26 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 25 Oct 2023 08:00:34 +0200 Subject: [PATCH 042/275] [PT FE] Fix xmod model test 
(#20682) --- .../model_hub_tests/torch_tests/test_hf_transformers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index caeb2e0ff2a01d..8b595e5425668a 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -250,8 +250,8 @@ def forward(self, x): if model is None: from transformers import AutoModel model = AutoModel.from_pretrained(name, torchscript=True) - if hasattr(model, "set_default_language"): - model.set_default_language("en_XX") + if hasattr(model, "set_default_language"): + model.set_default_language("en_XX") if example is None: if "encodec" in mi.tags: example = (torch.randn(1, 1, 100),) @@ -294,14 +294,11 @@ def teardown_method(self): @pytest.mark.parametrize("name,type", [("allenai/led-base-16384", "led"), ("bert-base-uncased", "bert"), - ("facebook/bart-large-mnli", "bart"), ("google/flan-t5-base", "t5"), ("google/tapas-large-finetuned-wtq", "tapas"), ("gpt2", "gpt2"), ("openai/clip-vit-large-patch14", "clip"), - ("RWKV/rwkv-4-169m-pile", "rwkv"), - ("microsoft/layoutlmv3-base", "layoutlmv3"), - ("microsoft/xprophetnet-large-wiki100-cased", "xlm-prophetnet"), + ("facebook/xmod-base","xmod") ]) @pytest.mark.precommit def test_convert_model_precommit(self, name, type, ie_device): From a71283ea94ba622121619367b1b1dc3b5dbf4b69 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Wed, 25 Oct 2023 10:16:28 +0400 Subject: [PATCH 043/275] `RNNSequence`, `Result`, `Reshape`, `ReorgYolo`, `RegionYolo` layer tests to API2.0 (#20644) * `RNNSequenceTest` to API2.0 * `Result` to API2.0 * `Reshape` to API2.0 * `ReorgYolo` to API2.0 * `RegionYolo` to API2.0 * Alignment fixes * Skip more `RNNSequenceTest` cases --- .../single_layer_tests/region_yolo.cpp | 34 ++--- .../single_layer_tests/reorg_yolo.cpp | 56 +++---- 
.../single_layer_tests/reshape.cpp | 28 ++-- .../single_layer_tests/result.cpp | 35 +++-- .../single_layer_tests/rnn_sequence.cpp | 39 ++--- .../skip_tests_config.cpp | 4 + .../include/single_op_tests/region_yolo.hpp | 15 ++ .../include/single_op_tests/reorg_yolo.hpp | 15 ++ .../include/single_op_tests/reshape.hpp | 15 ++ .../shared/include/single_op_tests/result.hpp | 15 ++ .../include/single_op_tests/rnn_sequence.hpp | 15 ++ .../single_op/region_yolo.hpp | 37 +++++ .../single_op/reorg_yolo.hpp | 31 ++++ .../shared_test_classes/single_op/reshape.hpp | 32 ++++ .../shared_test_classes/single_op/result.hpp | 31 ++++ .../single_op/rnn_sequence.hpp | 43 ++++++ .../src/single_op/region_yolo.cpp | 52 +++++++ .../src/single_op/reorg_yolo.cpp | 33 +++++ .../src/single_op/reshape.cpp | 39 +++++ .../src/single_op/result.cpp | 32 ++++ .../src/single_op/rnn_sequence.cpp | 139 ++++++++++++++++++ 21 files changed, 643 insertions(+), 97 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/region_yolo.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/reorg_yolo.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/reshape.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/result.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/rnn_sequence.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/region_yolo.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reorg_yolo.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reshape.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/result.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_sequence.hpp create mode 100644 
src/tests/functional/shared_test_classes/src/single_op/region_yolo.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/reorg_yolo.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/reshape.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/result.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/rnn_sequence.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp index ab9700b56c9f3b..1be3333cfbf3db 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp @@ -4,16 +4,16 @@ #include -#include "single_layer_tests/region_yolo.hpp" +#include "single_op_tests/region_yolo.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RegionYoloLayerTest; -const std::vector inShapes_caffe = { +const std::vector> in_shapes_caffe = { {1, 125, 13, 13} }; -const std::vector inShapes_mxnet = { +const std::vector> in_shapes_mxnet = { {1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}, @@ -22,7 +22,7 @@ const std::vector inShapes_mxnet = { {1, 75, 8, 8} }; -const std::vector inShapes_v3 = { +const std::vector> in_shapes_v3 = { {1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13} @@ -41,8 +41,8 @@ const size_t coords = 4; const int start_axis = 1; const int end_axis = 3; -const auto testCase_yolov3 = ::testing::Combine( - ::testing::ValuesIn(inShapes_v3), +const auto test_case_yolov3 = ::testing::Combine( + ::testing::ValuesIn(in_shapes_v3), ::testing::Values(classes[0]), ::testing::Values(coords), ::testing::Values(num_regions[1]), @@ -50,12 +50,12 @@ const auto testCase_yolov3 = ::testing::Combine( 
::testing::Values(masks[2]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_yolov3_mxnet = ::testing::Combine( - ::testing::ValuesIn(inShapes_mxnet), +const auto test_case_yolov3_mxnet = ::testing::Combine( + ::testing::ValuesIn(in_shapes_mxnet), ::testing::Values(classes[1]), ::testing::Values(coords), ::testing::Values(num_regions[1]), @@ -63,12 +63,12 @@ const auto testCase_yolov3_mxnet = ::testing::Combine( ::testing::Values(masks[1]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_yolov2_caffe = ::testing::Combine( - ::testing::ValuesIn(inShapes_caffe), +const auto test_case_yolov2_caffe = ::testing::Combine( + ::testing::ValuesIn(in_shapes_caffe), ::testing::Values(classes[1]), ::testing::Values(coords), ::testing::Values(num_regions[0]), @@ -76,10 +76,10 @@ const auto testCase_yolov2_caffe = ::testing::Combine( ::testing::Values(masks[0]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3, RegionYoloLayerTest, testCase_yolov3, RegionYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnet, RegionYoloLayerTest, testCase_yolov3_mxnet, RegionYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffe, RegionYoloLayerTest, testCase_yolov2_caffe, RegionYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3, RegionYoloLayerTest, test_case_yolov3, RegionYoloLayerTest::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnet, RegionYoloLayerTest, test_case_yolov3_mxnet, RegionYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffe, RegionYoloLayerTest, test_case_yolov2_caffe, RegionYoloLayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp index 48b38d09c845eb..f0eb0076330554 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp @@ -4,16 +4,16 @@ #include -#include "single_layer_tests/reorg_yolo.hpp" +#include "single_op_tests/reorg_yolo.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReorgYoloLayerTest; -const std::vector inShapes_caffe_yolov2 = { +const std::vector> in_shapes_caffe_yolov2 = { {1, 64, 26, 26}, }; -const std::vector inShapes = { +const std::vector> in_shapes = { {1, 4, 4, 4}, {1, 8, 4, 4}, {1, 9, 3, 3}, @@ -25,51 +25,51 @@ const std::vector strides = { 2, 3 }; -const auto testCase_caffe_yolov2 = ::testing::Combine( - ::testing::ValuesIn(inShapes_caffe_yolov2), +const auto test_case_caffe_yolov2 = ::testing::Combine( + ::testing::ValuesIn(in_shapes_caffe_yolov2), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_smallest = ::testing::Combine( - ::testing::Values(inShapes[0]), +const auto test_case_smallest = ::testing::Combine( + ::testing::Values(in_shapes[0]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto 
testCase_stride_2 = ::testing::Combine( - ::testing::Values(inShapes[1]), +const auto test_case_stride_2 = ::testing::Combine( + ::testing::Values(in_shapes[1]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_stride_3 = ::testing::Combine( - ::testing::Values(inShapes[2]), +const auto test_case_stride_3 = ::testing::Combine( + ::testing::Values(in_shapes[2]), ::testing::Values(strides[1]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_smaller_h = ::testing::Combine( - ::testing::Values(inShapes[4]), +const auto test_case_smaller_h = ::testing::Combine( + ::testing::Values(in_shapes[4]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto testCase_batch_2 = ::testing::Combine( - ::testing::Values(inShapes[3]), +const auto test_case_batch_2 = ::testing::Combine( + ::testing::Values(in_shapes[3]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_caffe_YoloV2, ReorgYoloLayerTest, testCase_caffe_yolov2, ReorgYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2_smallest, ReorgYoloLayerTest, testCase_smallest, ReorgYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2, ReorgYoloLayerTest, testCase_stride_2, ReorgYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_3, ReorgYoloLayerTest, testCase_stride_3, ReorgYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_smaller_h, 
ReorgYoloLayerTest, testCase_smaller_h, ReorgYoloLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_batch_2, ReorgYoloLayerTest, testCase_batch_2, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_caffe_YoloV2, ReorgYoloLayerTest, test_case_caffe_yolov2, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2_smallest, ReorgYoloLayerTest, test_case_smallest, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2, ReorgYoloLayerTest, test_case_stride_2, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_3, ReorgYoloLayerTest, test_case_stride_3, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_smaller_h, ReorgYoloLayerTest, test_case_smaller_h, ReorgYoloLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_batch_2, ReorgYoloLayerTest, test_case_batch_2, ReorgYoloLayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp index e27fd06948cf4f..2f690eaeef01a7 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp @@ -4,42 +4,32 @@ #include -#include "single_layer_tests/reshape.hpp" +#include "single_op_tests/reshape.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReshapeLayerTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheck, ReshapeLayerTest, ::testing::Combine( 
::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector({10, 10, 10, 10})), ::testing::Values(std::vector({10, 0, 100})), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_CPU)), ReshapeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheckNegative, ReshapeLayerTest, ::testing::Combine( ::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector({10, 10, 10, 10})), ::testing::Values(std::vector({10, -1, 100})), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_CPU)), ReshapeLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/result.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/result.cpp index dfa40dd6fb1f60..5ba9264ae07c14 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/result.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/result.cpp @@ -4,28 +4,33 @@ #include -#include "shared_test_classes/single_layer/result.hpp" +#include "single_op_tests/result.hpp" -using namespace LayerTestsDefinitions; -using namespace InferenceEngine; +using ov::test::ResultLayerTest; namespace { -std::vector inputDims 
= { - {7}, {1000}, {3, 5}, {65, 33}, {33, 65}, - {1, 1000}, {223, 217, 21}, {3, 4, 5, 1}, {3, 4, 1, 5, 1}}; - -std::vector inputPrecisions = { - InferenceEngine::Precision::U8, InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, +std::vector> input_shapes = { + {7}, + {1000}, + {3, 5}, + {65, 33}, + {33, 65}, + {1, 1000}, + {223, 217, 21}, + {3, 4, 5, 1}, + {3, 4, 1, 5, 1} }; -ConfigMap config; +std::vector model_types = { + ov::element::u8, + ov::element::f32, + ov::element::i32, +}; INSTANTIATE_TEST_SUITE_P( smoke_ResultLayerTest, ResultLayerTest, - ::testing::Combine(::testing::ValuesIn(inputDims), - ::testing::ValuesIn(inputPrecisions), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(config)), + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::utils::DEVICE_CPU)), ResultLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_sequence.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_sequence.cpp index 2ad71e186d7451..18ecb101bf0824 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_sequence.cpp @@ -3,21 +3,24 @@ // #include -#include -#include "single_layer_tests/rnn_sequence.hpp" +#include "openvino/op/util/attr_types.hpp" +#include "single_op_tests/rnn_sequence.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RNNSequenceTest; +using ov::test::utils::SequenceTestsMode; +using ov::test::utils::InputLayerType; +using ov::op::RecurrentSequenceDirection; namespace { - std::vector mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST, - ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST, 
- ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM, - ngraph::helpers::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM, - ngraph::helpers::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST, - ngraph::helpers::SequenceTestsMode::PURE_SEQ}; + std::vector mode{SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST, + SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST, + SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM, + SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM, + SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST, + SequenceTestsMode::PURE_SEQ}; // output values increase rapidly without clip, so use only seq_lengths = 2 - std::vector seq_lengths_zero_clip{2}; + std::vector seq_lengths_zero_clip{10}; std::vector seq_lengths_clip_non_zero{20}; std::vector batch{1, 10}; std::vector hidden_size{1, 10}; @@ -25,11 +28,11 @@ namespace { std::vector> activations = {{"relu"}, {"sigmoid"}, {"tanh"}}; std::vector clip{0.f}; std::vector clip_non_zeros{0.7f}; - std::vector direction = {ngraph::op::RecurrentSequenceDirection::FORWARD, - ngraph::op::RecurrentSequenceDirection::REVERSE, - ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL, + std::vector direction = {RecurrentSequenceDirection::FORWARD, + RecurrentSequenceDirection::REVERSE, + RecurrentSequenceDirection::BIDIRECTIONAL, }; - std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + std::vector model_types = {ov::element::f32}; INSTANTIATE_TEST_SUITE_P(smoke_RNNSequenceCommonZeroClip, RNNSequenceTest, ::testing::Combine( @@ -41,8 +44,8 @@ namespace { ::testing::ValuesIn(activations), ::testing::ValuesIn(clip), ::testing::ValuesIn(direction), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), - ::testing::ValuesIn(netPrecisions), + ::testing::Values(InputLayerType::CONSTANT), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU)), RNNSequenceTest::getTestCaseName); @@ -56,8 +59,8 @@ namespace { ::testing::ValuesIn(activations), 
::testing::ValuesIn(clip_non_zeros), ::testing::ValuesIn(direction), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), - ::testing::ValuesIn(netPrecisions), + ::testing::Values(InputLayerType::CONSTANT), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU)), RNNSequenceTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 508e766728b24b..4a3f3f8ddffa35 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -193,6 +193,8 @@ std::vector disabledTestPatterns() { R"(smoke_Interpolate_Basic_Down_Sample_Tail/InterpolateLayerTest.Inference.*(asymmetric|align_corners).*f16.*)", // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. 
R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", + // Issue: 123321 + R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)", }; #if defined(OPENVINO_ARCH_X86) @@ -217,6 +219,8 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_CPU_OVClassLoadNetworkAndCheckWithSecondaryPropertiesDoubleTest/OVClassLoadNetworkAndCheckSecondaryPropertiesTest.LoadNetworkAndCheckSecondaryPropertiesTest.*)"); retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)"); retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)"); + // Issue: 123321 + retVector.emplace_back(R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=1.*relu.*direction=reverse.*)"); } // invalid test: checks u8 precision for runtime graph, while it should be f32 retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)"); diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/region_yolo.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/region_yolo.hpp new file mode 100644 index 00000000000000..14b9235cf3f3d2 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/region_yolo.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/region_yolo.hpp" + +namespace ov { +namespace test { +TEST_P(RegionYoloLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/reorg_yolo.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/reorg_yolo.hpp new file mode 100644 index 00000000000000..6088da8a97648c --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/reorg_yolo.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/reorg_yolo.hpp" + +namespace ov { +namespace test { +TEST_P(ReorgYoloLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/reshape.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/reshape.hpp new file mode 100644 index 00000000000000..741f44038983a4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/reshape.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/reshape.hpp" + +namespace ov { +namespace test { +TEST_P(ReshapeLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/result.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/result.hpp new file mode 100644 index 00000000000000..4915f5126e3100 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/result.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/result.hpp" + +namespace ov { +namespace test { +TEST_P(ResultLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/rnn_sequence.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/rnn_sequence.hpp new file mode 100644 index 00000000000000..18985f620b9452 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/rnn_sequence.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/rnn_sequence.hpp" + +namespace ov { +namespace test { 
+TEST_P(RNNSequenceTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/region_yolo.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/region_yolo.hpp new file mode 100644 index 00000000000000..8fd145748b5148 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/region_yolo.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using regionYoloParamsTuple = std::tuple< + std::vector, // Input shape + size_t, // Classes + size_t, // Coordinates + size_t, // Num regions + bool, // Do softmax + std::vector, // Mask + int, // Start axis + int, // End axis + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class RegionYoloLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reorg_yolo.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reorg_yolo.hpp new file mode 100644 index 00000000000000..4a7e586cadcd40 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reorg_yolo.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using ReorgYoloParamsTuple = typename std::tuple< + std::vector, // 
Input shape + size_t, // Stride + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class ReorgYoloLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reshape.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reshape.hpp new file mode 100644 index 00000000000000..1a2567a6c010b5 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reshape.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using reshapeParams = std::tuple< + bool, // SpecialZero + ov::element::Type, // Model type + std::vector, // Input shapes + std::vector, // OutForm shapes + ov::test::TargetDevice // Device name +>; +class ReshapeLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/result.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/result.hpp new file mode 100644 index 00000000000000..024d38cd6edc09 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/result.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
+#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using ResultTestParamSet = std::tuple< + std::vector, // Input shapes + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class ResultLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_sequence.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_sequence.hpp new file mode 100644 index 00000000000000..31b1386c168505 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_sequence.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ov_models/builders.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { + +using RNNSequenceParams = typename std::tuple< + ov::test::utils::SequenceTestsMode, // Pure Sequence or TensorIterator + size_t, // Sequence lengths + size_t, // Batch + size_t, // Hidden size + size_t, // Input size + std::vector, // Activations + float, // Clip + ov::op::RecurrentSequenceDirection, // Direction + ov::test::utils::InputLayerType, // WRB input type (Constant or Parameter) + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class RNNSequenceTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // 
namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/region_yolo.cpp b/src/tests/functional/shared_test_classes/src/single_op/region_yolo.cpp new file mode 100644 index 00000000000000..d5c0e0481f20f8 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/region_yolo.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/region_yolo.hpp" + +namespace ov { +namespace test { +std::string RegionYoloLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector input_shape; + ov::element::Type model_type; + std::string target_device; + size_t classes; + size_t coords; + size_t num_regions; + bool do_softmax; + std::vector mask; + int start_axis; + int end_axis; + std::tie(input_shape, classes, coords, num_regions, do_softmax , mask, start_axis, end_axis, model_type, target_device) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "classes=" << classes << "_"; + result << "coords=" << coords << "_"; + result << "num=" << num_regions << "_"; + result << "doSoftmax=" << do_softmax << "_"; + result << "axis=" << start_axis << "_"; + result << "endAxis=" << end_axis << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void RegionYoloLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + size_t classes; + size_t coords; + size_t num_regions; + bool do_softmax; + std::vector mask; + int start_axis; + int end_axis; + std::tie(input_shape, classes, coords, num_regions, do_softmax, mask, start_axis, end_axis, model_type, targetDevice) = this->GetParam(); + + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + auto region_yolo = std::make_shared(param, coords, classes, num_regions, do_softmax, mask, 
start_axis, end_axis); + function = std::make_shared(region_yolo->outputs(), ov::ParameterVector{param}, "RegionYolo"); +} +} // namespace test +} // namespace ov + diff --git a/src/tests/functional/shared_test_classes/src/single_op/reorg_yolo.cpp b/src/tests/functional/shared_test_classes/src/single_op/reorg_yolo.cpp new file mode 100644 index 00000000000000..9b7efc87220515 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/reorg_yolo.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/reorg_yolo.hpp" + +namespace ov { +namespace test { +std::string ReorgYoloLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector input_shape; + ov::element::Type model_type; + size_t stride; + std::string target_device; + std::tie(input_shape, stride, model_type, target_device) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "stride=" << stride << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ReorgYoloLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + size_t stride; + std::tie(input_shape, stride, model_type, targetDevice) = this->GetParam(); + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + auto reorg_yolo = std::make_shared(param, stride); + function = std::make_shared(reorg_yolo->outputs(), ov::ParameterVector{param}, "ReorgYolo"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/reshape.cpp b/src/tests/functional/shared_test_classes/src/single_op/reshape.cpp new file mode 100644 index 00000000000000..27c058f50aa68b --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/reshape.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/reshape.hpp" + +namespace ov { +namespace test { +std::string ReshapeLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shape; + ov::element::Type model_type; + std::vector out_form_shapes; + std::string target_device; + bool special_zero; + std::tie(special_zero, model_type, input_shape, out_form_shapes, target_device) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "OS=" << ov::test::utils::vec2str(out_form_shapes) << "_"; + result << "specialZero=" << special_zero << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ReshapeLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + std::vector out_form_shapes; + bool special_zero; + std::tie(special_zero, model_type, input_shape, out_form_shapes, targetDevice) = this->GetParam(); + + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + auto const_node = std::make_shared(ov::element::i64, ov::Shape{out_form_shapes.size()}, out_form_shapes); + auto reshape = std::make_shared(param, const_node, special_zero); + function = std::make_shared(reshape->outputs(), ov::ParameterVector{param}, "Reshape"); +} +} // namespace test +} // namespace ov + diff --git a/src/tests/functional/shared_test_classes/src/single_op/result.cpp b/src/tests/functional/shared_test_classes/src/single_op/result.cpp new file mode 100644 index 00000000000000..0a8fef223e062c --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/result.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/result.hpp" + +namespace ov { +namespace test { +std::string ResultLayerTest::getTestCaseName(const 
testing::TestParamInfo& obj) { + std::vector input_shape; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shape, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ResultLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + std::tie(input_shape, model_type, targetDevice) = GetParam(); + + ov::ParameterVector params{std::make_shared(model_type, ov::Shape(input_shape))}; + auto result = std::make_shared(params[0]); + function = std::make_shared(result->outputs(), params, "result"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/rnn_sequence.cpp b/src/tests/functional/shared_test_classes/src/single_op/rnn_sequence.cpp new file mode 100644 index 00000000000000..109aacdacbc277 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/rnn_sequence.cpp @@ -0,0 +1,139 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/pass/manager.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" +#include "shared_test_classes/single_op/rnn_sequence.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +// #include "ov_models/utils/ov_helpers.hpp" + +using ov::test::utils::InputLayerType; +using ov::test::utils::SequenceTestsMode; + +namespace ov { +namespace test { + +std::string RNNSequenceTest::getTestCaseName(const testing::TestParamInfo &obj) { + SequenceTestsMode mode; + size_t seq_lengths; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float 
clip; + ov::op::RecurrentSequenceDirection direction; + ov::element::Type model_type; + InputLayerType WRBType; + std::string target_device; + std::tie(mode, seq_lengths, batch, hidden_size, input_size, activations, clip, direction, WRBType, + model_type, target_device) = obj.param; + std::vector> input_shapes = { + {{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {hidden_size, input_size}, + {hidden_size, hidden_size}, {hidden_size}}, + }; + std::ostringstream result; + result << "mode=" << mode << "_"; + result << "seq_lengths=" << seq_lengths << "_"; + result << "batch=" << batch << "_"; + result << "hidden_size=" << hidden_size << "_"; + result << "input_size=" << input_size << "_"; + result << "IS=" << ov::test::utils::vec2str(input_shapes) << "_"; + result << "activations=" << ov::test::utils::vec2str(activations) << "_"; + result << "direction=" << direction << "_"; + result << "clip=" << clip << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void RNNSequenceTest::SetUp() { + SequenceTestsMode mode; + size_t seq_lengths; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + ov::op::RecurrentSequenceDirection direction; + InputLayerType WRBType; + ov::element::Type model_type; + std::tie(mode, seq_lengths, batch, hidden_size, input_size, activations, clip, direction, WRBType, + model_type, targetDevice) = this->GetParam(); + + size_t num_directions = direction == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 
2 : 1; + std::vector input_shapes = { + {{batch, seq_lengths, input_size}, {batch, num_directions, hidden_size}, {batch}, + {num_directions, hidden_size, input_size}, {num_directions, hidden_size, hidden_size}, + {num_directions, hidden_size}}, + }; + + ov::ParameterVector params{std::make_shared(model_type, ov::Shape(input_shapes[0])), + std::make_shared(model_type, ov::Shape(input_shapes[1]))}; + std::shared_ptr seq_lengths_node; + if (mode == SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM || + mode == SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM || + mode == SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM) { + auto param = std::make_shared(ov::element::i64, input_shapes[2]); + param->set_friendly_name("seq_lengths"); + params.push_back(param); + seq_lengths_node = param; + } else if (mode == ov::test::utils::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST || + mode == ov::test::utils::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST) { + auto tensor = ov::test::utils::create_and_fill_tensor(ov::element::i64, input_shapes[2], static_cast(seq_lengths), 0.f); + seq_lengths_node = std::make_shared(tensor); + } else { + std::vector lengths(batch, seq_lengths); + seq_lengths_node = std::make_shared(ov::element::i64, input_shapes[2], lengths); + } + + const auto& W_shape = input_shapes[3]; + const auto& R_shape = input_shapes[4]; + const auto& B_shape = input_shapes[5]; + + std::shared_ptr W, R, B; + if (WRBType == InputLayerType::PARAMETER) { + const auto W_param = std::make_shared(model_type, W_shape); + const auto R_param = std::make_shared(model_type, R_shape); + const auto B_param = std::make_shared(model_type, B_shape); + W = W_param; + R = R_param; + B = B_param; + params.push_back(W_param); + params.push_back(R_param); + params.push_back(B_param); + } else { + const auto W_tensor = ov::test::utils::create_and_fill_tensor(model_type, W_shape); + const auto R_tensor = ov::test::utils::create_and_fill_tensor(model_type, R_shape); + const auto B_tensor 
= ov::test::utils::create_and_fill_tensor(model_type, B_shape); + W = std::make_shared(W_tensor); + R = std::make_shared(R_tensor); + B = std::make_shared(B_tensor); + } + + auto rnn_sequence = std::make_shared(params[0], params[1], seq_lengths_node, W, R, B, hidden_size, direction, + activations, activations_alpha, activations_beta, clip); + function = std::make_shared(rnn_sequence->outputs(), params, "rnn_sequence"); + bool is_pure_sequence = (mode == SequenceTestsMode::PURE_SEQ || + mode == SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM || + mode == SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_CONST); + if (!is_pure_sequence) { + ov::pass::Manager manager; + if (direction == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL) + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + } else { + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + } +} +} // namespace test +} // namespace ov From 9d56c315814bb01258c7bfa9dab229c498615db5 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Wed, 25 Oct 2023 16:01:52 +0900 Subject: [PATCH 044/275] [GPU] add shape infer in scatter elements update (#20250) * add shape infer in scatter elements update * output shape is same with input shape in dynamic case --- .../src/graph/scatter_elements_update.cpp | 8 ++-- .../dynamic/scatter_nd_update.cpp | 46 +++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp index 88224259ce5a58..6ae1a83447c98c 100644 --- a/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp +++ b/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp @@ -9,6 +9,8 @@ #include "json_object.h" #include +#include "scatter_elements_update_shape_inference.hpp" + namespace cldnn { 
GPU_DEFINE_PRIMITIVE_TYPE_ID(scatter_elements_update) @@ -16,11 +18,11 @@ layout scatter_elements_update_inst::calc_output_layout(scatter_elements_update_ auto desc = impl_param.typed_desc(); const int32_t axis = desc->axis; - const size_t input_number_of_dims = impl_param.get_input_layout().get_tensor().sizes().size(); + const size_t input_number_of_dims = impl_param.get_input_layout().get_partial_shape().size(); auto input_layout = impl_param.get_input_layout(); - auto output_shape = input_layout.get_tensor(); + auto output_shape = input_layout.get_partial_shape(); auto input_format = input_layout.format; auto output_type = input_layout.data_type; @@ -31,7 +33,7 @@ layout scatter_elements_update_inst::calc_output_layout(scatter_elements_update_ if (static_cast(axis) < 0 || static_cast(axis) >= input_number_of_dims) CLDNN_ERROR_MESSAGE(desc->id, "Incorrect axis value for ScatterElementsUpdate: Axis must be positive and less than the input tensor dimension."); - return layout{output_type, input_format, output_shape}; + return layout{output_shape, output_type, input_format}; } std::string scatter_elements_update_inst::to_string(scatter_elements_update_node const& node) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scatter_nd_update.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scatter_nd_update.cpp index 81e87ec1eb69e0..b1c010d80f4198 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scatter_nd_update.cpp @@ -173,7 +173,7 @@ TEST_P(ScatterUpdateLayerGPUTest, CompareWithRefs) { namespace ScatterNDUpdate { -const std::vector scatterParams = { +const std::vector scatterNDParams = { ScatterUpdateLayerParams{ ScatterUpdateShapes{ {{-1, -1, -1, -1, -1}, {{10, 9, 10, 9, 10}, {10, 1, 11, 2, 5}, {10, 15, 8, 1, 7}}}, @@ -212,6 +212,39 @@ const std::vector scatterParams = { }, }; +const 
std::vector scatterElementsParams = { + ScatterUpdateLayerParams{ + ScatterUpdateShapes{ + {{-1, -1, -1, -1, -1}, {{10, 9, 10, 9, 10}, {10, 5, 11, 4, 5}, {10, 15, 8, 1, 7}}}, + {{-1, -1, -1, -1, -1 }, {{3, 2, 1, 2, 1}, {3, 2, 1, 2, 1}, {3, 2, 1, 2, 1}}}, + {{-1, -1, -1, -1, -1 }, {{3, 2, 1, 2, 1}, {3, 2, 1, 2, 1}, {3, 2, 1, 2, 1}}}, + {{1}, {{1}}} + }, + IndicesValues{ 5, 6, 2, 8, 5, 6, 2, 8, 5, 6, 2, 8 }, + Scatterupdate_type::Elements + }, + ScatterUpdateLayerParams{ + ScatterUpdateShapes{ + {{-1, -1, -1, -1}, {{ 10, 9, 9, 11 }, { 7, 5, 3, 12 }, { 3, 4, 9, 8 }}}, + {{-1, -1, -1, -1}, {{3, 1, 2, 3}, {3, 1, 2, 3}, {3, 1, 2, 3}}}, + {{-1, -1, -1, -1}, {{3, 1, 2, 3}, {3, 1, 2, 3}, {3, 1, 2, 3}}}, + {{1}, {{1}}} + }, + IndicesValues{ 0, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2, 2, 0, 1, 1, 2, 2, 2 }, + Scatterupdate_type::Elements + }, + ScatterUpdateLayerParams{ + ScatterUpdateShapes{ + {{{3, 10}, -1, {3, 9}, -1}, {{ 10, 9, 9, 11 }, { 7, 5, 3, 12 }, { 3, 4, 9, 8 }}}, + {{2, -1, 3, -1}, {{2, 1, 3, 1}, {2, 1, 3, 1}, {2, 1, 3, 1}}}, + {{2, -1, 3, -1}, {{2, 1, 3, 1}, {2, 1, 3, 1}, {2, 1, 3, 1}}}, + {{1}, {{1}}} + }, + IndicesValues{ 0, 1, 1, 2, 2, 2 }, + Scatterupdate_type::Elements + }, +}; + const std::vector inputPrecisions = { ElementType::f32, }; @@ -260,7 +293,14 @@ const std::vector scatterElementsUpdate_EmptyInput1_2P INSTANTIATE_TEST_SUITE_P(smoke_ScatterNDUpdate_CompareWithRefs_dynamic, ScatterUpdateLayerGPUTest, ::testing::Combine( - ::testing::ValuesIn(scatterParams), + ::testing::ValuesIn(scatterNDParams), + ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(constantPrecisions)), + ScatterUpdateLayerGPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ScatterElementsUpdate_CompareWithRefs_dynamic, ScatterUpdateLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(scatterElementsParams), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(constantPrecisions)), ScatterUpdateLayerGPUTest::getTestCaseName); @@ -280,7 +320,7 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_ScatterNDUpdate_EmptyInput1_2_CompareWithRefs_dyn ScatterUpdateLayerGPUTest::getTestCaseName); // ScatterELementsUpdate doesn't support dynamic shape yet. Need to enable when it supports. -INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_ScatterElementsUpdate_EmptyInput1_2_CompareWithRefs_dynamic, ScatterUpdateLayerGPUTest, +INSTANTIATE_TEST_SUITE_P(smoke_ScatterElementsUpdate_EmptyInput1_2_CompareWithRefs_dynamic, ScatterUpdateLayerGPUTest, ::testing::Combine( ::testing::ValuesIn(scatterElementsUpdate_EmptyInput1_2Params), ::testing::ValuesIn(inputPrecisions), From c70f0ca45d04f9a3b7a0a9ce4999afc2b43ecc06 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Wed, 25 Oct 2023 16:09:11 +0900 Subject: [PATCH 045/275] [GPU] skip excessive mem alloc request in build (#20399) * skip excessive mem alloc request in build * update mem check function * fix os behavior * update mem size check location * only dynamic shape case takes check_allocatable * update check condition --- .../include/intel_gpu/runtime/engine.hpp | 2 + .../intel_gpu/src/graph/primitive_inst.cpp | 7 +- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 34 +-- .../intel_gpu/src/runtime/ocl/ocl_engine.hpp | 2 +- .../gpu_dyn_huge_input_range.cpp | 235 ++++++++++++++++++ 5 files changed, 263 insertions(+), 17 deletions(-) create mode 100644 src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index ee8d10bb580f9e..6b9195097b349b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -79,6 +79,8 @@ class engine { /// Checks whether two memory objects represents the same physical memory virtual bool is_the_same_buffer(const memory& mem1, const memory& mem2) = 0; + virtual bool check_allocatable(const layout& layout, allocation_type type) = 0; + /// 
Returns basic allocation type which will be used as a fallback when allocation type is not specified or device doesn't support some features. virtual allocation_type get_default_allocation_type() const = 0; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 92f9f60743b9f5..a81d0bd10ad58d 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -991,7 +991,6 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool , _outputs({memory::ptr()}) , _reordered_weights_cache(network.get_weights_cache_capacity()) , _output_changed(false) - , _mem_allocated(allocate_memory) , _is_dynamic(node.is_dynamic() || node.generates_dynamic_output()) , _type(node.type()) , _id(node.id()) @@ -1006,6 +1005,12 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool , _can_share_buffer(node.can_share_buffer()) , _is_constant(node.is_constant()) , _needs_completion_event(is_any_user_cpu(node.get_users()) || node.is_output()) { + // When dynamic shape node has huge upper boundary which causes bigger mem size than system max allocable mem size, do not allocate in build time. 
+ auto output_layout = node.get_output_layout(); + if (allocate_memory && node.is_dynamic() && (!network.get_engine().check_allocatable(output_layout, allocation_type::usm_host))) { + allocate_memory = false; + } + _mem_allocated = allocate_memory; if (allocate_memory) { // In case when output is mutable_data primitive, and other users dependencies are only used for // suychronization, The output memory of such primitive will be fused with mutable_data diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 325bb3aa5816bc..9e4bbd9aa6bbdf 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -125,29 +125,33 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const : allocation_type::unknown; } -bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) const { +bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) { OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, "[GPU] Unsupported allocation type: ", type); - auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); -#ifdef __unix__ - // Prevent from being killed by Ooo Killer of Linux - OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(), - "[GPU] Exceeded max size of memory allocation: ", - "Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ", - "but available memory size is ", get_max_memory_size(), " bytes"); -#else - if (layout.bytes_count() + used_mem > get_max_memory_size()) { - GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : " - << used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl; - GPU_DEBUG_COUT << 
"Please note that performance might drop due to memory swap." << std::endl; + auto alloc_mem_size = layout.bytes_count(); + auto max_mem_size = get_device_info().max_alloc_mem_size; + if (alloc_mem_size > max_mem_size) { + auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); + GPU_DEBUG_LOG << "[GPU] Mem size info: " << "Required " << alloc_mem_size << " bytes, already occupied : " + << used_mem << " bytes, available memory size is " << get_max_memory_size() << " bytes, but max allocable memory size is " + << max_mem_size << " bytes." << std::endl; + return false; } -#endif + return true; } memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout"); - check_allocatable(layout, type); + bool allocatable = check_allocatable(layout, type); + if (!allocatable) { +#ifdef __unix__ + OPENVINO_ASSERT(allocatable, "[GPU] Exceeded max size of memory allocation, check debug message for size info"); +#else + GPU_DEBUG_COUT << "[Warning][GPU] Please note that performance might drop due to memory swap caused by exceeded mem size alloc." 
<< std::endl; +#endif + } + try { memory::ptr res = nullptr; if (layout.format.is_image_2d()) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp index 6d4141396518e6..ee76fcca82a2d2 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp @@ -28,7 +28,7 @@ class ocl_engine : public engine { memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override; memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override; bool is_the_same_buffer(const memory& mem1, const memory& mem2) override; - bool check_allocatable(const layout& layout, allocation_type type) const; + bool check_allocatable(const layout& layout, allocation_type type) override; void* get_user_context() const override; diff --git a/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp new file mode 100644 index 00000000000000..62eb867df971fa --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/dynamic_tests/gpu_dyn_huge_input_range.cpp @@ -0,0 +1,235 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/strided_slice.hpp" +#include "shared_test_classes/single_layer/shape_of.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ov_models/builders.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +struct StridedSliceParams { + std::vector begin; + std::vector end; + std::vector stride; + std::vector beginMask; + std::vector endMask; + std::vector newAxisMask; + std::vector shrinkAxisMask; + std::vector ellipsisAxisMask; +}; + +typedef std::tuple< + 
InputShape, // Input shapes + StridedSliceParams, + ElementType, // Element type + std::vector, // begin/end/stride input type + std::map // Additional network configuration +> StridedSliceLayerParamSet; + +class DynamicShapeHugeRangeGPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InputShape shapes; + StridedSliceParams params; + ElementType elementType; + std::vector restInputType; + TargetDevice targetDevice; + std::map additionalConfig; + std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param; + + std::ostringstream results; + results << "IS=" << ov::test::utils::partialShape2str({shapes.first}) << "_"; + results << "TS="; + for (const auto& item : shapes.second) { + results << ov::test::utils::vec2str(item) << "_"; + } + results << "netPRC=" << elementType << "_"; + results << "begin=" << ov::test::utils::vec2str(params.begin) << "_"; + results << "end=" << ov::test::utils::vec2str(params.end) << "_"; + results << "stride=" << ov::test::utils::vec2str(params.stride) << "_"; + results << "begin_m=" << ov::test::utils::vec2str(params.beginMask) << "_"; + results << "end_m=" << ov::test::utils::vec2str(params.endMask) << "_"; + results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.newAxisMask)) << "_"; + results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : ov::test::utils::vec2str(params.shrinkAxisMask)) << "_"; + results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? 
"def" : ov::test::utils::vec2str(params.ellipsisAxisMask)) << "_"; + results << "beginType=" << restInputType[0] << "_"; + results << "endType=" << restInputType[1] << "_"; + results << "strideType=" << restInputType[2] << "_"; + results << "config=("; + for (const auto& configEntry : additionalConfig) { + results << configEntry.first << ", " << configEntry.second << ":"; + } + results << ")"; + + return results.str(); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + ov::Tensor tensor; + + // input0: data + int32_t idx = 0; + tensor = ov::test::utils::create_and_fill_tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + + // input1: begin + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < begin.size(); i++) { + dataPtr[i] = static_cast(begin[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + // input2: end + if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < end.size(); i++) { + dataPtr[i] = static_cast(end[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + // input3: stride + if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) { + idx += 1; + tensor = ov::Tensor(funcInputs[idx].get_element_type(), targetInputStaticShapes[idx]); + auto *dataPtr = tensor.data(); + for (size_t i = 0; i < stride.size(); i++) { + dataPtr[i] = static_cast(stride[i]); + } + inputs.insert({funcInputs[idx].get_node_shared_ptr(), tensor}); + } + + inferRequestNum++; + } + 
+protected: + std::vector begin; + std::vector end; + std::vector stride; + std::vector restInputType; + size_t inferRequestNum = 0; + + void SetUp() override { + InputShape shapes; + StridedSliceParams ssParams; + std::map additionalConfig; + std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam(); + + begin = ssParams.begin; + end = ssParams.end; + stride = ssParams.stride; + + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector inputShapes; + inputShapes.push_back(shapes); + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()}))); + if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()}))); + if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) + inputShapes.push_back(InputShape({static_cast(stride.size())}, std::vector(shapes.second.size(), {stride.size()}))); + + init_input_shapes(inputShapes); + + ov::ParameterVector params{std::make_shared(inType, inputDynamicShapes.front())}; + + std::shared_ptr beginInput, endInput, strideInput; + if (restInputType[0] == ngraph::helpers::InputLayerType::PARAMETER) { + auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}); + params.push_back(beginNode); + beginInput = beginNode; + } else { + beginInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin); + } + + if (restInputType[1] == ngraph::helpers::InputLayerType::PARAMETER) { + auto endNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}); + params.push_back(endNode); + endInput = endNode; + } else { + endInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end); + } + + if (restInputType[2] == ngraph::helpers::InputLayerType::PARAMETER) { + auto strideNode = 
std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()}); + params.push_back(strideNode); + strideInput = strideNode; + } else { + strideInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()}, stride); + } + + auto stridedSliceOp = std::make_shared(params[0], beginInput, endInput, strideInput, ssParams.beginMask, ssParams.endMask, + ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask); + + auto shapeOfOp = std::make_shared(stridedSliceOp, ov::element::Type_t::i32); + + ngraph::ResultVector results; + for (size_t i = 0; i < shapeOfOp->get_output_size(); i++) { + results.push_back(std::make_shared(shapeOfOp->output(i))); + } + + function = std::make_shared(results, params, "result"); + } +}; + +TEST_P(DynamicShapeHugeRangeGPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); +} + +namespace { + +std::map emptyAdditionalConfig; + +const std::vector inputPrecisions = { + ElementType::f32 +}; + +const std::vector> restInputTypes = { + {ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT}, + {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER}, + {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT}, + {ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT}, + {ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER}, + {ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER}, + {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT, ngraph::helpers::InputLayerType::PARAMETER}, + 
{ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT}, +}; + +const std::vector inputShapesDynamic2D_excessive_uppper_boundary = { + {{{0, 1000}, {0, 364000000}, 4}, + {{640, 640, 4}}}, +}; + +const std::vector paramsPlain2D_excessive_uppper_boundary = { + StridedSliceParams{ { 0, 1 }, { 0, 2147483647 }, { 1, 1 }, { 1, 0 }, { 1, 0 }, { }, { }, { } }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Dynamic_2D_excessive_uppper_boundary, DynamicShapeHugeRangeGPUTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapesDynamic2D_excessive_uppper_boundary), + ::testing::ValuesIn(paramsPlain2D_excessive_uppper_boundary), + ::testing::ValuesIn(inputPrecisions), + ::testing::Values(restInputTypes[0]), + ::testing::Values(emptyAdditionalConfig)), + DynamicShapeHugeRangeGPUTest::getTestCaseName); +} // namespace +} // namespace GPULayerTestsDefinitions From 706d6576375224d50b09d58eb34d5c99f4feaccd Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Wed, 25 Oct 2023 09:12:41 +0200 Subject: [PATCH 046/275] [DOCS] Install Guide Update for master (#20677) * missing info * System Requirements * Update installing-openvino-from-archive-macos.md * system requirements update --- .../installing-openvino-apt.md | 2 +- .../installing-openvino-from-archive-linux.md | 2 +- .../installing-openvino-yum.md | 4 ++-- .../installing-openvino-from-archive-macos.md | 3 ++- .../installing-openvino-shared/installing-openvino-brew.md | 2 +- .../installing-openvino-shared/installing-openvino-conan.md | 2 +- .../installing-openvino-shared/installing-openvino-conda.md | 2 +- .../installing-openvino-shared/installing-openvino-pip.md | 5 +++-- .../installing-openvino-shared/installing-openvino-vcpkg.md | 2 +- .../installing-openvino-from-archive-windows.md | 2 +- 10 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/articles_en/get 
started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-apt.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-apt.md index b70f91b611c924..a0dc39569761db 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-apt.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-apt.md @@ -22,7 +22,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. tab-item:: Processor Notes :sync: processor-notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-from-archive-linux.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-from-archive-linux.md index 5034c94dc4ea5c..edd3c1a164bf7e 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-from-archive-linux.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-from-archive-linux.md @@ -38,7 +38,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. 
tab-item:: Processor Notes :sync: processor-notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md index 3730cdab8e7ff5..c9646b212828fe 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md @@ -10,7 +10,7 @@ Note that the YUM distribution: - * offers C/C++ APIs only + * offers both C/C++ and Python APIs * does not offer support for GNA and NPU inference * is dedicated to Linux users only * additionally includes code samples @@ -21,7 +21,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. note:: diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header/installing-openvino-from-archive-macos.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header/installing-openvino-from-archive-macos.md index 316a42d6d8c5a1..f4933b1c0b9569 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header/installing-openvino-from-archive-macos.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header/installing-openvino-from-archive-macos.md @@ -14,6 +14,7 @@ * offers both C/C++ and Python APIs * additionally includes code samples * is dedicated to macOS users (archives for other systems are also available) + * is only supported for CPU Plugin .. tab-set:: @@ -22,7 +23,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. 
tab-item:: Software Requirements :sync: software-requirements diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-brew.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-brew.md index d9c3545f57aaac..3c482de9f6f777 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-brew.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-brew.md @@ -21,7 +21,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. tab-item:: Processor Notes :sync: processor-notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conan.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conan.md index 1748a9086ab6c3..e36e70ff9e6d92 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conan.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conan.md @@ -22,7 +22,7 @@ :sync: system-requirements Full requirement listing is available in: - `System Requirements Page `__ + :doc:`System Requirements Page ` .. 
tab-item:: Processor Notes :sync: processor-notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conda.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conda.md index bed553e73ad6fb..84c69cf0d4392c 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conda.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-conda.md @@ -22,7 +22,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. tab-item:: Processor Notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-pip.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-pip.md index 1356373d1b5ad1..e8bff7d36cec4a 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-pip.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-pip.md @@ -12,9 +12,10 @@ Note that the PyPi distribution: * offers the Python API only - * does not offer support for GNA and NPU inference * is dedicated to users of all major OSes: Windows, Linux, and macOS (all x86_64 / arm64 architectures) + * Windows and Linux do not offer support for GNA and NPU inference + * macOS offers support only for CPU inference .. 
tab-set:: @@ -22,7 +23,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` | `PyPi OpenVINO page `__ diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md index 39c82520907d35..d0d502cfced7e2 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md @@ -21,7 +21,7 @@ :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. tab-item:: Processor Notes :sync: processor-notes diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md index 270948d13406bd..d24670343d770a 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md @@ -25,7 +25,7 @@ System Requirements :sync: system-requirements | Full requirement listing is available in: - | `System Requirements Page `__ + | :doc:`System Requirements Page ` .. 
tab-item:: Processor Notes :sync: processor-notes From dc4240bc61c08dcc77eae60310d67f5e748f669b Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Wed, 25 Oct 2023 11:15:03 +0400 Subject: [PATCH 047/275] [CPU] Removed custom ShapeInference impl for RandomUniform (#20599) --- .../intel_cpu/src/nodes/grid_sample.cpp | 31 +++++------- .../intel_cpu/src/nodes/grid_sample.hpp | 4 -- .../nodes/kernels/x64/gather_uni_kernel.cpp | 6 +-- .../nodes/kernels/x64/gather_uni_kernel.hpp | 1 + .../src/nodes/kernels/x64/grid_sample.cpp | 2 +- .../src/nodes/kernels/x64/jit_kernel_base.cpp | 26 +++++----- .../intel_cpu/src/nodes/random_uniform.cpp | 7 ++- .../shape_inference/custom/random_uniform.cpp | 47 ------------------- .../shape_inference/custom/random_uniform.hpp | 37 --------------- 9 files changed, 34 insertions(+), 127 deletions(-) delete mode 100644 src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp delete mode 100644 src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 6868e907fa7ae8..28a2e63283c9ce 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -2,12 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include - #include "grid_sample.hpp" #include "ie_parallel.hpp" -#include +#include "openvino/op/grid_sample.hpp" using namespace InferenceEngine; using namespace ov::intel_cpu; @@ -16,8 +13,6 @@ using namespace ov::intel_cpu::node; using namespace dnnl::impl::cpu; #endif // OPENVINO_ARCH_X86_64 -#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' " - bool GridSample::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { @@ -46,21 +41,21 @@ GridSample::GridSample(const std::shared_ptr& op, const GraphContext:: : Node(op, context, NgraphShapeInferFactory(op, 
PortMask(1))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { - IE_THROW(NotImplemented) << errorMessage; + THROW_CPU_NODE_ERR(errorMessage); } if (op->get_input_size() != 2 || op->get_output_size() != 1) - THROW_ERROR << "has incorrect number of input/output ports."; + THROW_CPU_NODE_ERR("has incorrect number of input/output ports."); const auto& dataShape = getInputShapeAtPort(IN_DATA); if (dataShape.getRank() != 4) - THROW_ERROR << "has incorrect rank of the Data input."; + THROW_CPU_NODE_ERR("has incorrect rank of the Data input."); const auto& gridShape = getInputShapeAtPort(IN_GRID); if (gridShape.getRank() != 4) - THROW_ERROR << "has incorrect rank of the Grid input."; + THROW_CPU_NODE_ERR("has incorrect rank of the Grid input."); if (gridShape.isStatic() && gridShape.getDims()[3] != 2) - THROW_ERROR << "has incorrect shape of the Grid input. The 4th dimension should be equal to 2."; + THROW_CPU_NODE_ERR("has incorrect shape of the Grid input. The 4th dimension should be equal to 2."); const auto& attributes = ov::as_type_ptr(op)->get_attributes(); alignCorners = attributes.align_corners; @@ -75,7 +70,7 @@ GridSample::GridSample(const std::shared_ptr& op, const GraphContext:: interpolationMode = GridSampleInterpolationMode::NEAREST; break; default: - THROW_ERROR << "supports only BILINEAR, BICUBIC, NEAREST interpolation modes."; + THROW_CPU_NODE_ERR("supports only BILINEAR, BICUBIC, NEAREST interpolation modes."); } switch (attributes.padding_mode) { case op::v9::GridSample::PaddingMode::ZEROS: @@ -88,7 +83,7 @@ GridSample::GridSample(const std::shared_ptr& op, const GraphContext:: paddingMode = GridSamplePaddingMode::REFLECTION; break; default: - THROW_ERROR << "supports only BORDER, REFLECTION, ZEROS paddings modes."; + THROW_CPU_NODE_ERR("supports only BORDER, REFLECTION, ZEROS paddings modes."); } } @@ -149,7 +144,7 @@ void GridSample::createPrimitive() { jitKernel.reset(new kernel::GridSampleKernel(jcp)); } if (!jitKernel) { - 
THROW_ERROR << " could not create JIT kernel."; + THROW_CPU_NODE_ERR("could not create JIT kernel."); } jitKernel->create_ker(); @@ -187,15 +182,15 @@ void GridSample::createPrimitive() { void GridSample::prepareParams() { auto dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) - THROW_ERROR << " has not allocated input data memory."; + THROW_CPU_NODE_ERR("has not allocated input data memory."); auto gridMemPtr = getParentEdgeAt(IN_GRID)->getMemoryPtr(); if (!gridMemPtr || !gridMemPtr->isAllocated()) - THROW_ERROR << " has not allocated input grid memory."; + THROW_CPU_NODE_ERR("has not allocated input grid memory."); auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR << " has not allocated output memory."; + THROW_CPU_NODE_ERR("has not allocated output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) - THROW_ERROR << " has unidentified preferable primitive descriptor."; + THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); const auto& srcDataShape = dataMemPtr->getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp index 78b5f9d66710ca..0bbd337273a81f 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp @@ -7,10 +7,6 @@ #include #include "kernels/x64/grid_sample.hpp" -#include -#include -#include - namespace ov { namespace intel_cpu { namespace node { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp index 4f24e7ac2d7a34..dbc7aa08b79770 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp @@ -48,7 +48,7 @@ template void 
jitUniGatherKernel::create_ker() { auto code = x64::jit_generator::create_kernel(); if (code != dnnl::impl::status::success) - IE_THROW() << "Could not create Gather kernel. Error code: " << std::to_string(code); + OPENVINO_THROW("Could not create Gather kernel. Error code: ", std::to_string(code)); ker_ = (decltype(ker_))jit_ker(); } @@ -154,7 +154,7 @@ void jitUniGatherKernel::generate() { process(true, true); } else { // Long case. - IE_THROW() << "Gather kernel does not support static shape with after axis size greater than elements in vector."; + OPENVINO_THROW("Gather kernel does not support static shape with after axis size greater than elements in vector."); } } } else { // Dynamic shapes. @@ -526,7 +526,7 @@ template void jitUniGatherKernel::calcSrcShiftLongBlock(Vmm* vAuxPool, bool shiftFirst) { // Most likely there will no significant performance gain vs memcpy in reference implementation on big blocks after axis, // therefore no time was invested to this case yet. - IE_THROW() << "Unsupported case."; + OPENVINO_THROW("Unsupported case."); } // Requires vAuxPool length 3. 
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp index aec991ba26360c..c061fa80a4c181 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.hpp @@ -22,6 +22,7 @@ #pragma once +#include "jit_kernel_base.hpp" #include "cpu/x64/jit_generator.hpp" #include diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp index 89e658a7d6a6fc..d70736dbe17234 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp @@ -30,7 +30,7 @@ template void GridSampleKernel::create_ker() { auto code = x64::jit_generator::create_kernel(); if (code != dnnl::impl::status::success) - IE_THROW() << "Could not create GridSample kernel. Error code: " << std::to_string(code); + OPENVINO_THROW("Could not create GridSample kernel. 
Error code: ", std::to_string(code)); ker_ = (decltype(ker_))jit_ker(); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index bc0daaf6e33e2a..3110e616a202ac 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -89,13 +89,13 @@ void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& v_dst, paddd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { - IE_THROW() << "Not supported operand type."; + OPENVINO_THROW("Not supported operand type."); } } else if (isValidIsa(x64::sse41)) { assert(v_dst.getIdx() != v_src.getIdx()); paddd(v_dst, op); } else { - IE_THROW() << "Not defined behavior for instruction 'vpaddd' in current instructions set."; + OPENVINO_THROW("Not defined behavior for instruction 'vpaddd' in current instructions set."); } } @@ -136,13 +136,13 @@ void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& v_dst, psubd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { - IE_THROW() << "Not supported operand type."; + OPENVINO_THROW("Not supported operand type."); } } else if (isValidIsa(x64::sse41)) { assert(v_dst.getIdx() != v_src.getIdx()); psubd(v_dst, op); } else { - IE_THROW() << "Not defined behavior for instruction 'vpsubd' in current instructions set."; + OPENVINO_THROW("Not defined behavior for instruction 'vpsubd' in current instructions set."); } } @@ -244,7 +244,7 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, const bool useMask, const bool zeroFill) { if (kReadMask.getIdx() == 0) { - IE_THROW() << "The vpgatherdd instruction cannot use the register k0 as mask."; + OPENVINO_THROW("The vpgatherdd instruction cannot use the register k0 as mask."); } if (!useMask) kxnord(kReadMask, kReadMask, kReadMask); @@ -261,7 +261,7 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& 
v_dst, const bool useMask, const bool zeroFill) { if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { - IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; + OPENVINO_THROW("Any pair of the index, mask, or destination registers cannot be the same."); } if (zeroFill) pxor(v_dst, v_dst); // Don't use vpxor. It zeros the rest of the YMM register. @@ -299,7 +299,7 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm& v_dst, const bool useMask, const bool zeroFill) { if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { - IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; + OPENVINO_THROW("Any pair of the index, mask, or destination registers cannot be the same."); } if (isValidIsa(x64::avx2)) { if (!useMask) @@ -430,7 +430,7 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Xmm& xmmDstMask, const Xbyak::Reg64& rWorkRest, const uint64_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not fill data with type size " << typeSize; + OPENVINO_THROW("Could not fill data with type size ", typeSize); } Xbyak::Label lEnd; auto r32Ones = getReg32(); @@ -459,7 +459,7 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Ymm& ymmDstMask, const Xbyak::Reg64& rWorkRest, const uint64_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not fill data with type size " << typeSize; + OPENVINO_THROW("Could not fill data with type size ", typeSize); } Xbyak::Label lEnd; auto elPerVec = x64::cpu_isa_traits::vlen / typeSize; @@ -499,7 +499,7 @@ void JitKernelBase::load(const Xbyak::Xmm& v_dst, const size_t typeSize, const bool zeroFilling) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not load data with type size " << typeSize; + OPENVINO_THROW("Could not load data with type size ", typeSize); } 
const uint8_t elPerVec = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; @@ -529,7 +529,7 @@ void JitKernelBase::load(const Xbyak::Ymm& v_dst, const size_t typeSize, const bool zeroFilling) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not load data with type size " << typeSize; + OPENVINO_THROW("Could not load data with type size ", typeSize); } const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; @@ -568,7 +568,7 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not store data with type size " << typeSize; + OPENVINO_THROW("Could not store data with type size ", typeSize); } Xbyak::Label lEnd; const size_t elPerVec = x64::cpu_isa_traits::vlen / typeSize; @@ -596,7 +596,7 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { - IE_THROW() << "Could not store data with type size " << typeSize; + OPENVINO_THROW("Could not store data with type size ", typeSize); } Xbyak::Label lEnd; Xbyak::Xmm xmmSrc(v_src.getIdx()); diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp index 77d823710c942f..49584ae62dd653 100644 --- a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp +++ b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp @@ -6,9 +6,8 @@ #include "ie_parallel.hpp" #include "ie_ngraph_utils.hpp" -#include -#include -#include "shape_inference/custom/random_uniform.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/random_uniform.hpp" namespace ov { namespace intel_cpu { @@ -27,7 +26,7 @@ bool RandomUniform::isSupportedOperation(const std::shared_ptr& } RandomUniform::RandomUniform(const std::shared_ptr& op, const GraphContext::CPtr& context) - : Node(op, context, RandomUniformShapeInferFactory(op)) { + 
: Node(op, context, NgraphShapeInferFactory(op, PortMask(0, 1, 2))) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { THROW_CPU_NODE_ERR(errorMessage); diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp deleted file mode 100644 index cca3c74cce86b0..00000000000000 --- a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "random_uniform.hpp" -#include - -namespace ov { -namespace intel_cpu { -namespace node { - -// TODO: remove after fixing the issue 123011 -IShapeInfer::Result RandomUniformShapeInfer::infer( - const std::vector>& input_shapes, - const std::unordered_map& data_dependency) { - VectorDims dims; - const auto& mem = data_dependency.at(0); - const auto rank = mem->getShape().getElementsCount(); - auto shape_prc = mem->getDesc().getPrecision(); - switch (shape_prc) { - case InferenceEngine::Precision::I32: { - auto data = reinterpret_cast(mem->getData()); - dims.assign(data, data + rank); - } break; - case InferenceEngine::Precision::I64: { - auto data = reinterpret_cast(mem->getData()); - dims.assign(data, data + rank); - } break; - default: - OPENVINO_THROW("Unexpected Shape input precision: ", shape_prc); - } - - return {{dims}, ShapeInferStatus::success}; -} - -RandomUniformShapeInferFactory::RandomUniformShapeInferFactory(const std::shared_ptr& op) : m_op(op) { - OPENVINO_ASSERT(ov::is_type(m_op), - "Unexpected op type in RandomUniform shape inference factory: ", m_op->get_type_name()); -} - -ShapeInferPtr RandomUniformShapeInferFactory::makeShapeInfer() const { - return std::make_shared(); -} - -} // namespace node -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp 
b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp deleted file mode 100644 index ce87a966a9cbc9..00000000000000 --- a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "shape_inference/shape_inference_cpu.hpp" -#include - -#pragma once - -namespace ov { -namespace intel_cpu { -namespace node { - -class RandomUniformShapeInfer : public ShapeInferEmptyPads { -public: - explicit RandomUniformShapeInfer() {} - IShapeInfer::Result infer( - const std::vector>& input_shapes, - const std::unordered_map& data_dependency) override; - - port_mask_t get_port_mask() const override { - return PortMask(0); - } -}; - -class RandomUniformShapeInferFactory : public ShapeInferFactory { -public: - explicit RandomUniformShapeInferFactory(const std::shared_ptr& op); - ShapeInferPtr makeShapeInfer() const override; - -private: - std::shared_ptr m_op; -}; - -} // namespace node -} // namespace intel_cpu -} // namespace ov From 7874adb58ef81952a408fa91ebce338c5229be0c Mon Sep 17 00:00:00 2001 From: Evgenya Nugmanova Date: Wed, 25 Oct 2023 11:54:47 +0400 Subject: [PATCH 048/275] [Symbolic SI] Refactor Table of Equivalence (#20627) --- src/core/src/dimension_tracker.cpp | 41 ++++++++++---------- src/core/tests/dimension.cpp | 60 +++++++++++++++--------------- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/core/src/dimension_tracker.cpp b/src/core/src/dimension_tracker.cpp index 07bb1f52435657..4de869ead861e9 100644 --- a/src/core/src/dimension_tracker.cpp +++ b/src/core/src/dimension_tracker.cpp @@ -8,26 +8,23 @@ using namespace ov; void TableOfEquivalence::set_as_equal(const Dimension& lhs, const Dimension& rhs) { const auto &l_label = DimensionTracker::get_label(lhs), r_label = DimensionTracker::get_label(rhs); - bool l_known = dimension_table_of_equivalence.count(l_label) && 
dimension_table_of_equivalence[l_label], - r_known = dimension_table_of_equivalence.count(r_label) && dimension_table_of_equivalence[r_label]; - if (l_known && r_known) { - auto soup_l = dimension_table_of_equivalence[l_label]; - soup_l->insert(r_label); - auto soup_r = dimension_table_of_equivalence[r_label]; - soup_r->insert(l_label); - soup_l->insert(soup_r->begin(), soup_r->end()); - soup_r->insert(soup_l->begin(), soup_l->end()); - } else { - auto soup = std::make_shared>(); - if (l_known) - soup = dimension_table_of_equivalence[l_label]; - else if (r_known) - soup = dimension_table_of_equivalence[r_label]; - soup->insert(l_label); - soup->insert(r_label); - dimension_table_of_equivalence[l_label] = soup; - dimension_table_of_equivalence[r_label] = soup; - } + if (l_label == ov::no_label || r_label == ov::no_label) + // TODO after value restriction enabling: non labeled dim propagates restriction (if any) to labeled dim + return; + + auto get_soup = [](const label_t& label, EqTable& table) -> EqualitySoup { + if (!table.count(label) || !table.at(label)) + table[label] = std::make_shared>(std::set{label}); + return table.at(label); + }; + + auto l_soup = get_soup(l_label, dimension_table_of_equivalence); + auto r_soup = get_soup(r_label, dimension_table_of_equivalence); + if (r_soup->size() > l_soup->size()) // we would like to minimize number of iterations in the following for-loop + std::swap(l_soup, r_soup); + l_soup->insert(r_soup->begin(), r_soup->end()); + for (const auto& label : *r_soup) + dimension_table_of_equivalence[label] = l_soup; } const ValTable& TableOfEquivalence::get_value_equivalence_table() const { @@ -43,7 +40,9 @@ label_t TableOfEquivalence::get_next_label() { } bool TableOfEquivalence::are_equal(const Dimension& lhs, const Dimension& rhs) { - const auto &l_label = DimensionTracker::get_label(lhs), r_label = DimensionTracker::get_label(rhs); + if (!DimensionTracker::has_label(lhs) || !DimensionTracker::has_label(rhs)) + return false; + 
const auto &l_label = DimensionTracker::get_label(lhs), &r_label = DimensionTracker::get_label(rhs); if (l_label == r_label) return true; if (dimension_table_of_equivalence.count(l_label) && dimension_table_of_equivalence[l_label]) diff --git a/src/core/tests/dimension.cpp b/src/core/tests/dimension.cpp index 69eb16a6ded9d7..262d7c75ea4561 100644 --- a/src/core/tests/dimension.cpp +++ b/src/core/tests/dimension.cpp @@ -127,38 +127,38 @@ TEST(dimension, dimension_equality) { DimensionTracker dt(te); // labeling dimensions - Dimension A, B, C; - dt.set_up_for_tracking(A); - dt.set_up_for_tracking(B); - dt.set_up_for_tracking(C); + PartialShape dimensions = PartialShape::dynamic(5); // A, B, C, D, E + for (auto& dimension : dimensions) + dt.set_up_for_tracking(dimension); // checking labels are unique - EXPECT_NE(DimensionTracker::get_label(A), no_label); - EXPECT_NE(DimensionTracker::get_label(B), no_label); - EXPECT_NE(DimensionTracker::get_label(C), no_label); - EXPECT_NE(DimensionTracker::get_label(A), DimensionTracker::get_label(B)); - EXPECT_NE(DimensionTracker::get_label(B), DimensionTracker::get_label(C)); - EXPECT_NE(DimensionTracker::get_label(A), DimensionTracker::get_label(C)); - EXPECT_EQ(DimensionTracker::get_label(A), DimensionTracker::get_label(A)); - EXPECT_EQ(DimensionTracker::get_label(B), DimensionTracker::get_label(B)); - EXPECT_EQ(DimensionTracker::get_label(C), DimensionTracker::get_label(C)); - - // setting A == B and B == C - te->set_as_equal(A, B); - te->set_as_equal(C, B); - - // expected to see A == B, B == C and A == C - EXPECT_TRUE(te->are_equal(A, B)); - EXPECT_TRUE(te->are_equal(A, C)); - EXPECT_TRUE(te->are_equal(B, C)); + for (const auto& dimension : dimensions) + EXPECT_NE(DimensionTracker::get_label(dimension), no_label); + + for (const auto& lhs : dimensions) { + for (const auto& rhs : dimensions) { + if (&lhs == &rhs) + continue; + EXPECT_NE(DimensionTracker::get_label(lhs), DimensionTracker::get_label(rhs)); + 
EXPECT_FALSE(te->are_equal(lhs, rhs)); + } + } + + te->set_as_equal(dimensions[0], dimensions[1]); // A == B + te->set_as_equal(dimensions[3], dimensions[4]); // D == E + te->set_as_equal(dimensions[2], dimensions[3]); // C == D + te->set_as_equal(dimensions[1], dimensions[2]); // B == C + + // expected to see A == B == C == D == E + for (const auto& lhs : dimensions) + for (const auto& rhs : dimensions) + EXPECT_TRUE(te->are_equal(lhs, rhs)); // clear up all the tracking info - DimensionTracker::reset_tracking_info(A); - DimensionTracker::reset_tracking_info(B); - DimensionTracker::reset_tracking_info(C); - - // expected to have no label - EXPECT_EQ(DimensionTracker::get_label(A), no_label); - EXPECT_EQ(DimensionTracker::get_label(B), no_label); - EXPECT_EQ(DimensionTracker::get_label(C), no_label); + for (auto& dimension : dimensions) + DimensionTracker::reset_tracking_info(dimension); + + // checking labels are unique + for (const auto& dimension : dimensions) + EXPECT_EQ(DimensionTracker::get_label(dimension), no_label); } From e1a33f10d5b4b8bf1efe05dc3875a1ad63f10b96 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 25 Oct 2023 10:19:14 +0200 Subject: [PATCH 049/275] [core]Migrate Slice to new API (#20417) * Migrate slice to new API * Remove visit_attributes, is same as base class * Move shape checks to shape_infer - minor refactor Slice op * Move `get_tensors_partial_shapes` to dev API * Correct comment Co-authored-by: Tomasz Jankowski --------- Co-authored-by: Tomasz Jankowski --- src/core/include/openvino/op/slice.hpp | 7 +- .../include/slice_shape_inference.hpp | 14 +- src/core/src/op/slice.cpp | 249 +++++++----------- 3 files changed, 101 insertions(+), 169 deletions(-) diff --git a/src/core/include/openvino/op/slice.hpp b/src/core/include/openvino/op/slice.hpp index 934e6896f1629e..bb36ea4cac02da 100644 --- a/src/core/include/openvino/op/slice.hpp +++ b/src/core/include/openvino/op/slice.hpp @@ -4,6 +4,7 @@ #pragma once +#include 
"openvino/op/constant.hpp" #include "openvino/op/op.hpp" namespace ov { @@ -40,14 +41,10 @@ class OPENVINO_API Slice : public Op { const Output& axes); void validate_and_infer_types() override; - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START bool has_evaluate() const override; - // TODO: Update to use new evaluate with TensorVector - bool evaluate(const HostTensorVector&, const HostTensorVector&) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector&, const TensorVector&) const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool evaluate_label(TensorLabelVector& output_labels) const override; diff --git a/src/core/shape_inference/include/slice_shape_inference.hpp b/src/core/shape_inference/include/slice_shape_inference.hpp index cd6c3b018bdf68..04461484a81154 100644 --- a/src/core/shape_inference/include/slice_shape_inference.hpp +++ b/src/core/shape_inference/include/slice_shape_inference.hpp @@ -57,6 +57,14 @@ std::vector shape_infer(const Slice* op, const auto& input_shape = input_shapes[0]; const auto& input_rank = input_shape.rank(); + // it is not possible to define output shape if input data shape rank is undefined + // even if lengths of begin, end, or strides are defined + if (input_rank.is_dynamic()) { + return {PartialShape::dynamic()}; + } else { + NODE_SHAPE_INFER_CHECK(op, input_shapes, input_rank.get_length() > 0, "Slice `data` input can't be a scalar."); + } + for (size_t i = 1; i < input_shapes.size(); ++i) { const auto& shape = input_shapes[i]; const auto& shape_rank = shape.rank(); @@ -87,12 +95,6 @@ std::vector shape_infer(const Slice* op, "Slice `start`, `stop`, `step` inputs must have compatible shapes."); auto output_shapes = std::vector(1); - // it is not possible to define output shape if input data 
shape rank is undefined - // even the lengths of begin, end, or strides are defined - if (input_rank.is_dynamic()) { - output_shapes[0] = PartialShape::dynamic(); - return output_shapes; - } // compute constant values of begin, end, and strides if possible const auto start = get_input_bounds(op, 1, ta); diff --git a/src/core/src/op/slice.cpp b/src/core/src/op/slice.cpp index f08a885bea280e..0bca5274b15ff2 100644 --- a/src/core/src/op/slice.cpp +++ b/src/core/src/op/slice.cpp @@ -2,223 +2,156 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/slice.hpp" - -#include +#include "openvino/op/slice.hpp" #include "bound_evaluate.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/graph_util.hpp" -#include "ngraph/op/constant.hpp" #include "openvino/reference/slice.hpp" #include "slice_shape_inference.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace { +std::vector default_axes(const size_t n) { + std::vector axes; + axes.reserve(n); + std::generate_n(std::back_inserter(axes), n, SeqGen(0)); + return axes; +} + +bool slice_bound_check(const ov::Node* const node) { + return ov::have_node_inputs_bounds_set(node, 1, node->get_input_size() - 1); +} + +bool slice_no_axes(const Node* const node) { + return node->get_input_size() < 5; +} +} // namespace + +namespace v8 { +using ov::op::v0::Constant; -op::v8::Slice::Slice(const Output& data, - const Output& start, - const Output& stop, - const Output& step) +Slice::Slice(const Output& data, const Output& start, const Output& stop, const Output& step) : Op({data, start, stop, step}) { constructor_validate_and_infer_types(); } -op::v8::Slice::Slice(const Output& data, - const Output& start, - const Output& stop, - const Output& step, - const Output& axes) +Slice::Slice(const Output& data, + const Output& start, + const Output& stop, + const Output& step, + const Output& axes) : Op({data, start, stop, step, axes}) { 
constructor_validate_and_infer_types(); } -bool op::v8::Slice::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v8_Slice_visit_attributes); - return true; -} - -std::shared_ptr op::v8::Slice::get_default_const_axes(const Output& start) const { - const auto start_pshape = start.get_partial_shape(); +std::shared_ptr Slice::get_default_const_axes(const Output& start) const { + const auto& start_pshape = start.get_partial_shape(); // Static case - if (start_pshape.rank().is_static() && start_pshape.rank().get_length() == 1 && start_pshape[0].is_static()) { - size_t axes_length = start_pshape[0].get_length(); - std::vector axes(axes_length); - std::iota(axes.begin(), axes.end(), 0); - return v0::Constant::create(element::i64, Shape{axes_length}, axes); + if (start_pshape.is_static() && start_pshape.size() == 1) { + const auto axes = default_axes(static_cast(start_pshape[0].get_length())); + return Constant::create(element::i64, start_pshape.get_shape(), axes); } else { // Dynamic case return {}; } -} +} // namespace ov -void op::v8::Slice::validate_and_infer_types() { +void Slice::validate_and_infer_types() { OV_OP_SCOPE(v8_Slice_validate_and_infer_types); - const auto inputs_size = get_input_size(); - NODE_VALIDATION_CHECK(this, - inputs_size == 4 || inputs_size == 5, - "Slice has to have 4 or 5 inputs. 
Got: ", - inputs_size); - - const PartialShape& data_shape = get_input_partial_shape(0); - const auto& data_rank = data_shape.rank(); - - NODE_VALIDATION_CHECK(this, - data_rank.is_dynamic() || data_rank.get_length() > 0, - "Slice `data` input can't be a scalar."); - - if (get_input_size() < 5) { + if (slice_no_axes(this)) { if (auto axes_const = get_default_const_axes(input_value(1))) { set_argument(4, axes_const); } } - for (size_t i = 0; i < get_input_size(); ++i) { - if (i > 0) { - NODE_VALIDATION_CHECK(this, - get_input_element_type(i).is_integral_number(), - "Slice `", - slice::shape_names[i - 1], - "` input type must be integer."); - } - - set_input_is_relevant_to_shape(i); - } - OPENVINO_SUPPRESS_DEPRECATED_START const auto input_shapes = get_node_input_partial_shapes(*this); OPENVINO_SUPPRESS_DEPRECATED_END - const auto output_shapes = shape_infer(this, input_shapes); + + set_input_is_relevant_to_shape(0); + for (size_t i = 1; i < get_input_size(); ++i) { + NODE_VALIDATION_CHECK(this, + get_input_element_type(i).is_integral_number(), + "Slice `", + slice::shape_names[i - 1], + "` input type must be integer."); + set_input_is_relevant_to_shape(i); + } + set_output_type(0, get_input_element_type(0), output_shapes.front()); } -std::shared_ptr op::v8::Slice::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Slice::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v8_Slice_clone_with_new_inputs); check_new_args_count(this, new_args); if (new_args.size() == 4) { - return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); } else { - return std::make_shared(new_args.at(0), - new_args.at(1), - new_args.at(2), - new_args.at(3), - new_args.at(4)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4)); } } -bool op::v8::Slice::has_evaluate() const { 
+bool Slice::has_evaluate() const { OV_OP_SCOPE(v8_Slice_has_evaluate); - switch (get_input_element_type(1)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: - break; - default: - return false; - } - if (get_input_size() > 4) { - switch (get_input_element_type(4)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: - break; + const auto valid_integral_type = [](const element::Type& et) -> bool { + switch (et) { + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: + return true; default: return false; } - } + }; - return true; + return valid_integral_type(get_input_element_type(1)) && + (slice_no_axes(this) || valid_integral_type(get_input_element_type(4))); } -OPENVINO_SUPPRESS_DEPRECATED_START -bool op::v8::Slice::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Slice::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v8_Slice_evaluate); - OPENVINO_ASSERT(inputs.size() >= 4, "Slice evaluate needs at least 4 inputs."); - - // Static HostTensor data shape is needed to clamp and normalize `start` values - OPENVINO_ASSERT(inputs[0]->get_partial_shape().is_static(), - "Can't evaluate Slice elements without static HostTensor data shape."); - - auto input_shapes = std::vector(); - input_shapes.reserve(inputs.size()); - - for (size_t i = 0; i < inputs.size(); ++i) { - auto&& tensor = inputs[i]; - input_shapes.push_back(tensor->get_partial_shape()); - } - - OPENVINO_SUPPRESS_DEPRECATED_START - const auto starts = 
host_tensor_2_vector(inputs[1]); - const auto stops = host_tensor_2_vector(inputs[2]); - const auto steps = host_tensor_2_vector(inputs[3]); - std::vector axes; - if (inputs.size() < 5) { - axes.reserve(starts.size()); - std::generate_n(std::back_inserter(axes), starts.size(), SeqGen(0)); - } else { - axes = host_tensor_2_vector(inputs[4]); - } - OPENVINO_SUPPRESS_DEPRECATED_END - - const auto output_shapes = shape_infer(this, input_shapes, make_tensor_accessor(inputs)); - OPENVINO_ASSERT(output_shapes.front().is_static(), "Can't calculate static output shape for Slice evaluation."); - - outputs[0]->set_shape(output_shapes.front().to_shape()); - outputs[0]->set_element_type(inputs[0]->get_element_type()); - - ov::reference::slice(inputs[0]->get_data_ptr(), - inputs[0]->get_shape(), - outputs[0]->get_data_ptr(), - outputs[0]->get_shape(), - inputs[0]->get_element_type().size(), - starts, - steps, - axes); - return true; -} -OPENVINO_SUPPRESS_DEPRECATED_END - -namespace { -bool slice_input_check(const ov::Node* node) { - if (!node->get_input_tensor(1).has_and_set_bound()) - return false; - if (!node->get_input_tensor(2).has_and_set_bound()) - return false; - if (!node->get_input_tensor(3).has_and_set_bound()) - return false; - if (node->get_input_size() == 5 && !node->get_input_tensor(4).has_and_set_bound()) - return false; + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + outputs[0].set_shape(output_shapes.front().to_shape()); + + const auto starts = ov::get_tensor_data_as(inputs[1]); + const auto steps = ov::get_tensor_data_as(inputs[3]); + const auto axes = slice_no_axes(this) ? 
default_axes(starts.size()) : ov::get_tensor_data_as(inputs[4]); + + reference::slice(static_cast(inputs[0].data()), + inputs[0].get_shape(), + static_cast(outputs[0].data()), + outputs[0].get_shape(), + inputs[0].get_element_type().size(), + starts, + steps, + axes); return true; } -} // namespace -bool op::v8::Slice::evaluate_lower(ov::TensorVector& output_values) const { - return slice_input_check(this) && default_lower_bound_evaluator(this, output_values); +bool Slice::evaluate_lower(ov::TensorVector& output_values) const { + return slice_bound_check(this) && default_lower_bound_evaluator(this, output_values); } -bool op::v8::Slice::evaluate_upper(ov::TensorVector& output_values) const { - return slice_input_check(this) && default_upper_bound_evaluator(this, output_values); +bool Slice::evaluate_upper(ov::TensorVector& output_values) const { + return slice_bound_check(this) && default_upper_bound_evaluator(this, output_values); } -bool op::v8::Slice::evaluate_label(TensorLabelVector& output_labels) const { - if (!slice_input_check(this)) - return false; +bool Slice::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_START - return default_label_evaluator(this, output_labels); + return slice_bound_check(this) && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } +} // namespace v8 +} // namespace op +} // namespace ov From d138dd2f08d129c7afb74daddd110fda4494e2c8 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 25 Oct 2023 12:37:04 +0400 Subject: [PATCH 050/275] [CONFORMANCE][SUBGRAPHS DUMPER] Fix issue connected with `fused_names` extractor (#20522) * [CONFORMANCE][SUBGRAPHS DUMPER] Fix issue connected with extractor * fix build --- .../subgraphs_dumper/include/cache/cache.hpp | 4 ++-- .../subgraphs_dumper/include/cache/graph_cache.hpp | 2 +- .../subgraphs_dumper/include/gflag_config.hpp | 8 ++++++-- .../include/matchers/subgraph/fused_names.hpp | 2 -- 
.../subgraphs_dumper/include/utils/model.hpp | 3 ++- .../subgraphs_dumper/src/cache/graph_cache.cpp | 8 ++++---- .../plugin/conformance/subgraphs_dumper/src/main.cpp | 10 ++++++++++ .../src/matchers/subgraph/fused_names.cpp | 9 ++------- .../conformance/subgraphs_dumper/src/utils/model.cpp | 7 ++----- .../conformance/subgraphs_dumper/tests/cache/cache.cpp | 4 ++-- .../subgraphs_dumper/tests/cache/op_cache.cpp | 4 ++-- 11 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp index 8f762e5cbacf8c..a35eca0e7ad619 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp @@ -36,8 +36,8 @@ class ICache { model_bytesize_gb >>= 30; auto mem_size_gb = mem_size; mem_size_gb >>= 30; - // std::cout << "[ WARNING ] Model " << model_path << " bytesize is " << model_bytesize_gb << - // "is larger than RAM size: " << mem_size_gb << ". Model will be skipped!" << std::endl; + std::cout << "[ WARNING ] Model " << model_path << " bytesize is " << model_bytesize_gb << + "is larger than RAM size: " << mem_size_gb << ". Model will be skipped!" 
<< std::endl; return true; } return false; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp index 6bcedde7f9a114..c3f6ae6aecf971 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp @@ -53,7 +53,7 @@ class GraphCache : public ICache { GraphCache(const std::string& device = "") { ExtractorsManager::ExtractorsMap matchers = { // temporary disabling according mem leaks in CI and not using swap mem - // { "fused_names", FusedNamesExtractor::Ptr(new FusedNamesExtractor(device)) }, + { "fused_names", FusedNamesExtractor::Ptr(new FusedNamesExtractor(device)) }, { "repeat_pattern", RepeatPatternExtractor::Ptr(new RepeatPatternExtractor) }, }; m_manager.set_extractors(matchers); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp index c1a999f190227c..298397c433ecf9 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp @@ -17,16 +17,19 @@ static const char path_regex_message[] = "Optional. regular expression to be app "folders recursive discovery"; static const char extract_body_message[] = "Optional. Allow to extract operation bodies to operation cache."; static const char cache_type_message[] = "Optional. Specify caching type: OP, GRAPH. The default value is both"; -static const char device_message[] = "Optional. Specify device to compile model for `fused_names` extractor. Default is `TEMPLATE` "; +static const char device_message[] = "Optional. Specify device to compile model for `fused_names` extractor. 
Default is `CPU` "; +static const char target_plugin_message[] = + "Optional. Name of plugin library. The example is `/path/to/libopenvino_template_plugin.so`. Use only with unregistered in OV Core devices"; DEFINE_bool(h, false, help_message); DEFINE_string(input_folders, "", local_cache_message); DEFINE_string(local_cache, "", input_folders_message); DEFINE_string(output_folder, "output", output_folder_message); -DEFINE_string(device, ov::test::utils::DEVICE_TEMPLATE, device_message); +DEFINE_string(device, "CPU", device_message); DEFINE_string(path_regex, ".*", output_folder_message); DEFINE_bool(extract_body, true, extract_body_message); DEFINE_string(cache_type, "", cache_type_message); +DEFINE_string(plugin_lib_name, "", target_plugin_message); /** * @brief This function shows a help message @@ -44,5 +47,6 @@ static void showUsage() { std::cout << " --extract_body \"\" " << extract_body_message << "\n"; std::cout << " --cache_type \"\" " << cache_type_message << "\n"; std::cout << " --device \"\" " << device_message << "\n"; + std::cout << " --plugin_lib_name \"\" " << target_plugin_message << std::endl; std::cout << std::flush; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp index d9a78fe51220ce..9d621f8ebda905 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/subgraph/fused_names.hpp @@ -15,7 +15,6 @@ namespace subgraph_dumper { class FusedNamesExtractor final : public SubgraphExtractor { public: FusedNamesExtractor(const std::string& device = ""); - ~FusedNamesExtractor(); std::vector extract(const std::shared_ptr &modele) override; @@ -24,7 +23,6 @@ class FusedNamesExtractor final : public SubgraphExtractor { void set_target_device(const std::string& 
_device); std::string device; - std::shared_ptr core; }; } // namespace subgraph_dumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp index 8b48b2074df711..bfae8ea874b2c0 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp @@ -15,7 +15,6 @@ #include "common_test_utils/file_utils.hpp" #include "common_test_utils/test_constants.hpp" -#include "functional_test_utils/ov_plugin_cache.hpp" #include "cache/cache.hpp" #include "utils/node.hpp" @@ -62,6 +61,8 @@ static std::map model_cache_status_to_str = { { ModelCacheStatus::LARGE_MODELS_INCLUDED, "large_models_included" }, }; +const std::shared_ptr core = std::make_shared(); + std::pair, std::pair>> find_models(const std::vector &dirs, const std::string& regexp = ".*"); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp index 51ee4835419c15..824f611a6e8808 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp @@ -38,7 +38,7 @@ void GraphCache::update_cache(const std::shared_ptr& model, // check that Free RAM memory is enough. Serialize in other case // serialize graph cache in case graph cache bytesize > 4GB to avoid long search the same graphs if (m_graph_cache_bytesize + 2 * model_bytesize > mem_size || m_graph_cache_bytesize >> 20 != 0) { - // std::cout << "[ GRAPH CACHE ][ WARNING ] There are not enought RAM memory! Serialize graph cache" << std::endl; + std::cout << "[ GRAPH CACHE ][ WARNING ] There are not enought RAM memory! 
Serialize graph cache" << std::endl; serialize_cache(); m_graph_cache_bytesize = 0; } @@ -48,8 +48,8 @@ void GraphCache::update_cache(const std::shared_ptr& model, model_bytesize_gb >>= 30; auto mem_size_gb = mem_size; mem_size_gb >>= 30; - // std::cout << "[ GRAPH CACHE ][ WARNING ] Model bytesize is " << model_bytesize_gb << - // "GB. It is larger than 25% RAM size: " << mem_size_gb << ". Constants won't be copied!" << std::endl; + std::cout << "[ GRAPH CACHE ][ WARNING ] Model bytesize is " << model_bytesize_gb << + "GB. It is larger than 25% RAM size: " << mem_size_gb << ". Constants won't be copied!" << std::endl; } auto extracted_patterns = m_manager.extract(model, extract_body, !is_large_model); if (extracted_patterns.empty()) { @@ -96,7 +96,7 @@ void GraphCache::update_cache(const std::shared_ptr& extracted_model, // std::cout << "[ GRAPH CACHE ][ INFO ] Reading cached model: " << serialized_model_path << std::endl; auto bin_path = ov::test::utils::replaceExt(serialized_model_path, ".bin"); auto meta_path = ov::test::utils::replaceExt(serialized_model_path, ".meta"); - auto cached_model = ov::test::utils::PluginCache::get().core()->read_model(serialized_model_path); + auto cached_model = core->read_model(serialized_model_path); auto cached_meta = MetaInfo::read_meta_from_file(meta_path); ov::test::utils::removeFile(serialized_model_path); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp index 4b678d8725132a..825e8f7648f19b 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp @@ -22,6 +22,16 @@ int main(int argc, char *argv[]) { return 0; } + if (!FLAGS_device.empty() && !FLAGS_plugin_lib_name.empty()) { + try { + core->register_plugin(FLAGS_plugin_lib_name + OV_BUILD_POSTFIX, FLAGS_device); + std::cout << "[ INFO ] Device: " << FLAGS_device << " is 
registred in OV core with " << FLAGS_plugin_lib_name << " lib" << std::endl; + } catch (const std::exception& e) { + std::cout << "[ ERROR ] Impossible to register device " << FLAGS_device << " with lib " << FLAGS_plugin_lib_name << + std::endl << e.what() << std::endl; + } + } + std::vector local_cache_dirs = ov::test::utils::splitStringByDelimiter(FLAGS_local_cache); std::vector dirs = ov::test::utils::splitStringByDelimiter(FLAGS_input_folders); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp index 6ce421e6c3aaaf..17c477d8a51f70 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp @@ -8,7 +8,6 @@ #include "openvino/op/loop.hpp" #include "common_test_utils/common_utils.hpp" -#include "functional_test_utils/ov_plugin_cache.hpp" #include "matchers/subgraph/fused_names.hpp" #include "utils/model.hpp" @@ -22,7 +21,8 @@ void FusedNamesExtractor::set_target_device(const std::string& _device) { std::cout << "[ WARNING ][ GRAPH CACHE ] " << device << " will be used for `fused_names` extractor" << std::endl; return; - } else if (std::find(available_devices.begin(), + } else if (_device != "TEMPLATE" && + std::find(available_devices.begin(), available_devices.end(), _device) == available_devices.end()) { std::string message = "Incorrect device "; @@ -49,14 +49,9 @@ FusedNamesExtractor::extract_compiled_model_names(const std::shared_ptr FusedNamesExtractor::extract(const std::shared_ptr &model) { auto compiled_op_name = extract_compiled_model_names(model); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp index b84a11b0b458c2..ef0dd60d0f771b 100644 
--- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp @@ -3,7 +3,6 @@ // #include "openvino/util/file_util.hpp" -#include "functional_test_utils/ov_plugin_cache.hpp" #include "utils/model.hpp" namespace ov { @@ -46,7 +45,6 @@ find_models(const std::vector &dirs, const std::string& regexp) { try { // models.emplace_back(file); if (ov::util::file_exists(model_file)) { - auto core = ov::test::utils::PluginCache::get().core(); auto model_size = core->read_model(model_file)->get_graph_size(); models_sorted_by_size.insert({ model_size, model_file}); } else { @@ -102,7 +100,6 @@ std::map> cache_models( { ModelCacheStatus::LARGE_MODELS_EXCLUDED, {} }, { ModelCacheStatus::LARGE_MODELS_INCLUDED, {} }, }; - auto core = ov::test::utils::PluginCache::get().core(); auto models_size = models.size(); for (size_t i = 0; i < models_size; ++i) { @@ -121,8 +118,8 @@ std::map> cache_models( cache_status[ModelCacheStatus::LARGE_MODELS_INCLUDED].push_back(model); } cache->update_cache(function, model, extract_body, from_cache); - } catch (std::exception) { - // std::cout << "[ ERROR ] Model processing failed with exception:" << std::endl << e.what() << std::endl; + } catch (std::exception& e) { + std::cout << "[ ERROR ] Model processing failed with exception:" << std::endl << e.what() << std::endl; model_status = ModelCacheStatus::NOT_FULLY_CACHED; } } catch (std::exception) { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/cache.cpp index 0450d05ab1c054..f803b390651858 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/cache.cpp @@ -13,6 +13,7 @@ #include "cache/cache.hpp" #include "cache/meta/meta_info.hpp" +#include "utils/model.hpp" #include 
"base_test.hpp" @@ -79,8 +80,7 @@ TEST_F(ICacheUnitTest, serialize_model) { if (!ov::util::file_exists(meta_path)) { throw std::runtime_error("Meta was not serilized!"); } - auto core = ov::Core(); - auto serialized_model = core.read_model(xml_path, bin_path); + auto serialized_model = ov::tools::subgraph_dumper::core->read_model(xml_path, bin_path); auto res = compare_functions(test_model, serialized_model, true, true, true, true, true, true); if (!res.first) { throw std::runtime_error("Serialized and runtime model are not equal!"); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index 0ec25023f3801c..6900efd658b13f 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -12,6 +12,7 @@ #include "cache/op_cache.hpp" #include "utils/node.hpp" +#include "utils/model.hpp" #include "base_test.hpp" @@ -164,8 +165,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { auto serialized_model_path = ov::util::path_join({test_artifacts_dir, "operation", "static", "Convert-0", "f16", "Convert-0_0.xml"}); ASSERT_TRUE(ov::util::file_exists(serialized_model_path)); - auto core = ov::Core(); - auto serialized_model = core.read_model(serialized_model_path); + auto serialized_model = core->read_model(serialized_model_path); auto res = compare_functions(test_model, serialized_model, true, false, true, true, true, false); ASSERT_TRUE(res.first); } From b3eb2810d70fb603824310369551bcad46a6ccb5 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 25 Oct 2023 10:39:50 +0200 Subject: [PATCH 051/275] [core]Migrate Negative operator to new API (#20609) * Migrate Negative operator to new API * Remove `visit_attributes` is same as base * Use std::negate instead of lambda --------- Co-authored-by: Michal Lukaszewski --- 
src/core/include/openvino/op/negative.hpp | 5 +- .../include/openvino/reference/negate.hpp | 17 ++- src/core/src/op/negative.cpp | 100 +++++++----------- src/core/src/validation_util.cpp | 4 + 4 files changed, 58 insertions(+), 68 deletions(-) diff --git a/src/core/include/openvino/op/negative.hpp b/src/core/include/openvino/op/negative.hpp index 1b031446180782..db67628e8a6f9d 100644 --- a/src/core/include/openvino/op/negative.hpp +++ b/src/core/include/openvino/op/negative.hpp @@ -21,11 +21,8 @@ class OPENVINO_API Negative : public util::UnaryElementwiseArithmetic { /// \param arg Node that produces the input tensor. Negative(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/negate.hpp b/src/core/reference/include/openvino/reference/negate.hpp index d63755ba2ec88e..26f679e1288822 100644 --- a/src/core/reference/include/openvino/reference/negate.hpp +++ b/src/core/reference/include/openvino/reference/negate.hpp @@ -4,15 +4,22 @@ #pragma once -#include +#include +#include namespace ov { namespace reference { + +/** + * @brief Reference implementation of Negative operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ template -void negate(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = -arg[i]; - } +void negate(const T* arg, T* out, const size_t count) { + std::transform(arg, std::next(arg, count), out, std::negate()); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/negative.cpp b/src/core/src/op/negative.cpp index 1cbe44de659a4b..a34d29a479d83d 100644 --- a/src/core/src/op/negative.cpp +++ b/src/core/src/op/negative.cpp @@ -2,83 +2,65 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/negative.hpp" +#include "openvino/op/negative.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/negate.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace negative { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::Negative::Negative(const Output& arg) : UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} + template > + static result_type visit(const Tensor& arg0, Tensor& out, const size_t count) { + reference::negate(arg0.data(), out.data(), count); + return true; + } +}; +} // namespace negative -bool ngraph::op::v0::Negative::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Negative_visit_attributes); - return true; +namespace v0 { + +Negative::Negative(const Output& arg) : UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -shared_ptr op::Negative::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Negative::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Negative_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace negativeop { -namespace { -template -inline 
bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::negate(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; + return std::make_shared(new_args.at(0)); } -bool evaluate_negative(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); - - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_negative, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_negative, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_negative, bf16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_negative, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_negative, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace negativeop - -bool op::Negative::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Negative::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Negative_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(inputs, 1)); - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return negativeop::evaluate_negative(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + outputs[0].set_shape(inputs[0].get_shape()); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(inputs[0].get_shape())); } -bool op::Negative::has_evaluate() const { +bool Negative::has_evaluate() const { OV_OP_SCOPE(v0_Negative_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case 
element::f32: + case element::i32: + case element::i64: return true; default: - break; + return false; } - return false; -} - -shared_ptr ngraph::operator-(const Output& arg0) { - return make_shared(arg0); } +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 4a7bd1958f1c53..7662229f2fa701 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -10,6 +10,7 @@ #include "bound_evaluate.hpp" #include "compare.hpp" #include "ngraph/evaluator.hpp" +#include "ngraph/op/negative.hpp" #include "openvino/core/dimension_tracker.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/gather.hpp" @@ -979,6 +980,9 @@ bool validate_host_tensor_vector(const HostTensorVector& tensor_vector, const si std::none_of(tensor_vector.cbegin(), tensor_vector.cend(), ov::cmp::Equal(nullptr)); } +std::shared_ptr operator-(const Output& arg0) { + return std::make_shared(arg0); +} } // namespace ngraph void ov::infer_auto_padding(const Shape& image_shape, From 3313d1ce7af552c413a05d3c1e13a5e40e988ae9 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 25 Oct 2023 12:45:31 +0400 Subject: [PATCH 052/275] [GPU] FC with 4-bit weights compression support for iGPU (#20572) --- ...dequantization_subgraph_transformation.cpp | 15 +- .../mark_dequantization_subgraph.cpp | 12 + .../intel_gpu/primitives/fully_connected.hpp | 27 +- .../intel_gpu/src/graph/fully_connected.cpp | 3 + .../prepare_primitive_fusing.cpp | 21 +- .../src/graph/impls/ocl/fully_connected.cpp | 4 + .../impls/ocl/kernel_selector_helper.cpp | 16 ++ .../fully_connected_gpu_bf_tiled.cl | 98 ++++++-- .../fully_connected_gpu_bfyx_ref.cl | 40 ++- .../include/batch_headers/int4_utils.cl | 101 ++++++++ .../cl_kernels/reorder_weights_int4.cl | 54 ++++ .../src/kernel_selector/common_types.h | 4 + .../intel_gpu/src/kernel_selector/jitter.cpp | 51 +++- .../intel_gpu/src/kernel_selector/jitter.h | 2 +- 
.../kernel_selector_common.cpp | 4 + .../kernel_selector_params.cpp | 24 ++ .../kernel_selector/kernel_selector_params.h | 2 + .../fully_connected_kernel_base.cpp | 19 +- .../fully_connected_kernel_bf_tiled.cpp | 31 ++- .../fully_connected_kernel_bfyx_ref.cpp | 8 + .../kernels/reorder/reorder_weights_int4.cpp | 68 +++++ .../kernels/reorder/reorder_weights_int4.h | 23 ++ .../reorder_weights_kernel_selector.cpp | 2 + .../src/kernel_selector/weight_bias_params.h | 2 + .../src/plugin/ops/fully_connected.cpp | 16 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 +- .../convert_fc_to_compressed.cpp | 14 +- .../src/plugin/transformations_pipeline.cpp | 7 +- .../dynamic/matmul_weights_decompression.cpp | 65 +++-- .../unit/test_cases/hash_key_gpu_test.cpp | 8 +- .../convert_fc_to_compressed_test.cpp | 237 ++++++++++++++++++ 31 files changed, 891 insertions(+), 91 deletions(-) create mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl create mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_int4.cl create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.cpp create mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.h create mode 100644 src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index 92eb885c24970f..1bb38b64647368 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -8,6 +8,7 @@ #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include 
"transformations/rt_info/decompression.hpp" #include "transformations/rt_info/dequantization_node.hpp" +#include "transformations/rt_info/keep_const_precision.hpp" #include "common_test_utils/ov_test_utils.hpp" @@ -38,7 +39,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { // // After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. - // Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute + // All 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute + // Weights and zero points are marked with 'KeepConstPrecision' attribute { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -107,10 +109,12 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { } std::shared_ptr weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2}); + enable_keep_const_precision(weights); { auto convert = std::make_shared(weights, element::f32); pass::disable_constant_folding(convert); auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127}); + enable_keep_const_precision(zero_point); auto convert_on_zero_point = std::make_shared(zero_point, element::f32); pass::disable_constant_folding(convert_on_zero_point); auto subtract = std::make_shared(convert, convert_on_zero_point); @@ -157,6 +161,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint // After MarkDequantizationSubgraph all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute + // Weights node is marked with 'KeepConstPrecision' attribute { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -214,6 +219,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint } std::shared_ptr weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2}); + enable_keep_const_precision(weights); { auto convert = std::make_shared(weights, element::f32); pass::disable_constant_folding(convert); @@ -259,6 +265,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint // After MarkDequantizationSubgraph all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. // Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute + // Weights node is marked with 'KeepConstPrecision' attribute { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -323,6 +330,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint } std::shared_ptr weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2}); + enable_keep_const_precision(weights); { auto convert = std::make_shared(weights, element::f32); pass::disable_constant_folding(convert); @@ -373,6 +381,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant // After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute + // Weights and zero point nodes are marked with 'KeepConstPrecision' attribute { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -395,10 +404,12 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant } std::shared_ptr weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-3}); + enable_keep_const_precision(weights); { auto clamp = std::make_shared(weights, -2, 2); auto convert = std::make_shared(clamp, element::f32); auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127}); + enable_keep_const_precision(zero_point); auto convert_on_zero_point = std::make_shared(zero_point, element::f32); auto subtract = std::make_shared(convert, convert_on_zero_point); auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2}); @@ -488,6 +499,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubCons // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' node before weights is marked with 'DisableConstantFolding' attribute // but Convert before Dequantization Sub const isn't because fold_subtract_const is set to true + // Weights node is marked with 'KeepConstPrecision' attribute { auto weights = opset10::Constant::create(element::u8, Shape{4, 16, 1, 1}, {3}); @@ -505,6 +517,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubCons { auto weights = opset10::Constant::create(element::u8, Shape{4, 16, 1, 1}, {3}); + enable_keep_const_precision(weights); auto convert = std::make_shared(weights, element::f32); pass::disable_constant_folding(convert); auto zero_point = opset10::Constant::create(element::f32, Shape{}, {127}); diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index d79ac3bb8736ff..c2662dc77e2311 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -9,6 +9,7 @@ #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/rt_info/dequantization_node.hpp" #include "transformations/rt_info/disable_constant_folding.hpp" +#include "transformations/rt_info/keep_const_precision.hpp" #include "transformations/utils/utils.hpp" ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::TypeVector& precisions, @@ -62,6 +63,16 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element:: if (ov::op::util::is_on_constant_path(input)) { // disable ConstantFolding if dequantization subgraph is on constant data ov::disable_constant_folding(convert); + // It is also necessary to avoid precision conversion for constant nodes with input_precision + auto keep_const_precision = [&](Node* node) { + if (auto 
constant = ov::as_type(node)) { + const auto& const_et = constant->get_element_type(); + if (std::find(precisions.begin(), precisions.end(), const_et) != precisions.end()) + ov::enable_keep_const_precision(convert->get_input_node_shared_ptr(0)); + } + }; + std::unordered_set visited; + ov::op::util::visit_shape_path(input.get_node(), visited, keep_const_precision); } if (subtract_it != pattern_map.end()) { @@ -75,6 +86,7 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element:: // so we don't have to constantfold it and then convert it back to // low precision in LP transformations ov::disable_constant_folding(zero_point); + ov::enable_keep_const_precision(zero_point->get_input_node_shared_ptr(0)); } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp index dc438b691aaa9a..21e72b9e0e5a61 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp @@ -3,6 +3,7 @@ // #pragma once +#include "intel_gpu/runtime/optionals.hpp" #include "primitive.hpp" #include @@ -110,6 +111,7 @@ struct fully_connected : public primitive_base { bool compressed_weights = false; primitive_id decompression_scale = ""; primitive_id decompression_zero_point = ""; + optional_value decompression_zero_point_scalar = optional_value(); /// @brief Primitive dimension size. 
size_t input_size = 2; @@ -124,6 +126,8 @@ struct fully_connected : public primitive_base { seed = hash_combine(seed, compressed_weights); seed = hash_combine(seed, !decompression_scale.empty()); seed = hash_combine(seed, !decompression_zero_point.empty()); + seed = hash_combine(seed, decompression_zero_point_scalar.has_value()); + seed = hash_combine(seed, decompression_zero_point_scalar.value_or(0.0f)); return seed; } @@ -135,7 +139,11 @@ struct fully_connected : public primitive_base { return input_size == rhs_casted.input_size && weights_rank == rhs_casted.weights_rank && - bias.empty() == rhs_casted.bias.empty(); + bias.empty() == rhs_casted.bias.empty() && + compressed_weights == rhs_casted.compressed_weights && + decompression_scale.empty() == rhs_casted.decompression_scale.empty() && + decompression_zero_point.empty() == rhs_casted.decompression_zero_point.empty() && + decompression_zero_point_scalar.value_or(0.0f) == rhs_casted.decompression_zero_point_scalar.value_or(0.0f); } void save(BinaryOutputBuffer& ob) const override { @@ -147,6 +155,13 @@ struct fully_connected : public primitive_base { ob << decompression_zero_point; ob << input_size; ob << weights_rank; + + if (decompression_zero_point_scalar.has_value()) { + ob << true; + ob << make_data(&decompression_zero_point_scalar.value(), sizeof(float)); + } else { + ob << false; + } } void load(BinaryInputBuffer& ib) override { @@ -158,6 +173,16 @@ struct fully_connected : public primitive_base { ib >> decompression_zero_point; ib >> input_size; ib >> weights_rank; + + bool has_value; + ib >> has_value; + if (has_value) { + float decompression_zero_point_value = 0.f; + ib >> make_data(&decompression_zero_point_value, sizeof(float)); + decompression_zero_point_scalar = decompression_zero_point_value; + } else { + decompression_zero_point_scalar = optional_value(); + } } protected: diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 
ed396907d7e1b1..4d65e5223e27dd 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -222,6 +222,9 @@ std::string fully_connected_inst::to_string(fully_connected_node const& node) { if (desc->compressed_weights) { fc_info.add("decompression scale id", desc->decompression_scale); fc_info.add("decompression zp id", desc->decompression_zero_point); + if (desc->decompression_zero_point_scalar.has_value()) { + fc_info.add("decompression zp value", desc->decompression_zero_point_scalar.value()); + } } node_info->add("fully connected info", fc_info); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index cc9d8602316fa5..167b77c74afc55 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1200,8 +1200,25 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { next_node->is_type() || next_node->is_type()) { size_t weights_offset = next_node->get_primitive()->input_size(); - return &next_node->get_dependency(weights_offset) == node ? next_node - : nullptr; + std::vector valid_weights_indices = {next_node->get_primitive()->input_size()}; + if (next_node->is_type()) { + auto& fc = next_node->as(); + auto desc = fc.get_primitive(); + if (desc->compressed_weights) { + size_t scale_idx = weights_offset + (fc.bias_term() ? 
2 : 1); + valid_weights_indices.push_back(scale_idx); + if (!desc->decompression_zero_point.empty()) { + valid_weights_indices.push_back(scale_idx + 1); + } + } + } + + for (auto& widx : valid_weights_indices) { + if (&next_node->get_dependency(widx) == node) { + return next_node; + } + } + return nullptr; } if (node->is_constant() && node->get_users().size() == 1) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 19007a481579f6..e3ac31cccc7cc9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -161,6 +161,10 @@ struct fully_connected_impl : typed_primitive_impl_ocl { if (with_zp) { params.has_decompression_zp = true; params.decompression_zero_point = convert_data_tensor(input_layouts[3]); + } else if (primitive->decompression_zero_point_scalar.has_value()) { + params.has_decompression_zp = true; + params.scalar_zp = true; + params.zp_value = primitive->decompression_zero_point_scalar.value(); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index f0872d3702970e..e9b98a57e0022f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -123,6 +123,10 @@ kernel_selector::data_type to_data_type(data_types dt) { switch (dt) { case cldnn::data_types::u1: return kernel_selector::data_type::BINARY; + case cldnn::data_types::i4: + return kernel_selector::data_type::INT4; + case cldnn::data_types::u4: + return kernel_selector::data_type::UINT4; case cldnn::data_types::i8: return kernel_selector::data_type::INT8; case cldnn::data_types::u8: @@ -144,6 +148,10 @@ data_types from_data_type(kernel_selector::data_type dt) { switch (dt) { case kernel_selector::data_type::BINARY: return 
cldnn::data_types::u1; + case kernel_selector::data_type::INT4: + return cldnn::data_types::i4; + case kernel_selector::data_type::UINT4: + return cldnn::data_types::u4; case kernel_selector::data_type::INT8: return cldnn::data_types::i8; case kernel_selector::data_type::UINT8: @@ -165,6 +173,10 @@ kernel_selector::weights_type to_weights_type(data_types dt) { switch (dt) { case cldnn::data_types::u1: return kernel_selector::weights_type::BINARY; + case cldnn::data_types::u4: + return kernel_selector::weights_type::UINT4; + case cldnn::data_types::i4: + return kernel_selector::weights_type::INT4; case cldnn::data_types::i8: return kernel_selector::weights_type::INT8; case cldnn::data_types::u8: @@ -184,6 +196,10 @@ data_types from_weights_type(kernel_selector::weights_type dt) { switch (dt) { case kernel_selector::weights_type::BINARY: return data_types::u1; + case kernel_selector::weights_type::INT4: + return data_types::i4; + case kernel_selector::weights_type::UINT4: + return data_types::u4; case kernel_selector::weights_type::INT8: return data_types::i8; case kernel_selector::weights_type::UINT8: diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index f6dacec4a73c80..dd8adb53e76726 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -38,10 +38,17 @@ # error "fully_connected_gpu_bf_tiled.cl - TILE_K_OFM must be equal to TILE_K * TILE_OFM and at most 8" #endif +#if COMPRESSED_WEIGHTS_INT4 +# if TILE_K_OFM != TILE_K_OFM_PACKED * 2 +# error "fully_connected_gpu_bf_tiled.cl - TILE_K_OFM must be divisible by 2 for 4-bit compressed case" +# endif +#endif + // Macros for vectorized types. 
#define INPUT_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, TILE_IFM) #define ACCUMULATOR_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, TILE_OFM) #define FILTER_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, TILE_K_OFM) +#define FILTER_PACKED_VEC_TYPE MAKE_VECTOR_TYPE(FILTER_TYPE, TILE_K_OFM_PACKED) #define BIAS_VEC_TYPE MAKE_VECTOR_TYPE(BIAS_TYPE, TILE_OFM) #define OUTPUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, TILE_OFM) #define ACTIVATION_VEC_TYPE MAKE_VECTOR_TYPE(ACTIVATION_TYPE, TILE_OFM) @@ -50,7 +57,7 @@ #define TO_FILTER_VEC_TYPE(x) CAT(convert_, FILTER_VEC_TYPE)(x) #define INPUT_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, TILE_IFM, ptr, offset) -#define FILTER_BLOCK_READ(ptr, offset) BLOCK_READN(FILTER_TYPE, TILE_K_OFM, ptr, offset) +#define FILTER_BLOCK_READ(ptr, offset) BLOCK_READN(FILTER_TYPE, TILE_K_OFM_PACKED, ptr, offset) #define BIAS_BLOCK_READ(ptr, offset) BLOCK_READN(BIAS_TYPE, TILE_OFM, ptr, offset) #define OUTPUT_BLOCK_WRITE(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, TILE_OFM, ptr, offset, val) @@ -85,7 +92,7 @@ KERNEL(fc)( #if DECOMPRESSION_SCALE_TERM const __global DECOMPRESSION_SCALE_TYPE* decompression_scale, #endif -#if DECOMPRESSION_ZP_TERM +#if DECOMPRESSION_ZP_TERM && !DECOMPRESSION_ZP_SCALAR const __global DECOMPRESSION_ZP_TYPE* decompression_zp, #endif __global OUTPUT_TYPE* output, @@ -118,12 +125,16 @@ KERNEL(fc)( FILTER_VEC_TYPE wei = 0; uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; +#if COMPRESSED_WEIGHTS_INT4 + uint weights_offset = out_f * (INPUT_ELEMENTS_COUNT / 2); +#else uint weights_offset = out_f * INPUT_ELEMENTS_COUNT; +#endif #if COMPRESSED_WEIGHTS && DECOMPRESSION_SCALE_GROUPS_NUM == 1 - #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD == 0 + #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) == 0 ACCUMULATOR_VEC_TYPE d_scale = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_scale, out_f); - #elif DECOMPRESSION_SCALE_LENGTH > 1 && 
DECOMPRESSION_SCALE_LENGTH % SIMD != 0 + #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % (TILE_OFM * SIMD) != 0 ACCUMULATOR_VEC_TYPE d_scale = 0; unroll_for(uint of = 0; of < TILE_OFM; ++of) { uint offset = out_f + of*SIMD + get_sub_group_local_id(); @@ -137,10 +148,10 @@ KERNEL(fc)( ACCUMULATOR_TYPE* d_scales = (ACCUMULATOR_TYPE*)(&d_scale); #endif -#if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 - #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 +#if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 && !DECOMPRESSION_ZP_SCALAR + #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) == 0 ACCUMULATOR_VEC_TYPE d_zp = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_zp, out_f); - #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD != 0 + #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % (TILE_OFM * SIMD) != 0 ACCUMULATOR_VEC_TYPE d_zp = 0; unroll_for(uint of = 0; of < TILE_OFM; ++of) { uint offset = out_f + of*SIMD + get_sub_group_local_id(); @@ -187,29 +198,46 @@ KERNEL(fc)( // NOTE: Manually unrolling multiplication loop leads to lower register pressure and allows for bigger block sizes, // but significantly degrades readability and generality of code. // It doesn't also show noticable performance improvement on tested configurations. 
+ #if DECOMPRESSION_SCALE_POST_OP + ACCUMULATOR_VEC_TYPE acc_tmp[TILE_B] = { }; + #endif + unroll_for(uint ki = 0; ki < (TILE_IFM * SIMD) / TILE_K; ++ki) { - wei = TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #if COMPRESSED_WEIGHTS_INT4 + FILTER_PACKED_VEC_TYPE wei_packed = FILTER_BLOCK_READ(weights, weights_offset); + wei = UNPACK_INT4x2(ACCUMULATOR_TYPE, *((INT4_PACKED_TYPE*)&wei_packed)); + #else + wei = TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #endif + #if COMPRESSED_WEIGHTS ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { const uint w_idx = kii * TILE_OFM + fi; const uint offset_ofm = out_f + fi*SIMD + sglid; - #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 - const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + - ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; - ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #if !DECOMPRESSION_SCALE_POST_OP + // Apply scales before FMA to avoid FP16 overflow in case of INT8 + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; + #endif #else - ACCUMULATOR_TYPE ds = d_scales[fi]; + ACCUMULATOR_TYPE ds = ACCUMULATOR_VAL_ONE; #endif #if DECOMPRESSION_ZP_TERM - #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + #if DECOMPRESSION_ZP_SCALAR + ACCUMULATOR_TYPE dzp = DECOMPRESSION_ZP_VALUE; + #elif DECOMPRESSION_ZP_GROUPS_NUM > 1 const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + ((kii + ki*TILE_K + 
ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; #else - ACCUMULATOR_TYPE dzp = d_zps[fi]; + ACCUMULATOR_TYPE dzp = d_zps[fi % DECOMPRESSION_ZP_LENGTH]; #endif #else ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; @@ -218,18 +246,38 @@ KERNEL(fc)( } } #endif - weights_offset += TILE_K_OFM * SIMD; + weights_offset += TILE_K_OFM_PACKED * SIMD; unroll_for (uint kii = 0; kii < TILE_K; ++kii) { const uint total_k = ki * TILE_K + kii; unroll_for (uint bi = 0; bi < TILE_B; ++bi) { INPUT0_TYPE in_val = _sub_group_shuffle(((INPUT0_TYPE*)(&in_0[bi]))[total_k / SIMD], total_k % SIMD); unroll_for (uint fi = 0; fi < TILE_OFM; ++fi) { +#if DECOMPRESSION_SCALE_POST_OP + ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[kii * TILE_OFM + fi]; +#else ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[kii * TILE_OFM + fi]; +#endif } } } } +#if DECOMPRESSION_SCALE_POST_OP + unroll_for (uint bi = 0; bi < TILE_B; ++bi) { + unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { + const uint offset_ofm = out_f + fi*SIMD + sglid; + + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; + #endif + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += ((ACCUMULATOR_TYPE*)(&acc_tmp[bi]))[fi] * ds; + } + } +#endif } // ===================================================================================================================================== // Leftovers @@ -246,7 +294,13 @@ KERNEL(fc)( #undef LOAD_IN_0 input_offset += TILE_IFM * SIMD - TILE_IN_B_PITCH * TILE_B; unroll_for(uint ki = 0; ki < CEIL_DIV(LEFTOVER_IFM, TILE_K); ++ki) { - wei = 
TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #if COMPRESSED_WEIGHTS_INT4 + FILTER_PACKED_VEC_TYPE wei_packed = FILTER_BLOCK_READ(weights, weights_offset); + wei = UNPACK_INT4x2(ACCUMULATOR_TYPE, *((INT4_PACKED_TYPE*)&wei_packed)); + #else + wei = TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #endif + #if COMPRESSED_WEIGHTS ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { @@ -258,16 +312,18 @@ KERNEL(fc)( ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; #else - ACCUMULATOR_TYPE ds = d_scales[fi]; + ACCUMULATOR_TYPE ds = d_scales[fi % DECOMPRESSION_SCALE_LENGTH]; #endif #if DECOMPRESSION_ZP_TERM - #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + #if DECOMPRESSION_ZP_SCALAR + ACCUMULATOR_TYPE dzp = DECOMPRESSION_ZP_VALUE; + #elif DECOMPRESSION_ZP_GROUPS_NUM > 1 const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; #else - ACCUMULATOR_TYPE dzp = d_zps[fi]; + ACCUMULATOR_TYPE dzp = d_zps[fi % DECOMPRESSION_ZP_LENGTH]; #endif #else ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; @@ -276,7 +332,7 @@ KERNEL(fc)( } } #endif - weights_offset += TILE_K_OFM * SIMD; + weights_offset += TILE_K_OFM_PACKED * SIMD; unroll_for (uint kii = 0; kii < TILE_K; ++kii) { unroll_for (uint fi = 0; fi < TILE_OFM; ++fi) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl index 6374e65c4f5fcc..b83018e8c9bc53 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl +++ 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl @@ -4,6 +4,7 @@ #include "include/batch_headers/fetch_data.cl" #include "include/batch_headers/fetch_weights.cl" +#include "include/batch_headers/int4_utils.cl" KERNEL(fc)( OPTIONAL_SHAPE_INFO_ARG @@ -11,7 +12,7 @@ KERNEL(fc)( #if DECOMPRESSION_SCALE_TERM const __global DECOMPRESSION_SCALE_TYPE* decompression_scale, #endif -#if DECOMPRESSION_ZP_TERM +#if DECOMPRESSION_ZP_TERM && !DECOMPRESSION_ZP_SCALAR const __global DECOMPRESSION_ZP_TYPE* decompression_zp, #endif __global OUTPUT_TYPE* output, @@ -38,8 +39,12 @@ KERNEL(fc)( const uint input0_idx = INPUT0_GET_INDEX(b, ofm, y, x); #if COMPRESSED_WEIGHTS #if DECOMPRESSION_ZP_TERM - const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(oym, y / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); - ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); + #if DECOMPRESSION_ZP_SCALAR + ACCUMULATOR_TYPE zp = DECOMPRESSION_ZP_VALUE; + #else + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(oym, y / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); + #endif #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif @@ -47,13 +52,19 @@ KERNEL(fc)( DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; #endif - #if COMPRESSED_WEIGHTS_INT8 const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); + #if COMPRESSED_WEIGHTS_INT8 ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); ACCUMULATOR_TYPE filter_val = (filter_compressed - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); + #elif COMPRESSED_WEIGHTS_INT4 + FILTER_TYPE filter_packed = weights[filter_idx / 2]; + MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, 2) filter_unpacked = UNPACK_INT4x2(ACCUMULATOR_TYPE, *((INT4_PACKED_TYPE*)&filter_packed)); + + ACCUMULATOR_TYPE filter_compressed = ((ACCUMULATOR_TYPE*)(&filter_unpacked))[filter_idx % 
2]; + ACCUMULATOR_TYPE filter_val = (filter_compressed - zp) * scale; + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * filter_val; #else - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } @@ -75,8 +86,12 @@ KERNEL(fc)( const uint input0_idx = INPUT0_GET_INDEX(b, ifm, y, x); #if COMPRESSED_WEIGHTS #if DECOMPRESSION_ZP_TERM - const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); - ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); + #if DECOMPRESSION_ZP_SCALAR + ACCUMULATOR_TYPE zp = DECOMPRESSION_ZP_VALUE; + #else + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); + #endif #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif @@ -84,14 +99,19 @@ KERNEL(fc)( DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; #endif - - #if COMPRESSED_WEIGHTS_INT8 const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); + #if COMPRESSED_WEIGHTS_INT8 FILTER_TYPE filter_compressed = weights[filter_idx]; ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); + #elif COMPRESSED_WEIGHTS_INT4 + FILTER_TYPE filter_packed = weights[filter_idx / 2]; + MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, 2) filter_unpacked = UNPACK_INT4x2(ACCUMULATOR_TYPE, *((INT4_PACKED_TYPE*)&filter_packed)); + + ACCUMULATOR_TYPE filter_compressed = ((ACCUMULATOR_TYPE*)(&filter_unpacked))[filter_idx % 2]; + ACCUMULATOR_TYPE filter_val = (filter_compressed - zp) * scale; + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * filter_val; #else - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * 
(ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl new file mode 100644 index 00000000000000..db786b30c15f4a --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl @@ -0,0 +1,101 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +typedef struct __attribute__ ((packed)) int4x2_t { char s0; } int4x2_t; +typedef struct __attribute__ ((packed)) int4x4_t { int4x2_t s0; int4x2_t s1; } int4x4_t; +typedef struct __attribute__ ((packed)) int4x8_t { int4x2_t s0; int4x2_t s1; int4x2_t s2; int4x2_t s3; } int4x8_t; +typedef struct __attribute__ ((packed)) int4x16_t { int4x2_t s0; int4x2_t s1; int4x2_t s2; int4x2_t s3; int4x2_t s4; int4x2_t s5; int4x2_t s6; int4x2_t s7; } int4x16_t; + +typedef struct __attribute__ ((packed)) uint4x2_t { uchar s0; } uint4x2_t; +typedef struct __attribute__ ((packed)) uint4x4_t { uint4x2_t s0; uint4x2_t s1; } uint4x4_t; +typedef struct __attribute__ ((packed)) uint4x8_t { uint4x2_t s0; uint4x2_t s1; uint4x2_t s2; uint4x2_t s3; } uint4x8_t; +typedef struct __attribute__ ((packed)) uint4x16_t { uint4x2_t s0; uint4x2_t s1; uint4x2_t s2; uint4x2_t s3; uint4x2_t s4; uint4x2_t s5; uint4x2_t s6; uint4x2_t s7; } uint4x16_t; + +inline uchar2 cvt_uint4x2_to_uint8x2(uint4x2_t v) __attribute__((overloadable)) { + const uchar v0 = v.s0 & 0x0F; + const uchar v1 = (v.s0 & 0xF0) >> 4; + return (uchar2)(v0, v1); +} + +inline char2 cvt_int4x2_to_int8x2(int4x2_t v) __attribute__((overloadable)) { + const char s_bit = (v.s0 & convert_char(0x08)); + const char mask = s_bit > 0 ? 
convert_char(0xF0) : convert_char(0x00); + const char v0 = (v.s0 & convert_char(0x0F)) | mask; + const char v1 = v.s0 >> 4; + return (char2)(v0, v1); +} + +inline half2 unpack_to_half(uint4x2_t v) __attribute__((overloadable)) { + return convert_half2(cvt_uint4x2_to_uint8x2(v)); +} + +inline float2 unpack_to_float(uint4x2_t v) __attribute__((overloadable)) { + return convert_float2(cvt_uint4x2_to_uint8x2(v)); +} + +inline half2 unpack_to_half(int4x2_t v) __attribute__((overloadable)) { + return convert_half2(cvt_int4x2_to_int8x2(v)); +} + +inline float2 unpack_to_float(int4x2_t v) __attribute__((overloadable)) { + return convert_float2(cvt_int4x2_to_int8x2(v)); +} + +inline half4 unpack_to_half(uint4x4_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline float4 unpack_to_float(uint4x4_t v) __attribute__((overloadable)) { + float2 f0 = unpack_to_float(v.s0); + float2 f1 = unpack_to_float(v.s1); + return (float4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline half4 unpack_to_half(int4x4_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline float4 unpack_to_float(int4x4_t v) __attribute__((overloadable)) { + float2 f0 = unpack_to_float(v.s0); + float2 f1 = unpack_to_float(v.s1); + return (float4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline half8 unpack_to_half(uint4x8_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + half2 f2 = unpack_to_half(v.s2); + half2 f3 = unpack_to_half(v.s3); + return (half8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +} + +inline float8 unpack_to_float(uint4x8_t v) __attribute__((overloadable)) { + float2 f0 = unpack_to_float(v.s0); + float2 f1 = unpack_to_float(v.s1); + float2 f2 = unpack_to_float(v.s2); + float2 f3 = unpack_to_float(v.s3); + return 
(float8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +} + +inline half8 unpack_to_half(int4x8_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + half2 f2 = unpack_to_half(v.s2); + half2 f3 = unpack_to_half(v.s3); + return (half8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +} + +inline float8 unpack_to_float(int4x8_t v) __attribute__((overloadable)) { + float2 f0 = unpack_to_float(v.s0); + float2 f1 = unpack_to_float(v.s1); + float2 f2 = unpack_to_float(v.s2); + float2 f3 = unpack_to_float(v.s3); + return (float8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +} + +#define UNPACK_INT4x2(target_type, value) CAT(unpack_to_, target_type)(value) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_int4.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_int4.cl new file mode 100644 index 00000000000000..46eb8909277543 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_int4.cl @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/batch_headers/fetch_weights.cl" + +KERNEL(reorder_weights_int4)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) { +#if defined(INPUT0_LAYOUT_IOYX) && defined(OUTPUT_LAYOUT_OIYX) + const uint out_byte_offset = get_global_id(0); + + const uint offset0 = out_byte_offset * 2 + 0; + const uint offset1 = out_byte_offset * 2 + 1; + + const uint i0 = offset0 % OUTPUT_IFM_NUM; + const uint i1 = offset1 % OUTPUT_IFM_NUM; + + const uint o0 = offset0 / OUTPUT_IFM_NUM; + const uint o1 = offset1 / OUTPUT_IFM_NUM; + + const uint input0_offset = GET_FILTER_INDEX(INPUT0, 0, o0, i0, 0, 0); + const uint input1_offset = GET_FILTER_INDEX(INPUT0, 0, o1, i1, 0, 0); + + const uint input0_idx = input0_offset % 2; + const uint input1_idx = input1_offset % 2; + + INPUT0_TYPE in0 = (input[input0_offset 
/ 2] >> input0_idx*4) & 0x0F; + INPUT0_TYPE in1 = (input[input1_offset / 2] >> input1_idx*4) & 0x0F; + + OUTPUT_TYPE out = in0 | (in1 << 4); + output[out_byte_offset] = out; +#elif defined(OUTPUT_LAYOUT_OS_IYX_OSV32) + const unsigned o = (uint)get_global_id(0); + const unsigned i = (uint)get_global_id(1); + + const unsigned o0 = (o / 16) * 32 + (o % 16); + const unsigned o1 = (o / 16) * 32 + (o % 16) + 16; + + const uint input0_offset = GET_FILTER_INDEX(INPUT0, 0, o0, i, 0, 0); + const uint input1_offset = GET_FILTER_INDEX(INPUT0, 0, o1, i, 0, 0); + + const uint input0_idx = input0_offset % 2; + const uint input1_idx = input1_offset % 2; + + INPUT0_TYPE in0 = (input[input0_offset / 2] >> input0_idx*4) & 0x0F; + INPUT0_TYPE in1 = (input[input1_offset / 2] >> input1_idx*4) & 0x0F; + + INPUT0_TYPE packed_out_channels = in0 | (in1 << 4); + + const uint output_idx = GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, 0, 0, 32 / 2); // Calculate offset as osv16 due to packing + output[output_idx] = packed_out_channels; +#else +#error "reorder_weights_int4: unsupported layouts combination" +#endif +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 1acc0aa89e6af6..6d84aa65f378b8 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -106,6 +106,8 @@ enum class KernelType { enum class Datatype { UNSUPPORTED, BINARY, + UINT4, + INT4, INT8, UINT8, INT16, @@ -127,6 +129,8 @@ enum class WeightsType { F32, INT8, UINT8, + UINT4, + INT4, INT32 }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp index 67c25b89025eb9..ac079ea448711c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp @@ -115,8 +115,10 @@ std::string toCLType(WeightsType wType) { switch (wType) { case 
WeightsType::BINARY: return GetTypeName(); + case WeightsType::INT4: case WeightsType::INT8: return GetTypeName(); + case WeightsType::UINT4: case WeightsType::UINT8: return GetTypeName(); case WeightsType::F16: @@ -1343,18 +1345,18 @@ JitConstants MakeActivationJitConstants(ActivationFunction activation_function, } JitConstants MakeTypeJitConstants(Datatype dataType, const std::string& macroName) { - std::string type; - std::string max_val; - std::string min_val; - std::string val_one; - std::string val_zero; - std::string to_type; - std::string to_type_sat; - std::string as_type; - std::string max_func; - std::string min_func; - std::string abs_func; - std::string type_size; + std::string type = "undefined"; + std::string max_val = "undefined"; + std::string min_val = "undefined"; + std::string val_one = "undefined"; + std::string val_zero = "undefined"; + std::string to_type = "undefined"; + std::string to_type_sat = "undefined"; + std::string as_type = "undefined"; + std::string max_func = "undefined"; + std::string min_func = "undefined"; + std::string abs_func = "undefined"; + std::string type_size = "undefined"; bool is_fp; switch (dataType) { case Datatype::INT8: @@ -1478,6 +1480,16 @@ JitConstants MakeTypeJitConstants(Datatype dataType, const std::string& macroNam type_size = "2"; is_fp = true; break; + case Datatype::INT4: + type = "char"; + type_size = "0.5f"; + is_fp = false; + break; + case Datatype::UINT4: + type = "uchar"; + type_size = "0.5f"; + is_fp = false; + break; default: type = "float"; max_val = "FLT_MAX"; @@ -1523,6 +1535,10 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma return MakeTypeJitConstants(Datatype::INT8, macroName); case WeightsType::UINT8: return MakeTypeJitConstants(Datatype::UINT8, macroName); + case WeightsType::INT4: + return MakeTypeJitConstants(Datatype::INT4, macroName); + case WeightsType::UINT4: + return MakeTypeJitConstants(Datatype::UINT4, macroName); case WeightsType::BINARY: 
return MakeTypeJitConstants(Datatype::UINT32, macroName); case WeightsType::INT32: @@ -1533,6 +1549,17 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma return MakeTypeJitConstants(Datatype::UNSUPPORTED, macroName); } +JitConstants make_int4_packed_type_jit_constant(const std::string& macro_name, WeightsType wt, size_t pack_size) { + OPENVINO_ASSERT(pack_size % 2 == 0 && pack_size != 0 && pack_size <= 16); + std::string type_string = ""; + switch (wt) { + case WeightsType::UINT4: type_string = "uint4x"; break; + case WeightsType::INT4: type_string = "int4x"; break; + default: OPENVINO_THROW("[GPU] Unsupported compressed type"); + } + return { MakeJitConstant(macro_name, type_string + std::to_string(pack_size) + "_t") }; +} + JitConstants MakeActivationJitConstants(const base_activation_params& params, Datatype out_dt, const std::string& suffix, diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.h b/src/plugins/intel_gpu/src/kernel_selector/jitter.h index 29dd56efe37647..aa1ec7f7d9db11 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.h +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.h @@ -322,7 +322,7 @@ JitConstants MakeConstantLoopUnrollJitConstants(uint32_t loopCount); JitConstants MakeTypeJitConstants(Datatype dataType, const std::string& macroName); JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& macroName); inline JitConstants MakeUnitTypeJitConstants(Datatype dataType) { return MakeTypeJitConstants(dataType, "UNIT"); } - +JitConstants make_int4_packed_type_jit_constant(const std::string& macro_name, WeightsType wt, size_t pack_size); class FusedOpsCodeGenerator { public: diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 8c6d2af2fd8f69..193d589daba44d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ 
b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -142,6 +142,8 @@ std::string toString(DataLayout l) { std::string toString(Datatype dType) { switch (dType) { case Datatype::BINARY: return "BINARY"; + case Datatype::UINT4: return "UINT4"; + case Datatype::INT4: return "INT4"; case Datatype::INT8: return "INT8"; case Datatype::UINT8: return "UINT8"; case Datatype::INT16: return "INT16"; @@ -160,6 +162,8 @@ std::string toString(WeightsType wType) { case WeightsType::BINARY: return "BINARY"; case WeightsType::F16: return "F16"; case WeightsType::F32: return "F32"; + case WeightsType::UINT4: return "UINT4"; + case WeightsType::INT4: return "INT4"; case WeightsType::INT8: return "INT8"; case WeightsType::UINT8: return "UINT8"; case WeightsType::INT32: return "INT32"; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp index 32d451704fafa4..d7b3002bb6ffab 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp @@ -47,6 +47,12 @@ DeviceFeaturesKey EngineInfo::get_supported_device_features_key() const { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void ParamsKey::EnableInputDataType(Datatype dt) { switch (dt) { + case Datatype::INT4: + key.inputType.val.int4 = 1; + break; + case Datatype::UINT4: + key.inputType.val.uint4 = 1; + break; case Datatype::INT8: key.inputType.val.int8 = 1; break; @@ -86,6 +92,12 @@ void ParamsKey::EnableAllInputDataType() { key.inputType.raw = 0xffffffff; } void ParamsKey::EnableOutputDataType(Datatype dt) { switch (dt) { + case Datatype::INT4: + key.outputType.val.int4 = 1; + break; + case Datatype::UINT4: + key.outputType.val.uint4 = 1; + break; case Datatype::INT8: key.outputType.val.int8 = 1; break; @@ -134,6 +146,12 @@ void 
ParamsKey::EnableInputWeightsType(WeightsType wt) { case WeightsType::INT8: key.inputWeightsType.val.int8 = 1; break; + case WeightsType::INT4: + key.inputWeightsType.val.int4 = 1; + break; + case WeightsType::UINT4: + key.inputWeightsType.val.uint4 = 1; + break; case WeightsType::BINARY: key.inputWeightsType.val.binary = 1; break; @@ -157,6 +175,12 @@ void ParamsKey::EnableOutputWeightsType(WeightsType wt) { case WeightsType::INT8: key.outputWeightsType.val.int8 = 1; break; + case WeightsType::INT4: + key.outputWeightsType.val.int4 = 1; + break; + case WeightsType::UINT4: + key.outputWeightsType.val.uint4 = 1; + break; case WeightsType::BINARY: key.outputWeightsType.val.binary = 1; break; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h index 604db427f7c4c0..8fb50c1716633d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h @@ -252,6 +252,8 @@ class ParamsKey { typedef union DataTypesKey_t { struct val_t { + uint32_t int4 : 1; + uint32_t uint4 : 1; uint32_t int8 : 1; uint32_t uint8 : 1; uint32_t int16 : 1; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp index a75d35469837f7..2d88dd2f94225f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp @@ -26,6 +26,8 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS", 1)}); if (params.weights.GetDType() == WeightsType::INT8 || params.weights.GetDType() == WeightsType::UINT8) { 
jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS_INT8", 1)}); + } else if (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4) { + jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS_INT4", 1)}); } const size_t scale_groups_num = params.decompression_scale.Feature().v; @@ -35,12 +37,17 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUPS_NUM", scale_groups_num)}); jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUP_SIZE", scale_group_size)}); if (params.has_decompression_zp) { - const size_t zp_groups_num = params.decompression_zero_point.Feature().v; - const size_t zp_group_size = params.weights.IFM().v / params.decompression_zero_point.Feature().v; jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_TERM", 1)}); - jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP", params.decompression_zero_point)}); - jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUPS_NUM", zp_groups_num)}); - jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUP_SIZE", zp_group_size)}); + if (params.scalar_zp) { + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_VALUE", params.zp_value)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_SCALAR", 1)}); + } else { + const size_t zp_groups_num = params.decompression_zero_point.Feature().v; + const size_t zp_group_size = params.weights.IFM().v / params.decompression_zero_point.Feature().v; + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP", params.decompression_zero_point)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUPS_NUM", zp_groups_num)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUP_SIZE", zp_group_size)}); + } } } @@ -119,7 +126,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms, int inputs_count = 1; if (newParams.compressed) { inputs_count++; - if (newParams.has_decompression_zp) + if 
(newParams.has_decompression_zp && !newParams.scalar_zp) inputs_count++; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index c272124627db23..50a3d8649b16a5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -6,6 +6,7 @@ #include #include +#include "common_types.h" static constexpr size_t simd = 16; @@ -38,6 +39,8 @@ ParamsKey FullyConnected_bf_tiled::GetSupportedKey() const { k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::INT8); k.EnableOutputDataType(Datatype::UINT8); + k.EnableInputWeightsType(WeightsType::UINT4); + k.EnableInputWeightsType(WeightsType::INT4); k.EnableInputWeightsType(WeightsType::F16); k.EnableInputWeightsType(WeightsType::F32); k.EnableInputLayout(DataLayout::bf); @@ -70,6 +73,7 @@ bool FullyConnected_bf_tiled::Validate(const Params& params, const optional_para auto& fc_params = static_cast(params); auto& input = fc_params.inputs[0]; auto& output = fc_params.outputs[0]; + auto& weights = fc_params.weights; // Block reads must be aligned to 4 bytes, for fp16 we can correct for offset misalignment, // but we need to ensure that batch pitch preserves alignment. 
@@ -103,6 +107,11 @@ bool FullyConnected_bf_tiled::Validate(const Params& params, const optional_para return false; } + auto wt = weights.GetDType(); + if ((wt == WeightsType::UINT4 || wt == WeightsType::INT4) && (weights.IFM().v % 2 != 0 || weights.OFM().v % 2 != 0)) { + return false; + } + return true; } @@ -150,6 +159,11 @@ bool TuneParamsSelector::VerifyTuneParams(const fully_connected_params& params, output_f = params.outputs[0].Y().v; } + if (params.compressed && + (params.weights.GetDType() == WeightsType::INT4 || params.weights.GetDType() == WeightsType::UINT4) && + tparams.tile_ofm != 2) + return false; + auto batch_size = params.is_shape_agnostic ? Align(output_b, tparams.tile_b) : output_b; if (batch_size % (tparams.tile_b * tparams.dispatch_bsv) != 0) return false; @@ -201,7 +215,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, while (max_tile_ofm * 2 * simd <= output_f && max_tile_ofm < 4) max_tile_ofm *= 2; - if (params.compressed && params.engineInfo.supports_immad) { + if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4) { + return selector.Default(tune_params(1, 2, 1, 4, 1, 1, EXE_MODE_DEFAULT)); + } else if (params.compressed && params.engineInfo.supports_immad) { return selector.Default(tune_params(1, 1, 1, 4, 1, 1, EXE_MODE_DEFAULT)); } else if (params.is_shape_agnostic) { // Use special tuning params for Gen12HP dGPUs, since these parameters demonstrate higher performance @@ -314,13 +330,24 @@ KernelsPriority FullyConnected_bf_tiled::GetKernelsPriority(const Params& params JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const { JitConstants jit = Parent::GetJitConstants(params, dispatchData); + size_t tile_k_ofm = dispatchData.tile_nk * dispatchData.tile_n; + size_t tile_k_ofm_packed = tile_k_ofm; + if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == 
WeightsType::INT4) { + tile_k_ofm_packed /= 2; + + jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE", params.weights.GetDType(), tile_k_ofm)); + const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; + if (scale_group_size % simd == 0) + jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1)); + } jit.AddConstant(MakeJitConstant("SIMD", simd)); jit.AddConstant(MakeJitConstant("TILE_B", dispatchData.tile_m)); jit.AddConstant(MakeJitConstant("TILE_OFM", dispatchData.tile_n)); jit.AddConstant(MakeJitConstant("TILE_IFM", dispatchData.tile_mk)); jit.AddConstant(MakeJitConstant("TILE_K", dispatchData.tile_nk)); - jit.AddConstant(MakeJitConstant("TILE_K_OFM", dispatchData.tile_nk * dispatchData.tile_n)); + jit.AddConstant(MakeJitConstant("TILE_K_OFM", tile_k_ofm)); + jit.AddConstant(MakeJitConstant("TILE_K_OFM_PACKED", tile_k_ofm_packed)); jit.AddConstant(MakeJitConstant("DISPATCH_BSV", dispatchData.tile_ms)); jit.AddConstant(MakeJitConstant("DISPATCH_FSV", dispatchData.tile_ns)); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp index 9b08e67245258a..d9b1a55e8eba82 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp @@ -4,6 +4,7 @@ #include +#include "common_types.h" #include "fully_connected_kernel_bfyx_ref.h" #include "kernel_selector_utils.h" @@ -22,6 +23,8 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const { k.EnableInputWeightsType(WeightsType::F32); k.EnableInputWeightsType(WeightsType::UINT8); k.EnableInputWeightsType(WeightsType::INT8); + k.EnableInputWeightsType(WeightsType::UINT4); + k.EnableInputWeightsType(WeightsType::INT4); 
k.EnableAllInputLayout(); k.EnableDifferentInputWeightsTypes(); k.EnableDifferentTypes(); @@ -70,6 +73,11 @@ JitConstants FullyConnected_bfyx_Ref::GetJitConstants(const fully_connected_para jit.Merge(MakeTypeJitConstants(accumulator_dt, "ACCUMULATOR")); jit.Merge(MakeActivationJitConstants(params.activations, activation_dt, "_TYPED")); + auto wt = params.weights.GetDType(); + if (wt == WeightsType::UINT4 || wt == WeightsType::INT4) { + jit.Merge(make_int4_packed_type_jit_constant("INT4_PACKED_TYPE", wt, 2)); + } + if (!params.fused_ops.empty()) { std::vector idx_order = { "b", "ofm", "0", "0" }; if (params.outputs[0].GetLayout() == DataLayout::bfyx) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.cpp new file mode 100644 index 00000000000000..d443e2888c09f3 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reorder_weights_int4.h" +#include "kernel_selector_common.h" +#include "kernel_selector_params.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + +ParamsKey ReorderWeightsKernelInt4::GetSupportedKey() const { + ParamsKey k; + k.EnableInputWeightsType(WeightsType::INT4); + k.EnableInputWeightsType(WeightsType::UINT4); + k.EnableOutputWeightsType(WeightsType::UINT4); + k.EnableOutputWeightsType(WeightsType::INT4); + k.EnableInputWeightsLayout(WeightsLayout::oiyx); + k.EnableInputWeightsLayout(WeightsLayout::ioyx); + k.EnableOutputWeightsLayout(WeightsLayout::os_iyx_osv32); + k.EnableOutputWeightsLayout(WeightsLayout::oiyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + return k; +} + +KernelsData ReorderWeightsKernelInt4::GetKernelsData(const Params& params, const optional_params& options) const { + const reorder_weights_params& orgParams = 
static_cast(params); + return GetCommonKernelsData(orgParams, options); +} + +ReorderWeightsKernelInt4::DispatchData ReorderWeightsKernelInt4::SetDefault(const reorder_weights_params& params) const { + DispatchData dispatchData; + + const auto& output = params.output; + + // Divide one of the dimensions by 2 to save with byte granularity + if (output.GetLayout() == WeightsLayout::os_iyx_osv32) { + dispatchData.gws = { Align(output.OFM().v, 32) / 2, output.IFM().v, 1 }; + } else { + dispatchData.gws = { CeilDiv(output.LogicalSize(), 2), 1, 1 }; + } + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + + return dispatchData; +} + +bool ReorderWeightsKernelInt4::Validate(const Params& params, const optional_params& /*options*/) const { + const auto& p = static_cast(params); + const auto& input = p.input; + const auto& output = p.output; + + if (input.LogicalSize() != input.OFM().v * input.IFM().v || + output.LogicalSize() != output.OFM().v * output.IFM().v) { + return false; + } + + bool supported_case = input.GetLayout() == WeightsLayout::oiyx && output.GetLayout() == WeightsLayout::os_iyx_osv32; + supported_case |= input.GetLayout() == WeightsLayout::ioyx && output.GetLayout() == WeightsLayout::oiyx; + supported_case |= input.GetLayout() == WeightsLayout::ioyx && output.GetLayout() == WeightsLayout::os_iyx_osv32; + + return supported_case; +} + +KernelsPriority ReorderWeightsKernelInt4::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { + return DONT_USE_IF_HAVE_SOMETHING_ELSE; +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.h new file mode 100644 index 00000000000000..d98fc9adee6351 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_int4.h @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "reorder_kernel_base.h" + +namespace kernel_selector { +class ReorderWeightsKernelInt4 : public ReorderKernelBase { +public: + ReorderWeightsKernelInt4() : ReorderKernelBase("reorder_weights_int4") {} + virtual ~ReorderWeightsKernelInt4() {} + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; + DispatchData SetDefault(const reorder_weights_params& arg) const override; + +protected: + bool Validate(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp index 3006063045f015..fbe010f111bb76 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp @@ -10,6 +10,7 @@ #include "reorder_weights_image_winograd_6x3_kernel.h" #include "reorder_weights_binary_kernel.h" #include "reorder_weights_opt.h" +#include "reorder_weights_int4.h" namespace kernel_selector { @@ -21,6 +22,7 @@ ReorderWeightsKernelSelector::ReorderWeightsKernelSelector() { Attach(); Attach(); Attach(); + Attach(); } KernelsData ReorderWeightsKernelSelector::GetBestKernels(const Params& params, const optional_params& options) const { diff --git a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h index ad1c03cb8a7298..236c1afeff3701 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h +++ 
b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h @@ -19,6 +19,8 @@ struct weight_bias_params : public base_params { bool compressed = false; bool has_decompression_zp = false; + bool scalar_zp = false; + float zp_value = 0.0f; DataTensor decompression_scale; DataTensor decompression_zero_point; diff --git a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp index 2c2d42b842d696..8a628809266d43 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp @@ -7,6 +7,7 @@ #include "intel_gpu/op/fully_connected.hpp" #include "intel_gpu/op/fully_connected_compressed.hpp" +#include "openvino/op/constant.hpp" #include "intel_gpu/primitives/fully_connected.hpp" #include "intel_gpu/primitives/reshape.hpp" @@ -34,17 +35,30 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share auto scale_name = inputs[2].pid; auto zp_name = inputs.size() == 4 ? inputs[3].pid : ""; + float zp_value = 0.0f; + bool has_scalar_zp = false; + if (op->get_input_size() == 4) { + auto zp_const = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)); + if (zp_const && ov::shape_size(zp_const->get_output_shape(0)) == 1) { + has_scalar_zp = true; + zp_value = zp_const->cast_vector()[0]; + } + } auto fc = cldnn::fully_connected(primitive_name, cldnn::input_info(input_name), weights_name, "", scale_name, - zp_name, + has_scalar_zp ? 
"" : zp_name, cldnn::element_type_to_data_type(op->get_output_element_type(0)), cldnn::padding(), op->get_input_partial_shape(0).size(), op->get_input_partial_shape(1).size()); + if (has_scalar_zp) { + fc.decompression_zero_point_scalar = zp_value; + } + p.add_primitive(*op, fc); } diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 388269ddbb424d..60d2077495ffb9 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -125,7 +125,7 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(cloned_model); - ov::pass::VisualizeTree(path_base + ".dot").run_on_model(cloned_model); + ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } transform_model(cloned_model, config); @@ -146,7 +146,7 @@ std::shared_ptr Plugin::clone_and_transform_model(const std::shared_p GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(cloned_model); - ov::pass::VisualizeTree(path_base + "_transformed.dot").run_on_model(cloned_model); + ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); } return cloned_model; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index 0ff0e1fd0bf258..c4ae333f207f60 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -28,7 +28,9 @@ 
ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto compressed_constant = [](const ov::Output& output) { return (output.get_element_type() == ov::element::u8 || - output.get_element_type() == ov::element::i8) && + output.get_element_type() == ov::element::i8 || + output.get_element_type() == ov::element::u4 || + output.get_element_type() == ov::element::i4) && output.get_target_inputs().size() == 1; }; @@ -101,6 +103,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); std::shared_ptr fc_input_scale = scale; std::shared_ptr fc_input_zp = optional_zero_point; + std::vector> result_nodes = {}; if (has_transpose) { const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); std::shared_ptr transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); @@ -112,9 +115,13 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon } fc_input_b = transpose->clone_with_new_inputs({ fc_input_b->output(0), transpose_const }); + result_nodes.push_back(fc_input_b); fc_input_scale = transpose->clone_with_new_inputs({ scale->output(0), transpose_const }); - if (with_zero_point) + result_nodes.push_back(fc_input_scale); + if (with_zero_point && ov::shape_size(optional_zero_point->output(0).get_shape()) > 1) { fc_input_zp = transpose->clone_with_new_inputs({ optional_zero_point->output(0), transpose_const }); + result_nodes.push_back(fc_input_zp); + } } std::shared_ptr new_fc = nullptr; @@ -131,8 +138,9 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon fc->get_output_type()); } + result_nodes.push_back(new_fc); new_fc->set_friendly_name(fc->get_friendly_name()); - ov::copy_runtime_info(m.get_matched_nodes(), new_fc); + ov::copy_runtime_info(m.get_matched_nodes(), result_nodes); ov::replace_node(fc, new_fc); return true; }; diff --git 
a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index ac567cd998f9a2..99623a72c32811 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -233,7 +233,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return !is_type(next_node); }); - manager.register_pass(ov::element::TypeVector{ov::element::u8}, true); + manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true); const bool keep_precision_sensitive_in_fp32_1 = true; const bool convert_input_output_precision = false; @@ -635,7 +635,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return num_iter != 1; return num_iter >= 16; }); - manager.register_pass(true); manager.run_passes(func); } @@ -647,6 +646,10 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); + // This is supposed to be the last pass to ensure that we don't have name collisions until + // GPU plugin stops using friendly names for program creation + manager.register_pass(true); + manager.run_passes(func); } } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index 75bdb9f0ec71a7..1830e07cda9a8a 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -14,10 +14,10 @@ using namespace ov::test; namespace SubgraphTestsDefinitions { /* - * Subtract_const(U8/NF4/U4) - * / - * Weights(U8/NF4/U4) Convert(F32) - * | / + * Subtract_const(U8/NF4/U4/I4) + * / + * Weights(U8/NF4/U4/I4) Convert(F32) + * | / * Convert(F32) Reshape(optional) * \ / 
Multiply_const(F32) * Subtract(optional) / @@ -50,6 +50,7 @@ using MatmulWeightsDecompressionParams = std::tuple>; // additional config class MatmulWeightsDecompression : public testing::WithParamInterface, public SubgraphBaseTest { @@ -61,6 +62,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; std::tie(shape_params, @@ -69,6 +71,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(data_precision, data_shape)}; const auto weights_subgraph = init_compressed_weights_subgraph(weights_shape, group_size, @@ -106,7 +111,8 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(params[0], weights_subgraph); return std::make_shared(NodeVector{mat_mul}, params, "MatmulWeightsDecompression"); @@ -118,7 +124,8 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(shift_tensor.data())[0] = 0x88; + } auto shift_const = std::make_shared(shift_tensor); std::shared_ptr shift_convert = std::make_shared(shift_const, data_precision); - if (reshape_on_decompression_constant) { + if (reshape_on_decompression_constant && !per_tensor_zp) { auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto shift_reshape = std::make_shared(shift_convert, shift_reshape_const, false); shift_convert = shift_reshape; @@ -179,7 +190,13 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_convert, shift_convert); } - auto scale_tensor = ov::test::utils::create_and_fill_tensor(data_precision, scaleshift_const_shape, 1, -0.5, 10000); + auto scale_tensor = ov::test::utils::create_and_fill_tensor(data_precision, scaleshift_const_shape, 1, -0.5, 30000); + for (size_t i = 0; i < scale_tensor.get_size(); i++) { + if (data_precision == ov::element::f16) + scale_tensor.data()[i] /= ov::float16(16.f); + else if (data_precision == ov::element::f32) + scale_tensor.data()[i] /= 16.f; + } std::shared_ptr 
scale_const = std::make_shared(scale_tensor); if (reshape_on_decompression_constant) { auto scale_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); @@ -214,6 +231,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; std::tie(shape_params, @@ -222,6 +240,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface 200) so fp16 representation & math error is larger than default threshold - if (weights_input_channels > 2048) { - abs_threshold = 4.0f; - } else { - abs_threshold = 1.0f; - } + abs_threshold = 1.0f; + } else { + abs_threshold = 1e-4f; } } @@ -285,12 +300,12 @@ TEST_P(MatmulWeightsDecompression, CompareWithRefs) { namespace { const std::vector activations_precisions = {ov::element::f32, ov::element::f16}; -const std::vector weights_precisions = {ov::element::u8}; +const std::vector weights_precisions = {ov::element::u8, ov::element::u4, ov::element::i4}; +const std::vector transpose_weights = {true, false}; const std::vector input_shapes_basic = { {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, {{{}, {{1, 4, 16}}}, {1, 16, 32}}, - {{{}, {{10, 40, 496}}}, {1, 496, 240}}, {{{}, {{1, 4, 48}}}, {48, 256}}, {{{}, {{11, 339, 377}}}, {377, 335}} }; @@ -300,9 +315,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, ::testing::Combine(::testing::ValuesIn(input_shapes_basic), ::testing::ValuesIn(weights_precisions), ::testing::ValuesIn(activations_precisions), + ::testing::ValuesIn(transpose_weights), ::testing::Values(true), ::testing::Values(true), - ::testing::Values(true), + ::testing::Values(false), ::testing::Values(std::map())), MatmulWeightsDecompression::get_test_case_name); @@ -310,6 +326,7 @@ const std::vector input_shapes_corner_cases_basic = { {{{-1, -1, -1}, {{1, 4, 16}}}, {1, 16, 32}}, {{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}}, {{{-1, -1, 16}, {{1, 4, 16}}}, {16, 
32}, 4}, + {{{-1, 16}, {{4, 16}}}, {16, 32}, 4}, }; const std::vector input_shapes_corner_cases_big = { {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}}, @@ -318,9 +335,9 @@ const std::vector input_shapes_corner_cases_big = { {{{-1, 4096}, {{1, 4096}}}, {4096, 4096}, 128}, }; -const std::vector transpose_weights = {true, false}; const std::vector add_decompression_sub = {true, false}; const std::vector reshape_on_decompression = {true, false}; +const std::vector per_tensor_zp = {true, false}; INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, MatmulWeightsDecompression, @@ -330,6 +347,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), + ::testing::ValuesIn(per_tensor_zp), ::testing::Values(std::map{})), MatmulWeightsDecompression::get_test_case_name); @@ -341,6 +359,7 @@ INSTANTIATE_TEST_SUITE_P(MatMulCompressedWeights_corner_cases_big, ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), + ::testing::ValuesIn(per_tensor_zp), ::testing::Values(std::map{})), MatmulWeightsDecompression::get_test_case_name); } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index 68cfc54237737b..17447be4266141 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -71,11 +71,11 @@ class check_hash_value: public ::testing::Test { const auto primitive_hash = primitve->hash(); const auto params_hash = primitve->type->get_fake_aligned_params(*prim_inst->get_impl_params()).hash(); if (!engine.get_device_info().supports_immad) { - ASSERT_EQ(primitive_hash, 6924775129729406941UL); - ASSERT_EQ(params_hash, 
8142839956977133460UL); + ASSERT_EQ(primitive_hash, 14259723886449306729UL); + ASSERT_EQ(params_hash, 1637150664489130388UL); } else { - ASSERT_EQ(primitive_hash, 6924775129729406941UL); - ASSERT_EQ(params_hash, 9266224209991282259UL); + ASSERT_EQ(primitive_hash, 14259723886449306729UL); + ASSERT_EQ(params_hash, 6343702278017463925UL); } } diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp new file mode 100644 index 00000000000000..bfed37f571d4ed --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp @@ -0,0 +1,237 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_test_utils.hpp" + +#include "openvino/core/model.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/subtract.hpp" +#include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/fully_connected_compressed.hpp" + +#include "plugin/transformations/convert_fc_to_compressed.hpp" + +#include + +using namespace testing; +using namespace ov::intel_gpu; + +namespace ov { +namespace test { +namespace intel_gpu { + +TEST_F(TransformationTestsF, ConvertFCToCompressed1) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto scale = std::make_shared(convert, scale_const); + auto fc = 
std::make_shared(input1, scale); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input1, weights_const, scale_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed2) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto fc = std::make_shared(input1, scale); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed3) { + { + auto 
input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 4, 4 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4, 1 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4, 1 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); + auto reshape = std::make_shared(scale, reshape_const, false); + auto fc = std::make_shared(input1, reshape); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); + auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed4) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 4, 4 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4, 1 }, { 1 }); + auto scale = 
std::make_shared(sub, scale_const); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); + auto reshape = std::make_shared(scale, reshape_const, false); + auto fc = std::make_shared(input1, reshape); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input1, weights_const, scale_const, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed5) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); + auto reshape = std::make_shared(scale, reshape_const, false); + auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose = std::make_shared(reshape, transpose_const); + auto fc = std::make_shared(input1, transpose); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + 
manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); + auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); + auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); + auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed6) { + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f32); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); + auto reshape = std::make_shared(scale, reshape_const, false); + auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose = std::make_shared(reshape, transpose_const); + auto fc = std::make_shared(input1, transpose); + + model = std::make_shared(ov::NodeVector{ 
fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); + auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); + auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); + auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); + auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, transpose_zp); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed7) { + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f16); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); + auto reshape = std::make_shared(scale, reshape_const, false); + auto transpose_const = 
ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose = std::make_shared(reshape, transpose_const); + auto fc = std::make_shared(input1, transpose); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); + auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); + auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); + auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); + auto fc_compressed = std::make_shared(input1, transpose_weights, transpose_scale, transpose_zp); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +} // namespace intel_gpu +} // namespace test +} // namespace ov From 307176e5c611618ae1d5659d5566e5f55a1be909 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 25 Oct 2023 12:46:47 +0400 Subject: [PATCH 053/275] [GPU] Fixed surfaces shape in create_tensor_nv12 helpers (#20539) --- src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp | 4 ++-- .../include/openvino/runtime/intel_gpu/ocl/ocl.hpp | 4 ++-- src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp | 4 ++-- .../remote_blob_tests/gpu_remote_tensor_tests.cpp | 6 ++++-- 4 files changed, 10 
insertions(+), 8 deletions(-) diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp index 70caec5ec389f9..be868c4e7fa824 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp @@ -161,10 +161,10 @@ class D3DContext : public ClContext { AnyMap tensor_params = {{ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::VA_SURFACE}, {ov::intel_gpu::dev_object_handle.name(), static_cast(nv12_surf)}, {ov::intel_gpu::va_plane.name(), uint32_t(0)}}; - auto y_tensor = create_tensor(element::u8, {1, 1, height, width}, tensor_params); + auto y_tensor = create_tensor(element::u8, {1, height, width, 1}, tensor_params); tensor_params[ov::intel_gpu::mem_handle.name()] = static_cast(nv12_surf); tensor_params[ov::intel_gpu::va_plane.name()] = uint32_t(1); - auto uv_tensor = create_tensor(element::u8, {1, 2, height / 2, width / 2}, tensor_params); + auto uv_tensor = create_tensor(element::u8, {1, height / 2, width / 2, 2}, tensor_params); return std::make_pair(y_tensor.as(), uv_tensor.as()); } diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp index f35f7531a9a128..6730c9cd8da0e3 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp @@ -264,9 +264,9 @@ class ClContext : public RemoteContext { AnyMap tensor_params = { {ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::OCL_IMAGE2D}, {ov::intel_gpu::mem_handle.name(), static_cast(nv12_image_plane_y.get())}}; - auto y_tensor = create_tensor(element::u8, {1, 1, height, width}, tensor_params); + auto y_tensor = create_tensor(element::u8, {1, height, width, 1}, tensor_params); tensor_params[ov::intel_gpu::mem_handle.name()] = static_cast(nv12_image_plane_uv.get()); - auto 
uv_tensor = create_tensor(element::u8, {1, 2, height / 2, width / 2}, tensor_params); + auto uv_tensor = create_tensor(element::u8, {1, height / 2, width / 2, 2}, tensor_params); return std::make_pair(y_tensor.as(), uv_tensor.as()); } diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp index 402abe223bca09..66ed166ff78f08 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp @@ -125,9 +125,9 @@ class VAContext : public ClContext { AnyMap tensor_params = {{ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::VA_SURFACE}, {ov::intel_gpu::dev_object_handle.name(), nv12_surf}, {ov::intel_gpu::va_plane.name(), uint32_t(0)}}; - auto y_tensor = create_tensor(element::u8, {1, 1, height, width}, tensor_params); + auto y_tensor = create_tensor(element::u8, {1, height, width, 1}, tensor_params); tensor_params[ov::intel_gpu::va_plane.name()] = uint32_t(1); - auto uv_tensor = create_tensor(element::u8, {1, 2, height / 2, width / 2}, tensor_params); + auto uv_tensor = create_tensor(element::u8, {1, height / 2, width / 2, 2}, tensor_params); return std::make_pair(y_tensor.as(), uv_tensor.as()); } diff --git a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp index f3bffc6e6cebde..f4b16858551ea4 100644 --- a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp @@ -1040,8 +1040,10 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_ConvertTranspose) { cl::Image2D img_y = cl::Image2D(nv12_image_plane_y); cl::Image2D img_uv = cl::Image2D(nv12_image_plane_uv); - auto tensor_remote_y = cldnn_context.create_tensor(param_input_y->get_element_type(), 
fake_image_data_y.get_shape(), img_y); - auto tensor_remote_uv = cldnn_context.create_tensor(param_input_uv->get_element_type(), fake_image_data_uv.get_shape(), img_uv); + auto nv12 = cldnn_context.create_tensor_nv12(img_y, img_uv); + + auto tensor_remote_y = nv12.first; + auto tensor_remote_uv = nv12.second; inf_req_remote.set_tensor(*param_input_y->output(0).get_tensor().get_names().begin(), tensor_remote_y); inf_req_remote.set_tensor(*param_input_uv->output(0).get_tensor().get_names().begin(), tensor_remote_uv); From 361011c75eac7887729630345c57d6386328c92c Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Wed, 25 Oct 2023 14:18:43 +0530 Subject: [PATCH 054/275] fix coverity scan issue (#20678) --- src/inference/src/dev/threading/cpu_streams_executor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index 691a3951615460..8068359ad935ce 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -372,7 +372,7 @@ struct CPUStreamsExecutor::Impl { public: CustomThreadLocal(std::function()> callback_construct, Impl* impl) - : ThreadLocal>(callback_construct), + : ThreadLocal>(std::move(callback_construct)), _impl(impl) {} std::shared_ptr local() { // maybe there are two CPUStreamsExecutors in the same thread. 
From 68538b2b3f57e3458e41e11521720630e2c79447 Mon Sep 17 00:00:00 2001 From: "( Nechiforel David-Samuel ) NsdHSO" <37635083+NsdHSO@users.noreply.github.com> Date: Wed, 25 Oct 2023 11:50:53 +0300 Subject: [PATCH 055/275] =?UTF-8?q?docs=20=F0=9F=93=9D=20:=20add=20the=20t?= =?UTF-8?q?ake=20issue=20workflow=20[19006]=20(#19027)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs 📝 : add the take issue workflow * chore: add the latest tag from action Co-authored-by: Przemyslaw Wysocki * Update .github/workflows/triage.yml Co-authored-by: Przemyslaw Wysocki * docs(readme): more descriptive message Co-authored-by: Przemyslaw Wysocki * docs(triage): more descriptive message in action Co-authored-by: Przemyslaw Wysocki * chore: .github/workflows/triage.yml Co-authored-by: Andrey Kashchikhin * chore:🌻 rename the file and add new secret --------- Co-authored-by: Przemyslaw Wysocki Co-authored-by: Andrey Kashchikhin --- .github/workflows/assign_issue.yml | 23 +++++++++++++++++++++++ README.md | 2 ++ 2 files changed, 25 insertions(+) create mode 100644 .github/workflows/assign_issue.yml diff --git a/.github/workflows/assign_issue.yml b/.github/workflows/assign_issue.yml new file mode 100644 index 00000000000000..236705e070bf97 --- /dev/null +++ b/.github/workflows/assign_issue.yml @@ -0,0 +1,23 @@ +name: Take Issue + +on: + issue_comment: + types: + - created + - edited + +jobs: + take-issue: + name: Take issue + runs-on: ubuntu-latest + permissions: + issues: write + timeout-minutes: 10 + steps: + - name: take an issue + uses: bdougie/take-action@v1.6.1 + with: + message: Thank you for looking into this issue! Please let us know if you have any questions or require any help. + issueCurrentlyAssignedMessage: Thanks for being interested in this issue. It looks like this ticket is already assigned to a contributor. Please communicate with the assigned contributor to confirm the status of the issue. 
+ trigger: .take + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index 489ef7803ccd80..ca6045fbd8322d 100644 --- a/README.md +++ b/README.md @@ -177,6 +177,8 @@ See [How to build OpenVINO](./docs/dev/build.md) to get more information about t See [Contributions Welcome](https://github.com/openvinotoolkit/openvino/issues/17502) for good first issues. See [CONTRIBUTING](./CONTRIBUTING.md) for contribution details. Thank you! +## Take the issue +If you wish to be assigned to an issue please add a comment with `.take` command. ## Get a support From f2b26962fa7938fec7e1d48c42ff5841965f2ae8 Mon Sep 17 00:00:00 2001 From: Evgenya Nugmanova Date: Wed, 25 Oct 2023 13:04:04 +0400 Subject: [PATCH 056/275] De-Reshape MatMul (#20396) * De-Reshape MatMul * Fixed includes * Comments resolved * Style * comment adressed --- .../dereshape_matmul.hpp | 66 +++ .../symbolic_optimizations.hpp | 27 +- .../symbolic_transformations/utils.hpp | 13 +- .../dereshape_matmul.cpp | 336 +++++++++++++ .../symbolic_optimizations.cpp | 91 +++- .../symbolic_transformations/utils.cpp | 22 +- .../dereshape_matmul.cpp | 454 ++++++++++++++++++ 7 files changed, 987 insertions(+), 22 deletions(-) create mode 100644 src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp create mode 100644 src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp create mode 100644 src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp diff --git a/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp new file mode 100644 index 00000000000000..6df2e406ee8154 --- /dev/null +++ b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp @@ -0,0 +1,66 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +class TRANSFORMATIONS_API DeReshapeMatMul; +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief Transformation uses symbol / label information to optimize out Reshape operations surrounding MatMul. + * It checks that surrounding Reshapes are only manipulating with batch dimensions of tensor in a do-undo kind of way. + * + * Example: + * Before: + * [A,B,C,D] -> Reshape -> [A*B,C,D] + * MatMul [A*B,C,E] -> Reshape -> [A,B,C,E] + * [A,B,D,E] -> Reshape -> [A*B,D,E] + * + * After: + * [A,B,C,D] -> + * MatMul -> [A,B,C,E] + * [A,B,D,E] -> + * + * Transformation allows slightly different variations of the pattern on inputs of MatMul. + * - Simplest pattern contains only Reshape operation on MatMul input: + * Reshape -> MatMul + * + * - The next acceptable variation is Concat of two inputs on MatMul input: + * Reshape -[-> Concat -]-> MatMul + * This variation would be transformed with realignment of the other input of Concat and the other outputs of + * Concat with the help of Reshape operations + * + * - The most complex variation on the MatMul input pattern is with Binary Elementwise Operation with scalar second + * input: Reshape -[-> Concat -]-[-> BEA (scalar) -]-> MatMul + * + * Additionally, transformation supports variation of the pattern on output of MatMul. It allows for + * Binary Elementwise Arithmetic operation without second input scalar restriction. + * MatMul -[-> BEA -]-> Reshape + * this pattern variation is only applicable for the case when input reshapes are 4D -> 3D and output reshape is 3D -> + * 4D. 
Additionally, shape labels on output of MatMul should be equal to the input shape labels of the last Reshape, + * meaning that this Binary Elementwise Arithmetic doesn't perform any broadcasting of input coming from MatMul -- only + * other input may be broadcasted to the MatMul input of this BEA. This effect (equality of MatMul output shape labels + * and output shape of BEA) is being handled by LabelResolvingThroughSelect transformation in the particular models + * that this variation targets. + * + * Full pattern this transformation searches for: + * -> Reshape -[-> Concat -]-[-> BEA (scalar) -]-> + * MatMul -[-> BEA -]-> Reshape -> + * -> Reshape -[-> Concat -]-[-> BEA (scalar) -]-> + * + * NOTE: input branches could be (and in observed model cases are) asymmetrical, meaning that the presence of Concat + * on one input of MatMul doesn't require the other input to also have Concat + */ +class ov::pass::DeReshapeMatMul : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("DeReshapeMatMul", "0"); + DeReshapeMatMul(); +}; diff --git a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp index 1cf3cf9577dc78..35b90f942dfeb1 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp @@ -4,16 +4,17 @@ #pragma once -#include -#include -#include -#include -#include +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/pass/pass.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "transformations_visibility.hpp" namespace ov { namespace pass { class TRANSFORMATIONS_API SymbolicOptimizations; class TRANSFORMATIONS_API SymbolicPropagation; +class TRANSFORMATIONS_API LabelResolvingThroughSelect; } 
// namespace pass } // namespace ov @@ -48,3 +49,19 @@ class ov::pass::SymbolicPropagation : public ov::pass::ModelPass { private: std::shared_ptr m_te; }; + +/** + * @ingroup ie_transformation_common_api + * @brief Transformation requires equal labels on one input of Add and output of last Reshape in the pattern: + * -> Add -> Reshape -[then or else input]-> Select -> Softmax -> Reshape -> + * + * If shape labels on mentioned tensors are equal we proved that no broadcasting of this input was done for Add and + * for Select. Therefore, we can put the same labels on the output of Add and Select. This transformation helps + * propagate labels and will not be needed if we would use information on equality of products of input and output + * dimensions of Reshape operations + */ +class ov::pass::LabelResolvingThroughSelect : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("LabelResolvingThroughSelect", "0"); + LabelResolvingThroughSelect(); +}; \ No newline at end of file diff --git a/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp b/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp index 2f3d84dfe825ff..9d2ecdda82d582 100644 --- a/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp +++ b/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp @@ -4,12 +4,11 @@ #pragma once -#include - #include "openvino/core/descriptor/tensor.hpp" #include "openvino/core/dimension.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/type/element_type.hpp" +#include "transformations_visibility.hpp" namespace ov { namespace symbol { @@ -38,6 +37,16 @@ TRANSFORMATIONS_API bool get_labels(const ov::Output& output, ov::Tens /// /// \return true if labels are unique and equal between lhs and rhs else false TRANSFORMATIONS_API bool are_unique_and_equal_labels(const ov::TensorLabel& lhs, const ov::TensorLabel& rhs); + +/// \brief
Compares dimensions: if dimensions are static compares values of dimensions, if dimensions are dynamic +/// compares their respective labels using TableOfEquivalence +/// +/// \param lhs Dimension object to compare +/// \param rhs Dimension object to compare +/// +/// \return true if static dimensions are equal and dynamic dimensions have equal labels else false +TRANSFORMATIONS_API bool dims_are_equal(const ov::Dimension& lhs, const ov::Dimension& rhs); + } // namespace util } // namespace symbol } // namespace ov diff --git a/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp b/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp new file mode 100644 index 00000000000000..2c7ee44c6328d9 --- /dev/null +++ b/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp @@ -0,0 +1,336 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/symbolic_transformations/dereshape_matmul.hpp" + +#include "itt.hpp" +#include "openvino/core/dimension_tracker.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/util/binary_elementwise_arithmetic.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/symbolic_transformations/utils.hpp" +#include "transformations/utils/utils.hpp" + +using namespace ov::symbol::util; + +namespace { +bool concat_predicate(ov::Output output) { + auto output_pshape = output.get_partial_shape(); + if (output_pshape.rank().is_dynamic() || output_pshape.size() <= 2) + return false; + const auto& concat = ov::as_type_ptr(output.get_node_shared_ptr()); + if (!concat) + return false; + return concat->get_concatenation_axis() >= output_pshape.rank().get_length() - 2; +} + +bool 
last_two_dims_are_equal(const ov::PartialShape& lhs, const ov::PartialShape& rhs) { + if (lhs.rank().is_dynamic() || lhs.size() < 2) + return false; + if (rhs.rank().is_dynamic() || rhs.size() < 2) + return false; + auto lhs_dim = lhs.rbegin(), rhs_dim = rhs.rbegin(); + for (int i = 0; i < 2; ++i, lhs_dim++, rhs_dim++) + if (!dims_are_equal(*lhs_dim, *rhs_dim)) + return false; + return true; +} + +bool reshape_keeps_last_two_dims(const std::shared_ptr& op) { + return last_two_dims_are_equal(op->get_input_partial_shape(0), op->get_output_partial_shape(0)); +} + +bool batches_are_equal(const ov::PartialShape& lhs, const ov::PartialShape& rhs, bool one_dim_can_differ = false) { + if (lhs.rank().is_dynamic() || rhs.rank().is_dynamic() || lhs.size() != rhs.size()) + return false; + size_t num_dims_differ = 0; + for (size_t i = 0; i < lhs.size() - 2; ++i) + num_dims_differ += !dims_are_equal(lhs[i], rhs[i]); + return num_dims_differ <= (one_dim_can_differ ? 1 : 0); +} + +bool batches_are_equal(const std::shared_ptr& op_0, const std::shared_ptr& op_1) { + auto input_0 = op_0->get_input_partial_shape(0); + auto input_1 = op_1->get_input_partial_shape(0); + auto output_0 = op_0->get_output_partial_shape(0); + auto output_1 = op_1->get_output_partial_shape(0); + return batches_are_equal(input_0, input_1, true) && batches_are_equal(output_0, output_1); +} + +void get_dims(const ov::Output& source, + const size_t& from, + const size_t& to, + const std::vector>& copy_rt_info_from, + ov::NodeVector& dims) { + std::vector non_constant_ids; + for (size_t i = from; i < to; ++i) { + auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(source, {i}, copy_rt_info_from); + OPENVINO_SUPPRESS_DEPRECATED_START + if (auto constant = ov::get_constant_from_source(node)) { + OPENVINO_SUPPRESS_DEPRECATED_END + node = constant; + } else { + non_constant_ids.push_back(i); + } + dims.push_back(node); + } +} + +ov::Output get_target_shape_from_sources(const ov::Output& 
batch_dims_source, + const ov::Output& non_batch_dims_source, + const std::vector>& copy_rt_info_from) { + ov::NodeVector dims; + // batch dims here stand for MatMul batch dims -- leaving two last dims for Matrix Multiplication + size_t num_batch_dims = batch_dims_source.get_partial_shape().size() - 2; + get_dims(batch_dims_source, 0, num_batch_dims, copy_rt_info_from, dims); + + size_t non_batch_dims_start = non_batch_dims_source.get_partial_shape().size() - 2; + get_dims(non_batch_dims_source, non_batch_dims_start, non_batch_dims_start + 2, copy_rt_info_from, dims); + + size_t num_non_const_nodes = 0; // candidates for becoming a Constant -1 -- special value for Reshape pattern + for (size_t curr_i = 0; curr_i + 1 < dims.size(); ++curr_i) { + auto curr_node = dims[curr_i], next_node = dims[curr_i + 1]; + bool curr_is_const = ov::op::util::is_constant(curr_node), next_is_const = ov::op::util::is_constant(next_node); + if (num_non_const_nodes == 0 && !curr_is_const && next_is_const) { + curr_node = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + curr_is_const = true; + num_non_const_nodes += 1; + } + if (num_non_const_nodes == 0 && !next_is_const && curr_is_const) { + next_node = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + next_is_const = true; + num_non_const_nodes += 1; + } + if (curr_is_const && next_is_const) { + dims[curr_i] = nullptr; + dims[curr_i + 1] = ov::op::util::make_try_fold(ov::NodeVector{curr_node, next_node}, 0); + } + } + dims.erase(std::remove_if(dims.begin(), + dims.end(), + [](const std::shared_ptr& node) { + return node == nullptr; + }), + dims.end()); + auto target_shape = ov::op::util::make_try_fold(dims, 0); + ov::copy_runtime_info(copy_rt_info_from, target_shape); + return target_shape->output(0); +} + +void pull_reshape_through_optional_concat_and_bea(const ov::pass::pattern::PatternValueMap& vm, + std::shared_ptr concat_label, + std::shared_ptr bea_label, + ov::Output reshape_output, + ov::Input 
matmul_input, + std::vector& nodes_for_revalidation) { + // Reshape -- [Concat] -- [BEA with scalar] -- > MatMul + auto original_reshape = reshape_output.get_node_shared_ptr(); + if (vm.count(concat_label)) { + auto concat_node = ov::as_type_ptr(vm.at(concat_label).get_node_shared_ptr()); + OPENVINO_ASSERT(concat_node != nullptr, + "DeReshapeMatMul transformation matched operation which should be Concat -- but it is not"); + auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); + auto axis = (concat_node->get_concatenation_axis() == (rank - 1)) ? -1 : -2; + + auto idx_of_reshape_input = reshape_output == concat_node->input_value(0) ? 0 : 1; + auto idx_of_non_reshape_input = static_cast(!idx_of_reshape_input); + + auto target_shape_of_input = get_target_shape_from_sources(original_reshape->input_value(0), + concat_node->input_value(idx_of_non_reshape_input), + {original_reshape}); + + auto input_reshape = original_reshape->clone_with_new_inputs( + {concat_node->input_value(idx_of_non_reshape_input), target_shape_of_input}); + ov::copy_runtime_info(original_reshape, input_reshape); + + ov::replace_output_update_name(reshape_output, original_reshape->input_value(0)); + + ov::OutputVector new_concat_inputs(2); + new_concat_inputs[idx_of_reshape_input] = concat_node->input_value(idx_of_reshape_input); + new_concat_inputs[idx_of_non_reshape_input] = input_reshape->output(0); + + auto new_concat = std::make_shared(new_concat_inputs, axis); + ov::copy_runtime_info({concat_node, original_reshape}, new_concat); + + auto target_shape_of_output = + get_target_shape_from_sources(input_reshape->input_value(0), new_concat->output(0), {original_reshape}); + auto output_reshape = original_reshape->clone_with_new_inputs({new_concat->output(0), target_shape_of_output}); + ov::copy_runtime_info(original_reshape, output_reshape); + + if (vm.count(bea_label)) { + auto bea_node = vm.at(bea_label).get_node_shared_ptr(); + auto idx_of_non_scalar_data = 
bea_node->input_value(0) == vm.at(concat_label) ? 0 : 1; + bea_node->input(idx_of_non_scalar_data).replace_source_output(new_concat); + nodes_for_revalidation.insert(nodes_for_revalidation.begin(), bea_node.get()); + } else { + matmul_input.replace_source_output(new_concat); + } + ov::replace_output_update_name(concat_node->output(0), output_reshape->output(0)); + } else { + // no Concat and it doesn't matter if BEA is present -- just delete reshape + ov::replace_output_update_name(reshape_output, original_reshape->input_value(0)); + } +} +} // namespace + +#define IN_RESHAPE \ + pattern::wrap_type(pattern::op::as_value_predicate([](std::shared_ptr n) -> bool { \ + return pattern::consumers_count(1)(n->output(0)) && reshape_keeps_last_two_dims(n); \ + })); + +#define SCALAR_INPUT \ + pattern::any_input([](ov::Output out) { \ + return out.get_partial_shape().is_static() && ov::shape_size(out.get_shape()) == 1; \ + }); + +ov::pass::DeReshapeMatMul::DeReshapeMatMul() { + MATCHER_SCOPE(DeReshapeMatMul); + // BEGIN: symmetrical patterns for MatMul inputs + + // lhs of MatMul + auto lhs_reshape = IN_RESHAPE; + + auto lhs_concat_0 = pattern::wrap_type({pattern::any_input(), lhs_reshape}, concat_predicate); + auto lhs_concat_1 = pattern::wrap_type({lhs_reshape, pattern::any_input()}, concat_predicate); + auto lhs_concat = std::make_shared(OutputVector{lhs_concat_0, lhs_concat_1}); + + auto lhs_reshape_or_concat = std::make_shared(OutputVector{lhs_reshape, lhs_concat}); + + auto lhs_bea_scalar = SCALAR_INPUT; + auto lhs_bea = pattern::wrap_type({lhs_reshape_or_concat, lhs_bea_scalar}, + pattern::consumers_count(1)); + + auto lhs_bea_or_concat = std::make_shared(OutputVector{lhs_reshape_or_concat, lhs_bea}); + + // rhs of MatMul + auto rhs_reshape = IN_RESHAPE; + + auto rhs_concat_0 = pattern::wrap_type({pattern::any_input(), rhs_reshape}, concat_predicate); + auto rhs_concat_1 = pattern::wrap_type({rhs_reshape, pattern::any_input()}, concat_predicate); + auto rhs_concat = 
std::make_shared(OutputVector{rhs_concat_0, rhs_concat_1}); + + auto rhs_reshape_or_concat = std::make_shared(OutputVector{rhs_reshape, rhs_concat}); + + auto rhs_bea_scalar = SCALAR_INPUT; + auto rhs_bea = pattern::wrap_type({rhs_reshape_or_concat, rhs_bea_scalar}, + pattern::consumers_count(1)); + + auto rhs_bea_or_concat = std::make_shared(OutputVector{rhs_reshape_or_concat, rhs_bea}); + // END: symmetrical patterns for MatMul inputs + + auto matmul = + pattern::wrap_type({lhs_bea_or_concat, rhs_bea_or_concat}, pattern::consumers_count(1)); + + auto add = pattern::wrap_type( + OutputVector{matmul, pattern::any_input()}, + [](ov::Output out) -> bool { + if (!pattern::consumers_count(1)(out)) + return false; + auto input_0_pshape = out.get_node_shared_ptr()->get_input_partial_shape(0); + auto input_1_pshape = out.get_node_shared_ptr()->get_input_partial_shape(1); + auto output_pshape = out.get_partial_shape(); + ov::TensorLabel output_labels, input_0_labels, input_1_labels; + if (get_labels(input_0_pshape, input_0_labels) && get_labels(input_1_pshape, input_1_labels) && + get_labels(output_pshape, output_labels)) { + if (input_0_pshape.size() != 3 || input_1_pshape.size() != 3 || output_pshape.size() != 3) + return false; + return are_unique_and_equal_labels(input_0_labels, output_labels) || + are_unique_and_equal_labels(input_1_labels, output_labels); + } else { + return false; + } + }); + + auto matmul_or_add = std::make_shared(OutputVector{matmul, add}); + auto final_reshape = + pattern::wrap_type({matmul_or_add, pattern::any_input()}, + pattern::op::as_value_predicate([](std::shared_ptr n) -> bool { + return reshape_keeps_last_two_dims(n); + })); + + ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) { + const auto& pm = m.get_pattern_map(); + const auto& vm = m.get_pattern_value_map(); + std::vector nodes_for_revalidation{pm.at(matmul).get()}; + // reshapes check: BEGIN + // reshape_keeps_last_two_dims checks were already applied for 
all Reshapes in the pattern predicates + auto in_reshape_0 = pm.at(lhs_reshape); + auto in_reshape_1 = pm.at(rhs_reshape); + auto out_reshape = pm.at(final_reshape); + if (!batches_are_equal(in_reshape_0, in_reshape_1) || + !batches_are_equal(in_reshape_0->get_output_partial_shape(0), out_reshape->get_input_partial_shape(0)) || + !batches_are_equal(in_reshape_0->get_input_partial_shape(0), + out_reshape->get_output_partial_shape(0), + true)) { + return false; + } + // reshapes check: END + + if (vm.count(add)) { + const auto& in_reshape_0_in_pshape = in_reshape_0->get_input_partial_shape(0); + if (in_reshape_0_in_pshape.size() != 4) + return false; + // we only allow MatMul -> Add pattern to be optimized in case of 4d -> 3d -> 4d DeReshaping + } + + // preventing wrong matches + if (vm.count(lhs_concat) && !ov::as_type_ptr(pm.at(lhs_concat))) + return false; + if (vm.count(rhs_concat) && !ov::as_type_ptr(pm.at(rhs_concat))) + return false; + + pull_reshape_through_optional_concat_and_bea(vm, + lhs_concat, + lhs_bea, + in_reshape_0, + pm.at(matmul)->input(0), + nodes_for_revalidation); + pull_reshape_through_optional_concat_and_bea(vm, + rhs_concat, + rhs_bea, + in_reshape_1, + pm.at(matmul)->input(1), + nodes_for_revalidation); + + for (auto& node : nodes_for_revalidation) + node->validate_and_infer_types(); + + if (vm.count(add)) { + auto add_node = pm.at(add); + size_t matmul_port = (add_node->input_value(0) == vm.at(matmul) ? 
0 : 1); + size_t non_matmul_port = static_cast(!matmul_port); + + auto first_batch_dim = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source( + add_node->input_value(non_matmul_port), + {0}, + {in_reshape_0, in_reshape_1}); + auto divisor = + ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(in_reshape_0->input_value(0), + {1}, + {in_reshape_0, in_reshape_1}); + first_batch_dim = std::make_shared(first_batch_dim, divisor, true); + auto minus_one = ov::op::v0::Constant::create(element::i64, {1}, {-1}); + auto non_batch_dims = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source( + add_node->input_value(non_matmul_port), + {1, 2}, + {in_reshape_0, in_reshape_1}); + auto pattern = + std::make_shared(OutputVector{first_batch_dim, minus_one, non_batch_dims}, 0); + auto other_input_reshape = + op::util::make_try_fold(add_node->input_value(non_matmul_port), pattern, true); + add_node->input(non_matmul_port).replace_source_output(other_input_reshape->output(0)); + ov::copy_runtime_info({in_reshape_0, in_reshape_1}, {first_batch_dim, minus_one, other_input_reshape}); + add_node->validate_and_infer_types(); + } + ov::replace_output_update_name(out_reshape->output(0), out_reshape->input_value(0)); + return true; + }; + + auto m = std::make_shared(final_reshape, matcher_name); + register_matcher(m, matcher_pass_callback); +} diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp index 7451df397ba33c..660a4fc80932c8 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp @@ -4,18 +4,28 @@ #include "transformations/symbolic_transformations/symbolic_optimizations.hpp" -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include - #include "itt.hpp" +#include "openvino/core/dimension_tracker.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/util/symbolic_info.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/visualize_tree.hpp" +#include "transformations/common_optimizations/dimension_tracking.hpp" +#include "transformations/common_optimizations/nop_elimination.hpp" +#include "transformations/common_optimizations/shared_ops_optimization.hpp" +#include "transformations/common_optimizations/simplify_shape_of_sub_graph.hpp" +#include "transformations/symbolic_transformations/chained_maximum.hpp" +#include "transformations/symbolic_transformations/dereshape_matmul.hpp" +#include "transformations/symbolic_transformations/label_optimization.hpp" +#include "transformations/symbolic_transformations/nop_broadcast.hpp" +#include "transformations/symbolic_transformations/utils.hpp" + +using namespace ov::pass; +using namespace ov::symbol::util; namespace { void symbolic_set_up_for_shape(ov::DimensionTracker& dt, ov::PartialShape& shape) { @@ -116,6 +126,60 @@ bool ov::pass::SymbolicPropagation::run_on_model(const std::shared_ptr(); + auto input_reshape = pattern::wrap_type({add, pattern::any_input()}); + + auto select_then = pattern::wrap_type({pattern::any_input(), input_reshape, pattern::any_input()}); + auto select_else = pattern::wrap_type({pattern::any_input(), pattern::any_input(), input_reshape}); + auto select = std::make_shared(OutputVector{select_then, select_else}); + + auto softmax = pattern::wrap_type({select}); + auto reshape = pattern::wrap_type({softmax, pattern::any_input()}); + + ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) { + const auto& value_map = m.get_pattern_value_map(); + 
ov::TensorLabel reshape_labels, add_0_labels, add_1_labels; + if (!get_labels(value_map.at(reshape).get_partial_shape(), reshape_labels)) + return false; + auto add_node = value_map.at(add).get_node_shared_ptr(); + auto add_0_pshape = add_node->input_value(0).get_partial_shape(); + auto add_1_pshape = add_node->input_value(1).get_partial_shape(); + if (!get_labels(add_0_pshape, add_0_labels) && !get_labels(add_1_pshape, add_1_labels)) + return false; + + if (are_unique_and_equal_labels(reshape_labels, add_0_labels)) { + // we detected that no broadcasting was done during binary elementwise and select, propagating labels + // through + add_node->set_output_type(0, add_node->get_output_element_type(0), add_0_pshape); + } else if (are_unique_and_equal_labels(reshape_labels, add_1_labels)) { + // we detected that no broadcasting was done during binary elementwise and select, propagating labels + // through + add_node->set_output_type(0, add_node->get_output_element_type(0), add_1_pshape); + } else { + return false; + } + + std::shared_ptr select_node = nullptr; + if (value_map.count(select_then)) + select_node = value_map.at(select_then).get_node_shared_ptr(); + if (value_map.count(select_else)) + select_node = value_map.at(select_else).get_node_shared_ptr(); + if (select_node == nullptr) + return false; + + auto select_output = select_node->output(0); + const auto& reshape_pshape = value_map.at(input_reshape).get_partial_shape(); + select_node->set_output_type(0, select_node->get_output_element_type(0), reshape_pshape); + value_map.at(softmax).get_node_shared_ptr()->validate_and_infer_types(); + return true; + }; + + auto m = std::make_shared(reshape, matcher_name); + register_matcher(m, matcher_pass_callback); +} + ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) { m_manager = std::make_shared(); m_manager->set_per_pass_validation(false); @@ -134,7 +198,10 @@ ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) { // 
transformations which use labels for optimizations REGISTER_SYMBOLIC(ApplyTableOfEquivalence) if (full_run) { - REGISTER_SYMBOLIC(OptimizeLabelsUsedAsValues) // reduce shape sub-graphs + REGISTER_SYMBOLIC(OptimizeLabelsUsedAsValues) // reduce shape sub-graphs + REGISTER_SYMBOLIC(LabelResolvingThroughSelect) // figures out that broadcasting didn't happen through Select op + REGISTER_SYMBOLIC(DeReshapeMatMul) + REGISTER_SYMBOLIC(SimplifyShapeOfSubGraph) } } diff --git a/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp b/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp index 3fedc3bd4c85be..abe1485bf5fdd3 100644 --- a/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp +++ b/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp @@ -4,9 +4,9 @@ #include "transformations/symbolic_transformations/utils.hpp" -#include -#include -#include +#include "openvino/core/dimension_tracker.hpp" +#include "openvino/core/node.hpp" +#include "transformations/utils/utils.hpp" bool ov::symbol::util::get_labels(const ov::PartialShape& shape, ov::TensorLabel& labels) { if (shape.rank().is_dynamic()) @@ -32,3 +32,19 @@ bool ov::symbol::util::are_unique_and_equal_labels(const ov::TensorLabel& lhs, c return false; return true; } + +bool ov::symbol::util::dims_are_equal(const ov::Dimension& lhs, const ov::Dimension& rhs) { + if (lhs.is_static() && lhs == rhs) + return true; + auto lhs_label = ov::DimensionTracker::get_label(lhs); + auto rhs_label = ov::DimensionTracker::get_label(rhs); + if (lhs_label == ov::no_label || rhs_label == ov::no_label) + return false; + if (lhs_label == rhs_label) + return true; + if (auto table_l = ov::DimensionTracker::get_table_of_equivalence(lhs)) + return table_l->are_equal(lhs, rhs); + if (auto table_r = ov::DimensionTracker::get_table_of_equivalence(rhs)) + return table_r->are_equal(lhs, rhs); + return false; +} diff --git 
a/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp b/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp new file mode 100644 index 00000000000000..7ab4d48cc81381 --- /dev/null +++ b/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp @@ -0,0 +1,454 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/symbolic_transformations/dereshape_matmul.hpp" + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/dimension_tracker.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/reshape.hpp" +#include "transformations/utils/utils.hpp" + +using namespace ov; +using namespace ov::op; +using namespace std; + +namespace { +/* Helps to organize dimension representation in the following tests: + * 1. Creates requested amount of dimensions + * 2. Labels them automatically + * 3. Creates value representation of the dimension via creating Parameter->Shape->Gather subgraph + * 4. Gives access to dimension and its value representation via operator[] + * 5. 
Gives access to utility Parameter via get_parameter -- only used for ov::Model creation in tests + * */ +class DimensionTestHelper { +public: + struct DimensionWithOutput { + Dimension dim; + Output source; + }; + + explicit DimensionTestHelper(const size_t& num_dims) { + auto te = make_shared(); + auto dt = ov::DimensionTracker(te); + auto dimensions = PartialShape::dynamic(Rank(num_dims)); + dt.set_up_for_tracking(dimensions); + parameter = make_shared(element::f32, dimensions); + for (size_t i = 0; i < num_dims; ++i) + m_map[i] = {dimensions[i], op::util::node_to_get_shape_value_of_indices_from_shape_source(parameter, {i})}; + } + + DimensionWithOutput operator[](size_t idx) const { + return m_map.at(idx); + } + + ov::PartialShape make_shape(const vector& dim_indices) const { + auto shape = PartialShape::dynamic(Rank(dim_indices.size())); + for (size_t i = 0; i < dim_indices.size(); ++i) + shape[i] = m_map.at(dim_indices[i]).dim; + return shape; + } + + shared_ptr make_reshape(const Output& source, const vector& dims_indices) const { + OutputVector sources(dims_indices.size()); + for (size_t i = 0; i < dims_indices.size(); ++i) + sources[i] = m_map.at(dims_indices[i]).source; + auto concat = make_shared(sources, 0); + return make_shared(source, concat, false); + } + + std::shared_ptr get_parameter() const { + return parameter; + } + +private: + std::shared_ptr parameter; + std::map m_map; +}; + +size_t max_element(const vector>& vectors) { + size_t current_max = 0; + for (const auto& vector : vectors) + current_max = max(current_max, *std::max_element(vector.begin(), vector.end())); + return current_max; +} + +shared_ptr reshape(const Output& source, + const vector& dims_indices, + const DimensionTestHelper& helper) { + OutputVector sources(dims_indices.size()); + for (size_t i = 0; i < dims_indices.size(); ++i) + sources[i] = helper[dims_indices[i]].source; + auto concat = make_shared(sources, 0); + return make_shared(source, concat, false); +} + +void 
get_dims(const ov::Output& source, const size_t& from, const size_t& to, ov::NodeVector& dims) { + std::vector non_constant_ids; + for (size_t i = from; i < to; ++i) { + auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(source, {i}); + OPENVINO_SUPPRESS_DEPRECATED_START + if (auto constant = ov::get_constant_from_source(node)) { + OPENVINO_SUPPRESS_DEPRECATED_END + node = constant; + } else { + non_constant_ids.push_back(i); + } + dims.push_back(node); + } +} + +ov::Output get_target_shape_from_sources(const ov::Output& batch_dims_source, + const ov::Output& non_batch_dims_source) { + ov::NodeVector dims; + // batch dims here stand for MatMul batch dims -- leaving two last dims for Matrix Multiplication + size_t num_batch_dims = batch_dims_source.get_partial_shape().size() - 2; + get_dims(batch_dims_source, 0, num_batch_dims, dims); + + size_t non_batch_dims_start = non_batch_dims_source.get_partial_shape().size() - 2; + get_dims(non_batch_dims_source, non_batch_dims_start, non_batch_dims_start + 2, dims); + + size_t num_non_const_nodes = 0; // candidates for becoming a Constant -1 -- special value for Reshape pattern + for (size_t curr_i = 0; curr_i + 1 < dims.size(); ++curr_i) { + auto curr_node = dims[curr_i], next_node = dims[curr_i + 1]; + bool curr_is_const = ov::op::util::is_constant(curr_node), next_is_const = ov::op::util::is_constant(next_node); + if (num_non_const_nodes == 0 && !curr_is_const && next_is_const) { + curr_node = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + curr_is_const = true; + num_non_const_nodes += 1; + } + if (num_non_const_nodes == 0 && !next_is_const && curr_is_const) { + next_node = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + next_is_const = true; + num_non_const_nodes += 1; + } + if (curr_is_const && next_is_const) { + dims[curr_i] = nullptr; + dims[curr_i + 1] = ov::op::util::make_try_fold(ov::NodeVector{curr_node, next_node}, 0); + } + } + 
dims.erase(std::remove_if(dims.begin(), + dims.end(), + [](const std::shared_ptr& node) { + return node == nullptr; + }), + dims.end()); + auto target_shape = ov::op::util::make_try_fold(dims, 0); + return target_shape->output(0); +} + +PartialShape make_concat_input_pshape(const DimensionTestHelper& dims, const vector& dims_indices) { + auto another_pshape = dims.make_shape(dims_indices); + size_t rank = dims_indices.size(); + // To reduce test graph we avoid changing Concat axis dimension with this Concat + another_pshape[rank - 1] = Dimension(0); + return another_pshape; +} + +static std::ostream& operator<<(std::ostream& os, const vector& vals) { + bool first = true; + for (const auto& val : vals) { + if (!first) + os << "_"; + first = false; + os << val; + } + return os; +} +} // namespace + +using DeReshapeMatMulParameters = + tuple, vector, vector, vector, vector>, + size_t, + size_t, + size_t>; + +class DeReshapeMatMulTest : public TransformationTestsF, public testing::WithParamInterface { +public: + void SetUp() override { + TransformationTestsF::SetUp(); + const auto& params = std::get<0>(GetParam()); + + const auto& lhs_shape_idx = std::get<0>(params); + const auto& lhs_reshape_idx = std::get<1>(params); + const auto& rhs_shape_idx = std::get<2>(params); + const auto& rhs_reshape_idx = std::get<3>(params); + const auto& out_reshape_idx = std::get<4>(params); + + // 0 - no bea, 1 - lhs, 2 - rhs, 3 - lhs and rhs + const size_t& bea_scalar_mode = std::get<1>(GetParam()); + + // 0 - no concat + // 10 - concat on lhs, reshape on 0 port + // 11 - concat on lhs, reshape on 1 port + // 20 - concat on rhs, reshape on 0 port + // 21 - concat on rhs, reshape on 1 port + // 300 - concat on both sides, both reshapes on 0 port of concats + // 301 - concat on both sides, lhs reshape on 0 port, rhs reshape on 1 port + // 310 - concat on both sides, lhs reshape on 1 port, rhs reshape on 0 port + // 311 - concat on both sides, both reshapes on 1 port of concats + const
size_t& concat_mode = std::get<2>(GetParam()); + + // 0 - no add, 1 - add has matmul on lhs, 2 - add has matmul on rhs + const size_t& final_add_mode = std::get<3>(GetParam()); + + const auto& max_idx = + max_element({lhs_shape_idx, rhs_shape_idx, lhs_reshape_idx, rhs_reshape_idx, out_reshape_idx}); + const DimensionTestHelper dims(max_idx + 1); + + PartialShape lhs_original_pshape = dims.make_shape(lhs_shape_idx); + PartialShape rhs_original_pshape = dims.make_shape(rhs_shape_idx); + + get_model(dims, + lhs_original_pshape, + rhs_original_pshape, + lhs_reshape_idx, + rhs_reshape_idx, + out_reshape_idx, + bea_scalar_mode, + concat_mode, + final_add_mode); + manager.register_pass(); + + if (lhs_shape_idx.size() != 4 && lhs_reshape_idx.size() != 3 && final_add_mode != 0) + return; // check that for all those cases transformation doesn't do anything + + get_model_ref(dims, + lhs_original_pshape, + rhs_original_pshape, + lhs_reshape_idx, + rhs_reshape_idx, + bea_scalar_mode, + concat_mode, + final_add_mode); + } + + void get_model(const DimensionTestHelper& dims, + const PartialShape& lhs_original_pshape, + const PartialShape& rhs_original_pshape, + const vector& lhs_reshape_idx, + const vector& rhs_reshape_idx, + const vector& out_reshape_idx, + const size_t& bea_scalar_mode, + const size_t& concat_mode, + const size_t& final_add_mode) { + ParameterVector inputs; + OutputVector outputs; + + // LHS input of MatMul + auto lhs_input = make_shared(element::f32, lhs_original_pshape); + auto lhs_output = dims.make_reshape(lhs_input, lhs_reshape_idx); + + if (set{10, 11, 300, 301, 310, 311}.count(concat_mode)) { + const auto& another_pshape = make_concat_input_pshape(dims, lhs_reshape_idx); + const auto& another_input = make_shared(element::f32, another_pshape); + + if (set{10, 300, 301}.count(concat_mode)) { // reshape on 0 port + lhs_output = make_shared(OutputVector{lhs_output, another_input}, -1); + } else if (set{11, 310, 311}.count(concat_mode)) { // reshape on 1 port 
+ lhs_output = make_shared(OutputVector{another_input, lhs_output}, -1); + } else { + ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode; + } + inputs.push_back(another_input); + outputs.emplace_back(lhs_output); + } + + if (bea_scalar_mode == 1 || bea_scalar_mode == 3) + lhs_output = make_shared(lhs_output, v0::Constant::create(element::f32, {}, {0.125})); + + // RHS input of MatMul + auto rhs_input = make_shared(element::f32, rhs_original_pshape); + auto rhs_output = dims.make_reshape(rhs_input, rhs_reshape_idx); + + if (set{20, 21, 300, 301, 310, 311}.count(concat_mode)) { + const auto& another_pshape = make_concat_input_pshape(dims, rhs_reshape_idx); + const auto& another_input = make_shared(element::f32, another_pshape); + if (set{20, 300, 310}.count(concat_mode)) { // reshape on 0 port + rhs_output = make_shared(OutputVector{rhs_output, another_input}, -1); + } else if (set{21, 301, 311}.count(concat_mode)) { // reshape on 1 port + rhs_output = make_shared(OutputVector{another_input, rhs_output}, -1); + } else { + ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode; + } + inputs.push_back(another_input); + outputs.emplace_back(rhs_output); + } + + if (bea_scalar_mode == 2 || bea_scalar_mode == 3) + rhs_output = make_shared(rhs_output, v0::Constant::create(element::f32, {}, {0.125})); + + Output matmul = make_shared(lhs_output, rhs_output); + + if (final_add_mode == 1) // 1 - add has matmul on lhs + matmul = + make_shared(matmul, v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1})); + else if (final_add_mode == 2) // 2 - add has matmul on rhs + matmul = + make_shared(v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}), matmul); + + auto output_reshape = reshape(matmul, out_reshape_idx, dims); + + inputs.push_back(dims.get_parameter()); + inputs.push_back(lhs_input); + inputs.push_back(rhs_input); + outputs.emplace_back(output_reshape); + + for (auto& output : outputs) + output = 
std::make_shared(output, v0::Constant::create(element::i32, {1}, {-1}), false); + auto output = make_shared(outputs, 0); + model = make_shared(output, inputs, "Tested model"); + } + + void get_model_ref(const DimensionTestHelper& dims, + const PartialShape& lhs_original_pshape, + const PartialShape& rhs_original_pshape, + const vector& lhs_reshape_idx, + const vector& rhs_reshape_idx, + const size_t& bea_scalar_mode, + const size_t& concat_mode, + const size_t& final_add_mode) { + ParameterVector inputs; + OutputVector outputs; + + // LHS input of MatMul + auto lhs_input = make_shared(element::f32, lhs_original_pshape); + auto lhs_output = lhs_input->output(0); + + if (set{10, 11, 300, 301, 310, 311}.count(concat_mode)) { + const auto& another_pshape = make_concat_input_pshape(dims, lhs_reshape_idx); + const auto& another_input = make_shared(element::f32, another_pshape); + + auto target_shape_of_input = get_target_shape_from_sources(lhs_output, another_input); + auto input_reshape = make_shared(another_input, target_shape_of_input, false); + + if (set{10, 300, 301}.count(concat_mode)) { // reshape on 0 port + lhs_output = make_shared(OutputVector{lhs_output, input_reshape}, -1); + } else if (set{11, 310, 311}.count(concat_mode)) { // reshape on 1 port + lhs_output = make_shared(OutputVector{input_reshape, lhs_output}, -1); + } else { + ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode; + } + + auto target_shape_of_output = get_target_shape_from_sources(input_reshape->input_value(0), lhs_output); + auto output_reshape = make_shared(lhs_output, target_shape_of_output, false); + + inputs.push_back(another_input); + outputs.emplace_back(output_reshape); + } + + if (bea_scalar_mode == 1 || bea_scalar_mode == 3) + lhs_output = make_shared(lhs_output, v0::Constant::create(element::f32, {}, {0.125})); + + // RHS input of MatMul + auto rhs_input = make_shared(element::f32, rhs_original_pshape); + auto rhs_output = rhs_input->output(0); + + if (set{20, 21, 
300, 301, 310, 311}.count(concat_mode)) { + const auto& another_pshape = make_concat_input_pshape(dims, rhs_reshape_idx); + const auto& another_input = make_shared(element::f32, another_pshape); + + auto target_shape_of_input = get_target_shape_from_sources(rhs_output, another_input); + auto input_reshape = make_shared(another_input, target_shape_of_input, false); + + if (set{20, 300, 310}.count(concat_mode)) { // reshape on 0 port + rhs_output = make_shared(OutputVector{rhs_output, input_reshape}, -1); + } else if (set{21, 301, 311}.count(concat_mode)) { // reshape on 1 port + rhs_output = make_shared(OutputVector{input_reshape, rhs_output}, -1); + } else { + ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode; + } + auto target_shape_of_output = get_target_shape_from_sources(input_reshape->input_value(0), rhs_output); + auto output_reshape = make_shared(rhs_output, target_shape_of_output, false); + + inputs.push_back(another_input); + outputs.emplace_back(output_reshape); + } + + if (bea_scalar_mode == 2 || bea_scalar_mode == 3) + rhs_output = make_shared(rhs_output, v0::Constant::create(element::f32, {}, {0.125})); + + Output matmul = make_shared(lhs_output, rhs_output); + + if (final_add_mode > 0) { + const auto original_add_in = v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}); + auto divisor = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(lhs_input, {1}); + auto first_batch_dim = + std::make_shared(ov::op::v0::Constant::create(element::i64, {1}, {1}), + divisor, + true); + auto minus_one = ov::op::v0::Constant::create(element::i64, {1}, {-1}); + auto non_batch_dims = ov::op::v0::Constant::create(element::i64, {2}, {1, 1}); + auto pattern = + std::make_shared(OutputVector{first_batch_dim, minus_one, non_batch_dims}, 0); + auto other_input_reshape = op::util::make_try_fold(original_add_in, pattern, true); + + if (final_add_mode == 1) { // 1 - add has matmul on lhs + matmul = make_shared(matmul, 
other_input_reshape); + } else if (final_add_mode == 2) { // 2 - add has matmul on rhs + matmul = make_shared(other_input_reshape, matmul); + } + } + inputs.push_back(dims.get_parameter()); + inputs.push_back(lhs_input); + inputs.push_back(rhs_input); + outputs.emplace_back(matmul); + + for (auto& output : outputs) + output = std::make_shared(output, v0::Constant::create(element::i32, {1}, {-1}), false); + auto output = make_shared(outputs, 0); + + model_ref = make_shared(output, inputs, "Reference model"); + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + vector lhs_input_shape_indices, lhs_reshape_indices; + vector rhs_input_shape_indices, rhs_reshape_indices; + vector output_reshape_indices; + size_t bea_scalar_mode, concat_mode, final_add_mode; + + tuple, vector, vector, vector, vector> tmp; + + std::tie(tmp, bea_scalar_mode, concat_mode, final_add_mode) = obj.param; + std::tie(lhs_input_shape_indices, + lhs_reshape_indices, + rhs_input_shape_indices, + rhs_reshape_indices, + output_reshape_indices) = tmp; + + std::ostringstream result; + result << "l_in_shape_idx=" << lhs_input_shape_indices << "_l_reshape_idx=" << lhs_reshape_indices + << "_r_in_shape_idx=" << rhs_input_shape_indices << "_r_reshape_idx=" << rhs_reshape_indices + << "_out_reshape_idx=" << output_reshape_indices << "_bea_scalar_mode=" << bea_scalar_mode + << "_concat_mode=" << concat_mode << "_final_add_mode=" << final_add_mode; + return result.str(); + } +}; + +const auto shape_test_cases = + vector, vector, vector, vector, vector>>{ + {{0, 1, 2, 3}, {5, 2, 3}, {0, 1, 3, 4}, {5, 3, 4}, {0, 1, 2, 4}}, // 4D -> 3D -> 4D + {{5, 2, 3}, {0, 1, 2, 3}, {5, 3, 4}, {0, 1, 3, 4}, {5, 2, 4}}, // 3D -> 4D -> 3D + {{0, 1, 2, 3, 4}, {0, 6, 3, 4}, {0, 1, 2, 4, 5}, {0, 6, 4, 5}, {0, 1, 2, 3, 5}}, // 5D -> 4D -> 5D + }; + +const auto bea_scalar_modes = vector{0, 1, 2, 3}; +const auto concat_modes = vector{0, 10, 11, 20, 21, 300, 301, 310, 311}; +const auto final_add_modes = 
vector{0, 1, 2}; + +TEST_P(DeReshapeMatMulTest, DeReshapeTests) {} + +INSTANTIATE_TEST_SUITE_P( + TransformationTestsF, + DeReshapeMatMulTest, + testing::Combine(testing::ValuesIn(shape_test_cases), // lhs_idx, rhs_idx, reshape_idx, reshape_idx, reshape_idx + testing::ValuesIn(bea_scalar_modes), + testing::ValuesIn(concat_modes), + testing::ValuesIn(final_add_modes)), + DeReshapeMatMulTest::getTestCaseName); From 04c766e9f17061ee1c9d51c06a7db19f8c5c895b Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Wed, 25 Oct 2023 13:37:36 +0400 Subject: [PATCH 057/275] `ReverseSequence`, `RNNCell`, `Reverse` to API2.0 (#20569) * `ReverseSequence` to API2.0 * `RNNCell` to API2.0 * `Reverse` GPU test to API2.0 * Alignment fix --- .../single_layer_tests/reverse_sequence.cpp | 44 +++---- .../single_layer_tests/rnn_cell.cpp | 16 +-- .../single_layer_tests/reverse.cpp | 79 ++++++------- .../single_layer_tests/rnn_cell.cpp | 18 +-- .../skip_tests_config.cpp | 2 + .../include/single_op_tests/reverse.hpp | 15 +++ .../single_op_tests/reverse_sequence.hpp | 15 +++ .../include/single_op_tests/rnn_cell.hpp | 15 +++ .../shared_test_classes/single_op/reverse.hpp | 28 +++++ .../single_op/reverse_sequence.hpp | 36 ++++++ .../single_op/rnn_cell.hpp | 42 +++++++ .../src/single_op/reverse.cpp | 49 ++++++++ .../src/single_op/reverse_sequence.cpp | 59 ++++++++++ .../src/single_op/rnn_cell.cpp | 109 ++++++++++++++++++ 14 files changed, 447 insertions(+), 80 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/reverse.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/reverse_sequence.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/rnn_cell.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse_sequence.hpp create mode 100644 
src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_cell.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/reverse.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/rnn_cell.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp index d8a9fc23c4974f..fd1c3354f0bda0 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp @@ -4,43 +4,43 @@ #include -#include "single_layer_tests/reverse_sequence.hpp" +#include "single_op_tests/reverse_sequence.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReverseSequenceLayerTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U16, - InferenceEngine::Precision::I32 +const std::vector model_types = { + ov::element::f32, + ov::element::f16, + ov::element::u8, + ov::element::i8, + ov::element::u16, + ov::element::i32 }; -const std::vector batchAxisIndices = { 0L }; +const std::vector batch_axis_indices = { 0L }; -const std::vector seqAxisIndices = { 1L }; +const std::vector seq_axis_indices = { 1L }; -const std::vector> inputShapes = { {3, 10} }; //, 10, 20 +const std::vector> input_shapes = { {3, 10} }; //, 10, 20 -const std::vector> reversSeqLengthsVecShapes = { {3} }; +const std::vector> reverse_seq_shapes = { {3} }; -const std::vector secondaryInputTypes = { - 
ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +const std::vector secondary_input_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; INSTANTIATE_TEST_SUITE_P(smoke_ReverseSequence, ReverseSequenceLayerTest, ::testing::Combine( - ::testing::ValuesIn(batchAxisIndices), - ::testing::ValuesIn(seqAxisIndices), - ::testing::ValuesIn(inputShapes), - ::testing::ValuesIn(reversSeqLengthsVecShapes), - ::testing::ValuesIn(secondaryInputTypes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(batch_axis_indices), + ::testing::ValuesIn(seq_axis_indices), + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(reverse_seq_shapes), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU)), ReverseSequenceLayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp index cedd5442e41b1d..b0b2ea664e4515 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp @@ -4,10 +4,10 @@ #include -#include "single_layer_tests/rnn_cell.hpp" +#include "single_op_tests/rnn_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RNNCellTest; namespace { std::vector should_decompose{false, true}; @@ -16,12 +16,12 @@ namespace { std::vector input_size{1, 30}; std::vector> activations = {{"relu"}, {"sigmoid"}, {"tanh"}}; std::vector clip = {0.f, 0.7f}; - std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER + std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + 
ov::test::utils::InputLayerType::PARAMETER }; - std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; + std::vector model_types = {ov::element::f32, + ov::element::f16}; INSTANTIATE_TEST_SUITE_P(smoke_RNNCellCommon, RNNCellTest, ::testing::Combine( @@ -34,7 +34,7 @@ namespace { ::testing::ValuesIn(layer_types), ::testing::ValuesIn(layer_types), ::testing::ValuesIn(layer_types), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU)), RNNCellTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse.cpp index a14827a678adf4..453e8a7cbd0835 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse.cpp @@ -2,92 +2,89 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/reverse.hpp" - -#include - +#include "single_op_tests/reverse.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReverseLayerTest; namespace { -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8}; +const std::vector model_types = {ov::element::f32, + ov::element::f16, + ov::element::i32, + ov::element::i64, + ov::element::i8, + ov::element::u8}; -const std::vector> inputShapes1D = {{10}}; -const std::vector> indices1D = {{0}}; +const std::vector> input_shapes_1D = {{10}}; +const std::vector> indices_1D = {{0}}; const std::vector modes = {"index", "mask"}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse1D, ReverseLayerTest, - 
::testing::Combine(::testing::ValuesIn(inputShapes1D), - ::testing::ValuesIn(indices1D), + ::testing::Combine(::testing::ValuesIn(input_shapes_1D), + ::testing::ValuesIn(indices_1D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); -const std::vector> inputShapes2D = {{3, 4}, {1, 3}}; -const std::vector> indices2D = {{0}, {1}}; +const std::vector> input_shapes_2D = {{3, 4}, {1, 3}}; +const std::vector> indices_2D = {{0}, {1}}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse2D, ReverseLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapes2D), - ::testing::ValuesIn(indices2D), + ::testing::Combine(::testing::ValuesIn(input_shapes_2D), + ::testing::ValuesIn(indices_2D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); -const std::vector> inputShapes3D = {{1, 3, 4}, {2, 5, 6}}; -const std::vector> indices3D = {{0}, {0, 1}, {0, 2}}; +const std::vector> input_shapes_3D = {{1, 3, 4}, {2, 5, 6}}; +const std::vector> indices_3D = {{0}, {0, 1}, {0, 2}}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse3D, ReverseLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapes3D), - ::testing::ValuesIn(indices3D), + ::testing::Combine(::testing::ValuesIn(input_shapes_3D), + ::testing::ValuesIn(indices_3D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); -const std::vector> inputShapes4D = {{1, 2, 3, 4}, {1, 2, 5, 6}}; -const std::vector> indices4D = {{1}, {1, 2}, {1, 3}}; +const std::vector> input_shapes_4D = {{1, 2, 3, 4}, {1, 2, 5, 6}}; +const std::vector> indices_4D = {{1}, {1, 2}, {1, 3}}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse4D, ReverseLayerTest, - 
::testing::Combine(::testing::ValuesIn(inputShapes4D), - ::testing::ValuesIn(indices4D), + ::testing::Combine(::testing::ValuesIn(input_shapes_4D), + ::testing::ValuesIn(indices_4D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); -const std::vector> inputShapes5D = {{1, 1, 4, 3, 3}}; -const std::vector> indices5D = {{2}, {2, 3}, {2, 4}}; +const std::vector> input_shapes_5D = {{1, 1, 4, 3, 3}}; +const std::vector> indices_5D = {{2}, {2, 3}, {2, 4}}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse5D, ReverseLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapes5D), - ::testing::ValuesIn(indices5D), + ::testing::Combine(::testing::ValuesIn(input_shapes_5D), + ::testing::ValuesIn(indices_5D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); -const std::vector> inputShapes6D = {{1, 1, 4, 3, 3, 3}}; -const std::vector> indices6D = {{2}, {1, 3}, {3, 5}, {1, 4, 5}}; +const std::vector> input_shapes_6D = {{1, 1, 4, 3, 3, 3}}; +const std::vector> indices_6D = {{2}, {1, 3}, {3, 5}, {1, 4, 5}}; INSTANTIATE_TEST_SUITE_P(smoke_Reverse6D, ReverseLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapes6D), - ::testing::ValuesIn(indices6D), + ::testing::Combine(::testing::ValuesIn(input_shapes_6D), + ::testing::ValuesIn(indices_6D), ::testing::ValuesIn(modes), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReverseLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp index a471761bc9db3e..b721d406aacffe 100644 --- 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp @@ -4,10 +4,10 @@ #include -#include "single_layer_tests/rnn_cell.hpp" +#include "single_op_tests/rnn_cell.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RNNCellTest; namespace { std::vector should_decompose{false, true}; @@ -16,14 +16,14 @@ namespace { std::vector input_size{1, 30}; std::vector> activations = {{"relu"}, {"sigmoid"}, {"tanh"}}; std::vector clip = {0.f, 0.7f}; - std::vector layer_types = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER + std::vector layer_types = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; - std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; + std::vector model_types = {ov::element::f32, + ov::element::f16}; - INSTANTIATE_TEST_SUITE_P(RNNCellCommon, RNNCellTest, + INSTANTIATE_TEST_SUITE_P(smoke_RNNCellCommon, RNNCellTest, ::testing::Combine( ::testing::ValuesIn(should_decompose), ::testing::ValuesIn(batch), @@ -34,7 +34,7 @@ namespace { ::testing::ValuesIn(layer_types), ::testing::ValuesIn(layer_types), ::testing::ValuesIn(layer_types), - ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), RNNCellTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 25b679cf22cc82..23699ee8aa75e9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -117,5 +117,7 @@ std::vector 
disabledTestPatterns() { R"(.*MemoryDynamicBatch.*)", // Issue: 123493 R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)", + // Issue: 123507 + R"(.*ReverseLayerTest.*mask.*f16.*)", }; } diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/reverse.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/reverse.hpp new file mode 100644 index 00000000000000..be9181cce17140 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/reverse.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/reverse.hpp" + +namespace ov { +namespace test { +TEST_P(ReverseLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/reverse_sequence.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/reverse_sequence.hpp new file mode 100644 index 00000000000000..f68be26ebaf1c8 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/reverse_sequence.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/reverse_sequence.hpp" + +namespace ov { +namespace test { +TEST_P(ReverseSequenceLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/rnn_cell.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/rnn_cell.hpp new file mode 100644 index 00000000000000..58336cac837ab6 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/rnn_cell.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/rnn_cell.hpp" + +namespace ov { +namespace test { 
+TEST_P(RNNCellTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse.hpp new file mode 100644 index 00000000000000..c7b36227f81080 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using reverseParams = std::tuple< + std::vector, // Input shape + std::vector, // Axes + std::string, // Mode + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class ReverseLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse_sequence.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse_sequence.hpp new file mode 100644 index 00000000000000..a97e7eac4535a4 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reverse_sequence.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { +using ReverseSequenceParamsTuple = typename std::tuple< + int64_t, // Index of the batch dimension + int64_t, // Index of the sequence dimension + std::vector, // Input 
shapes + std::vector, // Shape of the input vector with sequence lengths to be reversed + ov::test::utils::InputLayerType, // Secondary input type + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class ReverseSequenceLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_cell.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_cell.hpp new file mode 100644 index 00000000000000..e7d2bfe1f5ba52 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rnn_cell.hpp @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2018-2023 Intel Corporation +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { + +using RNNCellParams = typename std::tuple< + bool, // Use decompose to sub-ops transformation + size_t, // Batch + size_t, // Hidden size + size_t, // Input size + std::vector, // Activations + float, // Clip + ov::test::utils::InputLayerType, // W input type (Constant or Parameter) + ov::test::utils::InputLayerType, // R input type (Constant or Parameter) + ov::test::utils::InputLayerType, // B input type (Constant or Parameter) + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class RNNCellTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; + +} // namespace test +} // namespace ov diff --git 
a/src/tests/functional/shared_test_classes/src/single_op/reverse.cpp b/src/tests/functional/shared_test_classes/src/single_op/reverse.cpp new file mode 100644 index 00000000000000..cd5a9958bf9780 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/reverse.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/reverse.hpp" + +namespace ov { +namespace test { + +std::string ReverseLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shape; + std::vector axes; + std::string mode; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shape, axes, mode, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "in_shape=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "axes=" << ov::test::utils::vec2str(axes) << "_"; + result << "mode=" << mode << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void ReverseLayerTest::SetUp() { + std::vector input_shape; + std::vector axes; + std::string mode; + ov::element::Type model_type; + std::tie(input_shape, axes, mode, model_type, targetDevice) = GetParam(); + + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + std::shared_ptr axes_constant; + if (mode == "index") { + axes_constant = std::make_shared(ov::element::i32, ov::Shape{axes.size()}, axes); + } else { + std::vector axes_mask(input_shape.size(), false); + for (auto axis : axes) + axes_mask[axis] = true; + axes_constant = + std::make_shared(ov::element::boolean, ov::Shape{axes_mask.size()}, axes_mask); + } + auto reverse = std::make_shared(param, axes_constant, mode); + function = std::make_shared(reverse->outputs(), ov::ParameterVector{param}, "reverse"); +} +} // namespace test +} // namespace ov diff --git 
a/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp b/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp new file mode 100644 index 00000000000000..e2ff553a29aeb9 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/reverse_sequence.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/reverse_sequence.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +namespace ov { +namespace test { +std::string ReverseSequenceLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + int64_t batch_axis_idx; + int64_t seq_axis_idx; + ov::element::Type model_type; + std::string target_device; + std::vector input_shape; + std::vector second_input_shape; + ov::test::utils::InputLayerType secondary_input_type; + + std::tie(batch_axis_idx, seq_axis_idx, input_shape, second_input_shape, secondary_input_type, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "seqLengthsShape" << ov::test::utils::vec2str(second_input_shape) << "_"; + result << "secondaryInputType=" << secondary_input_type << "_"; + result << "batchAxis=" << batch_axis_idx << "_"; + result << "seqAxis=" << seq_axis_idx << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void ReverseSequenceLayerTest::SetUp() { + ov::element::Type model_type; + int64_t batch_axis_idx; + int64_t seq_axis_idx; + std::vector input_shape; + std::vector second_input_shape; + ov::test::utils::InputLayerType secondary_input_type; + + std::tie(batch_axis_idx, seq_axis_idx, input_shape, second_input_shape, secondary_input_type, model_type, targetDevice) = GetParam(); + + ov::ParameterVector params {std::make_shared(model_type, ov::Shape(input_shape))}; + auto second_data_type = 
ov::element::i32; //according to the specification + std::shared_ptr secondary_input; + if (ov::test::utils::InputLayerType::CONSTANT == secondary_input_type) { + auto tensor = ov::test::utils::create_and_fill_tensor(second_data_type, second_input_shape); + secondary_input = std::make_shared(tensor); + } else if (ov::test::utils::InputLayerType::PARAMETER == secondary_input_type) { + secondary_input = std::make_shared(second_data_type, ov::Shape(second_input_shape)); + params.push_back(std::dynamic_pointer_cast(secondary_input)); + } else { + throw std::runtime_error("Unsupported input type"); + } + + auto reverse = std::make_shared(params[0], secondary_input, batch_axis_idx, seq_axis_idx); + function = std::make_shared(reverse->outputs(), params, "ReverseSequence"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/rnn_cell.cpp b/src/tests/functional/shared_test_classes/src/single_op/rnn_cell.cpp new file mode 100644 index 00000000000000..88a9d4673dc804 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/rnn_cell.cpp @@ -0,0 +1,109 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/pass/manager.hpp" +#include "transformations/op_conversions/rnn_cell_decomposition.hpp" +#include "shared_test_classes/single_op/rnn_cell.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +namespace ov { +namespace test { + +using utils::InputLayerType; + +std::string RNNCellTest::getTestCaseName(const testing::TestParamInfo &obj) { + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + float clip; + InputLayerType WType; + InputLayerType RType; + InputLayerType BType; + ov::element::Type model_type; + std::string target_device; + std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, WType, RType, BType, + model_type, target_device) = obj.param; + 
std::vector> input_shapes = {{batch, input_size}, {batch, hidden_size}, + {hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}}; + std::ostringstream result; + result << "decomposition" << should_decompose << "_"; + result << "batch=" << batch << "_"; + result << "hidden_size=" << hidden_size << "_"; + result << "input_size=" << input_size << "_"; + result << "IS=" << ov::test::utils::vec2str(input_shapes) << "_"; + result << "activations=" << ov::test::utils::vec2str(activations) << "_"; + result << "clip=" << clip << "_"; + result << "WType=" << WType << "_"; + result << "RType=" << RType << "_"; + result << "BType=" << BType << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void RNNCellTest::SetUp() { + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + InputLayerType WType; + InputLayerType RType; + InputLayerType BType; + ov::element::Type model_type; + std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, WType, RType, BType, + model_type, targetDevice) = this->GetParam(); + + std::vector> input_shapes = {{batch, input_size}, {batch, hidden_size}, + {hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}}; + ov::ParameterVector params{std::make_shared(model_type, ov::Shape(input_shapes[0])), + std::make_shared(model_type, ov::Shape(input_shapes[1]))}; + std::vector WRB = {input_shapes[2], input_shapes[3], input_shapes[4]}; + + std::shared_ptr W; + if (WType == InputLayerType::PARAMETER) { + const auto param = std::make_shared(model_type, WRB[0]); + W = param; + params.push_back(param); + } else { + auto tensor = ov::test::utils::create_and_fill_tensor(model_type, WRB[0]); + W = std::make_shared(tensor); + } + + std::shared_ptr R; + if (RType == InputLayerType::PARAMETER) { + const 
auto param = std::make_shared(model_type, WRB[1]); + R = param; + params.push_back(param); + } else { + auto tensor = ov::test::utils::create_and_fill_tensor(model_type, WRB[1]); + R = std::make_shared(tensor); + } + + std::shared_ptr B; + if (BType == InputLayerType::PARAMETER) { + const auto param = std::make_shared(model_type, WRB[2]); + B = param; + params.push_back(param); + } else { + auto tensor = ov::test::utils::create_and_fill_tensor(model_type, WRB[2]); + B = std::make_shared(tensor); + } + + auto rnn_cell = std::make_shared(params[0], params[1], W, R, B, hidden_size, activations, + activations_alpha, activations_beta, clip); + function = std::make_shared(rnn_cell->outputs(), params, "rnn_cell"); + if (should_decompose) { + ov::pass::Manager m; + m.register_pass(); + m.run_passes(function); + } +} +} // namespace test +} // namespace ov From bc463e886b21c27fbce0293c5a5fa313a0eff0c0 Mon Sep 17 00:00:00 2001 From: Siddhant Chauhan Date: Wed, 25 Oct 2023 15:44:22 +0530 Subject: [PATCH 058/275] [PT FE] Add aten::log10 (#20621) * Add log10 operator and test * fix * Update test_log.py --------- Co-authored-by: Maxim Vafin --- src/frontends/pytorch/src/op/log.cpp | 12 ++++++++++++ src/frontends/pytorch/src/op_table.cpp | 3 +++ tests/layer_tests/pytorch_tests/test_log.py | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp index 20232e31dec5ce..a5eb3fb89bada6 100644 --- a/src/frontends/pytorch/src/op/log.cpp +++ b/src/frontends/pytorch/src/op/log.cpp @@ -41,6 +41,18 @@ OutputVector translate_log2(const NodeContext& context) { return {res}; }; +OutputVector translate_log10(const NodeContext& context) { + // torch.log10 returns a tensor with the logarithm to the base 10 of the elements of input. 
+ num_inputs_check(context, 1, 1); + auto x = context.get_input(0); + auto ten = context.mark_node(v0::Constant::create(element::f32, Shape{}, {10})); + x = context.mark_node(std::make_shared(x, element::f32)); + auto log10 = context.mark_node(std::make_shared(ten)); + auto log = context.mark_node(std::make_shared(x)); + auto res = context.mark_node(std::make_shared(log, log10)); + return {res}; +}; + OutputVector translate_logsumexp(const NodeContext& context) { num_inputs_check(context, 1, 2); auto input = context.get_input(0); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index c307be6fc22e2c..55434a49fd45e4 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -104,6 +104,7 @@ OP_CONVERTER(translate_log); OP_CONVERTER(translate_log1p); OP_CONVERTER(translate_log_softmax); OP_CONVERTER(translate_log2); +OP_CONVERTER(translate_log10); OP_CONVERTER(translate_logsumexp); OP_CONVERTER(translate_loop); OP_CONVERTER(translate_masked_fill); @@ -387,6 +388,8 @@ const std::map get_supported_ops_ts() { {"aten::log1p_", op::inplace_op}, {"aten::log2", op::translate_log2}, {"aten::log2_", op::inplace_op}, + {"aten::log10", op::translate_log10}, + {"aten::log10_", op::inplace_op}, {"aten::lt", op::translate_1to1_match_2_inputs_align_types}, {"aten::masked_fill", op::translate_masked_fill}, {"aten::masked_fill_", op::inplace_op}, diff --git a/tests/layer_tests/pytorch_tests/test_log.py b/tests/layer_tests/pytorch_tests/test_log.py index 1e4de2dd4f19af..264ba734bc9cc5 100644 --- a/tests/layer_tests/pytorch_tests/test_log.py +++ b/tests/layer_tests/pytorch_tests/test_log.py @@ -18,6 +18,8 @@ def create_model(self, op): "log_": torch.log_, "log2": torch.log2, "log2_": torch.log2_, + "log10": torch.log10, + "log10_": torch.log10_, "log1p": torch.log1p, "log1p_": torch.log1p_ } @@ -45,6 +47,9 @@ def forward(self, x): ["log2", "float32"], ["log2", "int32"], ["log2_", "float32"], + ["log10", 
"float32"], + ["log10", "int32"], + ["log10_", "float32"], ["log1p", "float32"], ["log1p", "int32"], ["log1p_", "float32"]]) From daa2c9ded0865bd8c9ddbf7f32624b2a2fc7c4a9 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Wed, 25 Oct 2023 14:04:17 +0200 Subject: [PATCH 059/275] [MO][Opset13] NMSRotated-13 support in MO IR Reader (#20354) * nms_rotated mo ir read init * Fix type infer and clean up * Update tests * Update tests * update package BOM file * Update type_infer * Avoid files collision in tests --------- Co-authored-by: Michal Lukaszewski --- tools/mo/automation/package_BOM.txt | 1 + tools/mo/openvino/tools/mo/ops/nms_rotated.py | 79 ++++++++++++++ .../unit_tests/mo/utils/ir_reader/ops_test.py | 100 ++++++++++++++++++ 3 files changed, 180 insertions(+) create mode 100644 tools/mo/openvino/tools/mo/ops/nms_rotated.py diff --git a/tools/mo/automation/package_BOM.txt b/tools/mo/automation/package_BOM.txt index b9bc64d1c8bf08..2e56196c3bc515 100644 --- a/tools/mo/automation/package_BOM.txt +++ b/tools/mo/automation/package_BOM.txt @@ -934,6 +934,7 @@ openvino/tools/mo/ops/mxfft.py openvino/tools/mo/ops/mxrepeat.py openvino/tools/mo/ops/mxreshape.py openvino/tools/mo/ops/NextIteration.py +openvino/tools/mo/ops/nms_rotated.py openvino/tools/mo/ops/non_max_suppression.py openvino/tools/mo/ops/non_zero.py openvino/tools/mo/ops/normalize.py diff --git a/tools/mo/openvino/tools/mo/ops/nms_rotated.py b/tools/mo/openvino/tools/mo/ops/nms_rotated.py new file mode 100644 index 00000000000000..d845373d4c5744 --- /dev/null +++ b/tools/mo/openvino/tools/mo/ops/nms_rotated.py @@ -0,0 +1,79 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging as log + +import numpy as np + +from openvino.tools.mo.front.common.partial_infer.utils import shape_array, dynamic_dimension_value +from openvino.tools.mo.front.extractor import bool_to_str +from openvino.tools.mo.graph.graph import Node, Graph +from 
openvino.tools.mo.middle.passes.convert_data_type import np_data_type_to_destination_type +from openvino.tools.mo.ops.op import Op +from openvino.tools.mo.utils.error import Error + + +class NMSRotated(Op): + op = 'NMSRotated' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + mandatory_props = { + 'type': self.op, + 'op': self.op, + 'version': 'opset13', + + 'infer': self.infer, + 'type_infer': self.type_infer, + + 'sort_result_descending': True, + 'output_type': np.int64, + 'clockwise': True, + + 'in_ports_count': 5, + 'out_ports_count': 3, + } + super().__init__(graph, mandatory_props, attrs) + + def backend_attrs(self): + return [('sort_result_descending', lambda node: bool_to_str(node, 'sort_result_descending')), + 'output_type', + ('clockwise', lambda node: bool_to_str(node, 'clockwise'))] + + def supported_attrs(self): + return [ + 'sort_result_descending', + 'output_type', + 'clockwise', + ] + + @staticmethod + def infer(node: Node): + num_of_inputs = len(node.in_ports()) + opset = node.get_opset() + required_num_inputs = 5 + input_msg_fmt = 'NMSRotated node {} from {} must have {} inputs' + node_name = node.soft_get('name', node.id) + inputs_msg = input_msg_fmt.format( + node_name, opset, required_num_inputs) + assert num_of_inputs == required_num_inputs, inputs_msg + + node.out_port(0).data.set_shape( + shape_array([dynamic_dimension_value, 3])) + num_of_outputs = len( + [port for port in node.out_ports().values() if not port.disconnected()]) + if num_of_outputs >= 2 and node.has_port('out', 1): + node.out_port(1).data.set_shape( + shape_array([dynamic_dimension_value, 3])) + if num_of_outputs >= 3 and node.has_port('out', 2): + node.out_port(2).data.set_shape(shape_array([1])) + + @staticmethod + def type_infer(node: Node): + node.out_port(1).set_data_type(np.float32) + if node.has_valid('output_type') and node['output_type'].lower() == 'i32': + node.out_port(0).set_data_type(np.int32) + node.out_port(2).set_data_type(np.int32) + 
else: + node.out_port(0).set_data_type(np.int64) + node.out_port(2).set_data_type(np.int64) diff --git a/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py b/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py index 3e5b35ef62fabb..fefc2653c64be6 100644 --- a/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py +++ b/tools/mo/unit_tests/mo/utils/ir_reader/ops_test.py @@ -357,3 +357,103 @@ def test_multinomial_13_const_inputs(self): self.assertEqual(loaded_model.get_output_element_type(0), Type.i64) self.assertEqual(loaded_model.get_output_partial_shape( 0), PartialShape([2, 3])) + + def test_nms_rotated_13_attrs_false_i32(self): + boxes_shape = [1, 100, 5] + scores_shape = [1, 2, 100] + max_output_boxes_val = 5 + iou_threshold_val = 0.5 + score_threshold_val = 0.4 + + boxes_parameter = opset13.parameter( + boxes_shape, name="Boxes", dtype=np.float32) + scores_parameter = opset13.parameter( + scores_shape, name="Scores", dtype=np.float32) + + max_output_boxes = opset13.constant([max_output_boxes_val], np.int64) + iou_threshold = opset13.constant([iou_threshold_val], np.float32) + score_threshold = opset13.constant([score_threshold_val], np.float32) + + sort_result_descending = False + output_type = "i32" + clockwise = False + + node = opset13.nms_rotated(boxes_parameter, scores_parameter, max_output_boxes, iou_threshold, + score_threshold, sort_result_descending, output_type, clockwise) + + model = Model(node, [boxes_parameter, scores_parameter]) + graph, loaded_model = TestOps.check_graph_can_save( + model, 'nms_rotated_model_1') + ir_node = graph.get_op_nodes(op="NMSRotated")[0] + + self.assertListEqual(ir_node.out_port( + 0).data.get_shape().tolist(), [None, 3]) + self.assertListEqual(ir_node.out_port( + 1).data.get_shape().tolist(), [None, 3]) + self.assertListEqual(ir_node.out_port( + 2).data.get_shape().tolist(), [1]) + + self.assertEqual(ir_node["version"], "opset13") + self.assertEqual(ir_node['sort_result_descending'], False) + 
self.assertEqual(ir_node['output_type'], "i32") + self.assertEqual(ir_node['clockwise'], False) + self.assertEqual(loaded_model.get_output_element_type(0), Type.i32) + self.assertEqual(loaded_model.get_output_element_type(1), Type.f32) + self.assertEqual(loaded_model.get_output_element_type(2), Type.i32) + + self.assertEqual(loaded_model.get_output_partial_shape( + 0), PartialShape([Dimension(-1, 10), 3])) + self.assertEqual(loaded_model.get_output_partial_shape( + 1), PartialShape([Dimension(-1, 10), 3])) + self.assertEqual(loaded_model.get_output_partial_shape( + 2), PartialShape([1])) + + def test_nms_rotated_13_attrs_true_i64(self): + boxes_shape = [1, 100, 5] + scores_shape = [1, 3, 100] + max_output_boxes_val = 5 + iou_threshold_val = 0.5 + score_threshold_val = 0.4 + + boxes_parameter = opset13.parameter( + boxes_shape, name="Boxes", dtype=np.float32) + scores_parameter = opset13.parameter( + scores_shape, name="Scores", dtype=np.float32) + + max_output_boxes = opset13.constant([max_output_boxes_val], np.int64) + iou_threshold = opset13.constant([iou_threshold_val], np.float32) + score_threshold = opset13.constant([score_threshold_val], np.float32) + + sort_result_descending = True + output_type = "i64" + clockwise = True + + node = opset13.nms_rotated(boxes_parameter, scores_parameter, max_output_boxes, iou_threshold, + score_threshold, sort_result_descending, output_type, clockwise) + + model = Model(node, [boxes_parameter, scores_parameter]) + graph, loaded_model = TestOps.check_graph_can_save( + model, 'nms_rotated_model_2') + ir_node = graph.get_op_nodes(op="NMSRotated")[0] + + self.assertListEqual(ir_node.out_port( + 0).data.get_shape().tolist(), [None, 3]) + self.assertListEqual(ir_node.out_port( + 1).data.get_shape().tolist(), [None, 3]) + self.assertListEqual(ir_node.out_port( + 2).data.get_shape().tolist(), [1]) + + self.assertEqual(ir_node["version"], "opset13") + self.assertEqual(ir_node['sort_result_descending'], True) + 
self.assertEqual(ir_node['output_type'], "i64") + self.assertEqual(ir_node['clockwise'], True) + self.assertEqual(loaded_model.get_output_element_type(0), Type.i64) + self.assertEqual(loaded_model.get_output_element_type(1), Type.f32) + self.assertEqual(loaded_model.get_output_element_type(2), Type.i64) + + self.assertEqual(loaded_model.get_output_partial_shape( + 0), PartialShape([Dimension(-1, 15), 3])) + self.assertEqual(loaded_model.get_output_partial_shape( + 1), PartialShape([Dimension(-1, 15), 3])) + self.assertEqual(loaded_model.get_output_partial_shape( + 2), PartialShape([1])) From 0d68bb3c146a17e64da73fd6f2710d3450002347 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 25 Oct 2023 19:07:48 +0400 Subject: [PATCH 060/275] OpenVINO library expose requirement for C++11 (#20691) --- src/cmake/openvino.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 7fb6e2fd77bd6b..ad73269d475748 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -31,6 +31,8 @@ add_library(${TARGET_NAME} add_library(openvino::runtime ALIAS ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME runtime) +target_compile_features(${TARGET_NAME} PUBLIC cxx_std_11) + ov_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "OpenVINO runtime library") target_include_directories(${TARGET_NAME} PUBLIC From e4f4714fca4e76572d950258e3eaa666983ff1aa Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 25 Oct 2023 21:52:45 +0400 Subject: [PATCH 061/275] [CONFORMANCE] Prepare model filelist for SubgraphsDumper (#20617) * [CONFORMANCE] Prepare model filelist for SubgraphsDumper * Improve default and custom scenario * Update constants.py * remove extra file * improveemnt --- .../find_models_for_subgraphs_dumper.py | 108 ++++++++++++++++++ .../data/custom_re_to_find_models.lst | 1 + .../data/default_re_to_find_models.lst | 5 + .../layer_tests_summary/run_conformance.py | 3 +- 
.../layer_tests_summary/utils/constants.py | 13 +++ .../layer_tests_summary/utils/file_utils.py | 37 +++--- 6 files changed, 145 insertions(+), 22 deletions(-) create mode 100644 src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py create mode 100644 src/tests/test_utils/functional_test_utils/layer_tests_summary/data/custom_re_to_find_models.lst create mode 100644 src/tests/test_utils/functional_test_utils/layer_tests_summary/data/default_re_to_find_models.lst diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py new file mode 100644 index 00000000000000..3016f8c48c80aa --- /dev/null +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py @@ -0,0 +1,108 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from argparse import ArgumentParser +from pathlib import Path +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from utils.file_utils import prepare_filelist, find_latest_dir +from utils.conformance_utils import get_logger +from utils.constants import SUPPORTED_MODEL_EXTENSION + +logger = get_logger("prepare_model_list") + +def get_default_re_path(is_take_all_models = False): + SCRIPT_DIR_PATH, _ = os.path.split(os.path.abspath(__file__)) + return os.path.join(SCRIPT_DIR_PATH, "..", "data", "custom_re_to_find_models.lst") if is_take_all_models else os.path.join(SCRIPT_DIR_PATH, "..", "data", "default_re_to_find_models.lst") + +def parse_arguments(): + parser = ArgumentParser() + + model_help = "Path to model directories path file to prepare filelist. 
Separator is `,`" + output_help = "Path to output dir to save model list file" + filename_help = "Output filename to save model list file" + latest_only_help = "Use only latest directory matched reg exp. In other case all directories will be taken from the dir" + + parser.add_argument("-m", "--model_dirs", type=str, help=model_help, required=True) + parser.add_argument("-o", "--output_dir", type=str, help=output_help, required=False, default=".") + parser.add_argument("-f", "--filename", type=str, help=filename_help, required=False, default="model_filelist") + parser.add_argument("-l", "--latest_only", type=bool, help=latest_only_help, required=False, default=False) + + return parser.parse_args() + + +def str_to_dir_list(input_str: str): + dir_path_list = [] + while True: + separator_pos = input_str.find(',') + dir_path = "" + if separator_pos == -1: + if len(input_str) > 0: + dir_path = input_str + input_str = "" + else: + break + else: + dir_path = input_str[:separator_pos:] + input_str = input_str[separator_pos+1::] + separator_pos = input_str.find(',') + if os.path.isdir(dir_path): + dir_path_list.append(dir_path) + logger.info(f"Model dir list: {dir_path_list}") + return dir_path_list + + +def read_dir_re_exp(re_exp_file_path: str): + dir_re_exps = [] + if os.path.isfile(re_exp_file_path): + with open(re_exp_file_path, "r") as re_exp_file: + for line in re_exp_file.readlines(): + if "#" in line: + continue + dir_re_exps.append(line.replace('\n', '')) + if len(dir_re_exps) == 0: + dir_re_exps.append('*') + logger.info(f"Model dir re exp list: {dir_re_exps}") + return dir_re_exps + + +def generate_model_list_file(input_str: str, re_exp_file_path: str, output_file_path: os.path, is_latest_only: bool): + with open(output_file_path, 'w', newline='\n') as output_file: + model_dir_paths = str_to_dir_list(input_str) + dir_re_exps = read_dir_re_exp(re_exp_file_path) + model_list = list() + for model_dir_path in model_dir_paths: + for dir_re_exp in dir_re_exps: + dirs = 
[model_dir_path] + if dir_re_exp != "*": + if is_latest_only: + dirs = [find_latest_dir(model_dir_path, dir_re_exp)] + else: + dirs = Path(model_dir_path).glob(dir_re_exp) + for dir in dirs: + try: + logger.info(f"Processing dir: {dir}") + model_list.extend(prepare_filelist(str(dir), SUPPORTED_MODEL_EXTENSION, is_save_to_file=False)) + if is_latest_only: + break + except: + pass + for line in model_list: + output_file.write(f"{line}\n") + output_file.close() + +if __name__ == "__main__": + args = parse_arguments() + os.makedirs(args.output_dir, exist_ok=True) + logger.info(f"[ ARGUMENTS ] --model_dirs={args.model_dirs}") + logger.info(f"[ ARGUMENTS ] --output_dir={args.output_dir}") + logger.info(f"[ ARGUMENTS ] --filename={args.filename}") + logger.info(f"[ ARGUMENTS ] --latest_only={args.latest_only}") + re_file = get_default_re_path(not args.latest_only) + if not args.latest_only: + logger.warning(f"{re_file} will be taken to get all models from the dirs") + output_model_list_file = os.path.join(args.output_dir, f"{args.filename}.lst") + generate_model_list_file(args.model_dirs, re_file, output_model_list_file, args.latest_only) + logger.info(f"Model file list is saved to {output_model_list_file}") diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/custom_re_to_find_models.lst b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/custom_re_to_find_models.lst new file mode 100644 index 00000000000000..72e8ffc0db8aad --- /dev/null +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/custom_re_to_find_models.lst @@ -0,0 +1 @@ +* diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/default_re_to_find_models.lst b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/default_re_to_find_models.lst new file mode 100644 index 00000000000000..e7c64cbe45dc75 --- /dev/null +++ 
b/src/tests/test_utils/functional_test_utils/layer_tests_summary/data/default_re_to_find_models.lst @@ -0,0 +1,5 @@ +ww*_dynamic* +ww*_pytorch* +ww*_static* +ww42*tfhub* +# ww42*tflite* \ No newline at end of file diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_conformance.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_conformance.py index d3e128e7b89a1e..70cd6fad7667b0 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_conformance.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_conformance.py @@ -164,8 +164,7 @@ def __dump_subgraph(self): rmtree(conformance_ir_path) os.mkdir(conformance_ir_path) self._model_path = file_utils.prepare_filelist(self._model_path, - ["*.onnx", "*.pdmodel", "*.__model__", "*.pb", "*.xml", - "*.tflite"]) + constants.SUPPORTED_MODEL_EXTENSION) logger.info(f"Stating model dumping from {self._model_path}") cmd = f'{subgraph_dumper_path} --input_folders="{self._model_path}" --output_folder="{conformance_ir_path}"' process = Popen(cmd, shell=True) diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index 05018f1cbfda21..8d381f2758a2d5 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -56,3 +56,16 @@ META_EXTENSION = ".meta" XML_EXTENSION = ".xml" BIN_EXTENSION = ".bin" + +SUPPORTED_MODEL_EXTENSION = [ + # ONNX + "*.onnx", + #PDPD + "*.pdmodel", + # TF + "*.pb", + # OV IR + "*.xml", + # TFLITE + "*.tflite", +] diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/file_utils.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/file_utils.py index 002d7b91a7751b..edd74d330751d5 100644 --- 
a/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/file_utils.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/utils/file_utils.py @@ -13,7 +13,7 @@ from . import conformance_utils # generates file list file inside directory. Returns path to saved filelist -def prepare_filelist(input_dir: os.path, patterns: list): +def prepare_filelist(input_dir: os.path, patterns: list, is_save_to_file = True): filelist_path = input_dir if os.path.isdir(filelist_path): filelist_path = os.path.join(input_dir, "conformance_ir_files.lst") @@ -24,15 +24,17 @@ def prepare_filelist(input_dir: os.path, patterns: list): conformance_utils.UTILS_LOGGER.info(f"{filelist_path} is exists! The script will update it!") model_list = list() for pattern in patterns: - for model in Path(input_dir).rglob(pattern): - model_list.append(model) - try: - with open(filelist_path, 'w') as file: - for xml in model_list: - file.write(str(xml) + '\n') - file.close() - except: - conformance_utils.UTILS_LOGGER.warning(f"Impossible to update {filelist_path}! Something going is wrong!") + model_list.extend(Path(input_dir).rglob(pattern)) + if is_save_to_file: + try: + with open(filelist_path, 'w') as file: + for xml in model_list: + file.write(str(xml) + '\n') + file.close() + except: + conformance_utils.UTILS_LOGGER.warning(f"Impossible to update {filelist_path}! 
Something going is wrong!") + else: + return model_list return filelist_path def is_archieve(input_path: os.path): @@ -68,27 +70,22 @@ def unzip_archieve(zip_path: os.path, dst_path: os.path): return dst_dir # find latest changed directory -def find_latest_dir(in_dir: Path, pattern_list = list()): - get_latest_dir = lambda path: sorted(Path(path).iterdir(), key=os.path.getmtime) +def find_latest_dir(in_dir: Path, pattern = "*"): + get_latest_dir = lambda path: sorted(Path(path).glob(pattern), key=os.path.getmtime) entities = get_latest_dir(in_dir) entities.reverse() for entity in entities: if entity.is_dir(): - if not pattern_list: - return entity - else: - for pattern in pattern_list: - if pattern in str(os.fspath(PurePath(entity))): - return entity - conformance_utils.UTILS_LOGGER.error(f"{in_dir} does not contain applicable directories to patterns: {pattern_list}") + return entity + conformance_utils.UTILS_LOGGER.error(f"{in_dir} does not contain applicable directories to pattern: {pattern}") exit(-1) def get_ov_path(script_dir_path: os.path, ov_dir=None, is_bin=False): if ov_dir is None or not os.path.isdir(ov_dir): ov_dir = os.path.abspath(script_dir_path)[:os.path.abspath(script_dir_path).find(constants.OPENVINO_NAME) + len(constants.OPENVINO_NAME)] if is_bin: - ov_dir = os.path.join(ov_dir, find_latest_dir(ov_dir, ['bin'])) + ov_dir = os.path.join(ov_dir, find_latest_dir(ov_dir, 'bin')) ov_dir = os.path.join(ov_dir, find_latest_dir(ov_dir)) ov_dir = os.path.join(ov_dir, find_latest_dir(ov_dir, [constants.DEBUG_DIR, constants.RELEASE_DIR])) return ov_dir From 214e08599b971999d95ea846ce2effac1b79b366 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Wed, 25 Oct 2023 19:36:25 +0100 Subject: [PATCH 062/275] [CI] [GHA] Switch Windows pipeline to dynamic build and to VS 2019 (#20630) * build dynamic in win main; transfer win main to MSVC 2019; use toolchain for win cc * Update windows_conditional_compilation.yml use cmake toolchain globally in build stage * 
Update windows_conditional_compilation.yml * Update windows_conditional_compilation.yml * use quotes * try w/o protobuf * do not restore cache * return * revert * add missing shell * skip Template OpImpl tests * skip OV C API tests * rm pr trigger --------- Co-authored-by: Ilya Lavrenov --- .github/workflows/windows.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e6763d2a696377..df6e7612a0646b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -1,9 +1,9 @@ -name: Windows (VS 2022, Python 3.11) +name: Windows (VS 2019, Python 3.11) on: + workflow_dispatch: schedule: # at 00:00 on workdays - cron: '0 0 * * 1,2,3,4,5' -# workflow_dispatch: # pull_request: # paths-ignore: # - '**/docs/**' @@ -36,7 +36,7 @@ jobs: defaults: run: shell: pwsh - runs-on: windows-latest-8-cores + runs-on: windows-2019-16-core env: CMAKE_BUILD_TYPE: 'Release' CMAKE_GENERATOR: 'Ninja Multi-Config' @@ -115,7 +115,7 @@ jobs: cmake -G "${{ env.CMAKE_GENERATOR }}" ` -DENABLE_CPPLINT=OFF ` -DBUILD_nvidia_plugin=OFF ` - -DBUILD_SHARED_LIBS=OFF ` + -DBUILD_SHARED_LIBS=ON ` -DENABLE_TESTS=ON ` -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF ` -DENABLE_STRICT_DEPENDENCIES=OFF ` @@ -183,7 +183,7 @@ jobs: defaults: run: shell: pwsh - runs-on: windows-latest-8-cores + runs-on: windows-2019 env: INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" @@ -251,7 +251,7 @@ jobs: defaults: run: shell: pwsh - runs-on: windows-latest + runs-on: windows-2019 env: OPENVINO_REPO: "${{ github.workspace }}\\openvino" OPENVINO_CONTRIB_REPO: "${{ github.workspace }}\\openvino_contrib" @@ -451,7 +451,7 @@ jobs: defaults: run: shell: pwsh - runs-on: windows-latest + runs-on: windows-2019 env: INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" @@ -565,10 +565,13 @@ jobs: call "${{ 
env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml - name: SubgraphsDumper tests + shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml - name: Template OpImpl tests + if: ${{ 'false' }} # Ticket: 123572 + shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml @@ -598,6 +601,7 @@ jobs: call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/InferenceEngineCAPITests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-InferenceEngineCAPITests.xml - name: OpenVINO C API tests + if: ${{ 'false' }} # Ticket: 123594 shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_capi_test --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OpenVINOCAPITests.xml @@ -641,7 +645,7 @@ jobs: defaults: run: shell: pwsh - runs-on: windows-latest-8-cores + runs-on: windows-2019-8-core env: INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" From 49c5099f5e67c0ef9a79a6ad508b1f995b4fc260 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 25 Oct 2023 22:16:01 +0200 Subject: [PATCH 063/275] [GHA] Removed dependency on git repo in python tests (#20390) * Removed repo deps in py tests * fixed typo * changed test cmd * add test_utils to pythonpath * set test_utils to LD_LIBRARY_PATH * changed path * changed path * source python site-packages * fixed LD_LIBRARY_PATH * print pip dir * changed python install path * source all openvino dir * isolate test_utils * 
system deps * reordered deps * dep conflict * tests reorder * changed test_utils path * cpack test dir * rebase fix * fixed env * frontend tests fix * removed already installed reqs * debug * debug 2 * added python action path * test * reset 3rdparties * set pip install path globally * reset 3rdparties * removed parallel code * moved doc snippets * switched back 4-core runner * removed global unused vars * removed unused requirements.txt * removed unused requirements * test * Revert "test" This reverts commit bdf22d22fd6ec5d488930bc362bdb0741c5bea76. --- .github/workflows/linux.yml | 192 +++++++++--------- src/bindings/python/CMakeLists.txt | 8 + .../src/pyopenvino/test_utils/CMakeLists.txt | 4 +- src/frontends/onnx/tests/CMakeLists.txt | 8 + tools/ovc/CMakeLists.txt | 10 + 5 files changed, 119 insertions(+), 103 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 9050ab3d161509..6732e5fac33f7e 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -105,6 +105,13 @@ jobs: PIP_VER=$(python3 -c "import pip; print(pip.__version__)") echo "Using pip version: ${PIP_VER}" echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV + - name: Get pip cache info + run: | + echo "Cache size: " + du -h -d2 ${PIP_CACHE_DIR} + echo "Cache info: " + python -m pip cache info + continue-on-error: true - name: Install python dependencies run: | @@ -865,38 +872,9 @@ jobs: LAYER_TESTS_INSTALL_DIR: /__w/openvino/openvino/install/tests/layer_tests steps: - - name: Install git - run: | - apt update - apt install --assume-yes --no-install-recommends git ca-certificates - - - name: Clone OpenVINO - uses: actions/checkout@v4 - with: - path: 'openvino' - # # Initialize OpenVINO # - - - uses: actions/setup-python@v4 - with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo 
"PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV - - - name: Install Python API tests dependencies - run: | - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r ${OPENVINO_REPO}/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt - - # TODO: replace with Python API tests requirements - python3 -m pip install -r ${OPENVINO_REPO}/tools/mo/requirements_dev.txt - - name: Download OpenVINO package uses: actions/download-artifact@v3 with: @@ -919,6 +897,20 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd + - name: Install 'actions/setup-python@v4' dependencies + run: apt-get update && apt-get install -y libssl1.1 ca-certificates + - uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + env: + PIP_CACHE_DIR: ${{ env.PIP_CACHE_PATH }} + - name: Setup pip cache dir + run: | + PIP_VER=$(python3 -c "import pip; print(pip.__version__)") + echo "Using pip version: ${PIP_VER}" + echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV + echo "PIP_INSTALL_PATH=${Python_ROOT_DIR}/lib/python${PYTHON_VERSION}/site-packages" >> $GITHUB_ENV + - name: Install OpenVINO Python wheels run: | # Install the core OV wheel @@ -930,6 +922,14 @@ jobs: python3 -m pip install $ov_dev_wheel_name[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch] popd + - name: Install Python API tests dependencies + run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt + + # TODO: replace with Python API tests requirements + python3 -m pip install -r ${INSTALL_TEST_DIR}/mo/requirements_dev.txt + # # Tests # @@ -945,19 +945,10 @@ jobs: run: | # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH - python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - - 
name: Docs Python snippets - run: | - # to find 'snippets' module in docs - export PYTHONPATH=${OPENVINO_REPO}/docs/:$PYTHONPATH - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH - python3 ${OPENVINO_REPO}/docs/snippets/main.py - - name: Model Optimizer unit tests run: | # required for MxNet @@ -966,19 +957,57 @@ jobs: python3 -m pytest -s ${INSTALL_TEST_DIR}/mo/unit_tests \ --junitxml=${INSTALL_TEST_DIR}/TEST-ModelOptimizer.xml - - name: PyTorch Layer Tests + - name: Python ONNX operators tests + run: | + # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately + python3 -m pytest -sv ${INSTALL_TEST_DIR}/onnx -k 'not cuda' \ + --junitxml=${INSTALL_TEST_DIR}/TEST-onnx_frontend.xml \ + --ignore=${INSTALL_TEST_DIR}/onnx/test_python/test_zoo_models.py \ + + - name: OVC unit tests + run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml + + - name: Install Python Layer tests dependencies run: | + # layer test requirements python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml + + - name: MO Python API Tests + run: | + # Import 'test_utils' installed in '/tests/python/openvino' + export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH + export PYTHONPATH=${INSTALL_TEST_DIR}/python + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/mo_python_api_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_mo_convert.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + + - name: OVC Python API Tests + run: | + # Import 'test_utils' installed in '/tests/python/openvino' + export PYTHONPATH=${INSTALL_TEST_DIR}/python + export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/ovc_python_api_tests 
--junitxml=${INSTALL_TEST_DIR}/TEST-test_ovc_convert.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + + - name: Python Frontend tests + run: | + # to allow 'libtest_builtin_extensions.so' to find 'libopenvino_onnx_frontend.so' + export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml + + - name: PyTorch Layer Tests + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - name: ONNX Layer Tests run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt # requires 'unit_tests' from 'tools/mo' - export PYTHONPATH=${OPENVINO_REPO}/tools/mo/:$PYTHONPATH + export PYTHONPATH=${INSTALL_TEST_DIR}/mo:$PYTHONPATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml env: TEST_DEVICE: CPU @@ -986,9 +1015,8 @@ jobs: - name: TensorFlow 1 Layer Tests - TF FE run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - # requires 'unit_tests' from 'tools/mo' - export PYTHONPATH=${OPENVINO_REPO}/tools/mo/:$PYTHONPATH + # requires 'unit_tests' from 'mo' + export PYTHONPATH=${INSTALL_TEST_DIR}/mo python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${INSTALL_TEST_DIR}/TEST-tf_fe.xml env: TEST_DEVICE: CPU @@ -996,85 +1024,47 @@ jobs: - name: TensorFlow 2 Layer Tests - TF FE run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - # requires 'unit_tests' from 'tools/mo' - export PYTHONPATH=${OPENVINO_REPO}/tools/mo/:$PYTHONPATH + # requires 'unit_tests' from 'mo' + export PYTHONPATH=${INSTALL_TEST_DIR}/mo python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow2_keras_tests/ --use_new_frontend -m 
precommit_tf_fe --junitxml=${INSTALL_TEST_DIR}/TEST-tf2_fe.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - name: JAX Layer Tests - TF FE - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-jax.xml + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-jax.xml env: TEST_DEVICE: CPU - name: TensorFlow 1 Layer Tests - Legacy FE - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${INSTALL_TEST_DIR}/TEST-tf_Roll.xml + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${INSTALL_TEST_DIR}/TEST-tf_Roll.xml - name: TensorFlow 2 Layer Tests - Legacy FE - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow2_keras_tests/test_tf2_keras_activation.py \ - --ir_version=11 --junitxml=${INSTALL_TEST_DIR}/TEST-tf2_Activation.xml -k "sigmoid" + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 -k "sigmoid" --junitxml=${INSTALL_TEST_DIR}/TEST-tf2_Activation.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - name: TensorFlow Lite Layer Tests - TFL FE - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - - name: Python ONNX operators tests - run: | - # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately - python3 -m pytest -sv ${OPENVINO_REPO}/src/frontends/onnx/tests -k 'not cuda' \ - 
--junitxml=${INSTALL_TEST_DIR}/TEST-onnx_frontend.xml \ - --ignore=${OPENVINO_REPO}/src/frontends/onnx/tests/test_python/test_zoo_models.py - - - name: MO Python API Tests - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - # TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' - source ${INSTALL_DIR}/setupvars.sh - bash ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -y - - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/mo_python_api_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_mo_convert.xml + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - - name: OVC Python API Tests - run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - # TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' - source ${INSTALL_DIR}/setupvars.sh - bash ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -y - - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/ovc_python_api_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_ovc_convert.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 + - name: Clone API snippets + uses: actions/checkout@v4 + with: + sparse-checkout: openvino/docs/snippets + path: ${{ env.OPENVINO_REPO }} + submodules: 'false' - - name: Python Frontend tests + - name: Docs Python snippets run: | - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - # to allow 'libtest_builtin_extensions.so' to find 'libopenvino_onnx_frontend.so' - source ${INSTALL_DIR}/setupvars.sh - - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - - # TODO: install to 'tests' component via cpack - - name: OVC unit tests - run: python3 -m pytest -s ${OPENVINO_REPO}/tools/ovc/unit_tests 
--junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml + # to find 'snippets' module in docs + export PYTHONPATH=${OPENVINO_REPO}/docs + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH + python3 ${OPENVINO_REPO}/docs/snippets/main.py - name: Upload Test Results uses: actions/upload-artifact@v3 diff --git a/src/bindings/python/CMakeLists.txt b/src/bindings/python/CMakeLists.txt index a2e8945a807776..58ff9b74302059 100644 --- a/src/bindings/python/CMakeLists.txt +++ b/src/bindings/python/CMakeLists.txt @@ -403,6 +403,14 @@ endif() if(ENABLE_TESTS) add_subdirectory(tests/mock/mock_py_frontend) add_subdirectory(tests/mock/pyngraph_fe_mock_api) + install(FILES constraints.txt + DESTINATION tests/bindings/python + COMPONENT tests + EXCLUDE_FROM_ALL) + install(FILES src/openvino/preprocess/torchvision/requirements.txt + DESTINATION tests/python/preprocess/torchvision + COMPONENT tests + EXCLUDE_FROM_ALL) endif() if(OpenVINODeveloperPackage_FOUND) diff --git a/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt b/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt index 4b0c8df9c24c26..c63a30a30e7fef 100644 --- a/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt +++ b/src/bindings/python/src/pyopenvino/test_utils/CMakeLists.txt @@ -50,10 +50,10 @@ ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} install(TARGETS ${TARGET_NAME} RUNTIME DESTINATION ${OV_CPACK_PYTHONDIR}/openvino/test_utils COMPONENT tests EXCLUDE_FROM_ALL - LIBRARY DESTINATION ${OV_CPACK_PYTHONDIR}/openvino/test_utils + LIBRARY DESTINATION tests/${OV_CPACK_PYTHONDIR}/openvino/test_utils COMPONENT tests EXCLUDE_FROM_ALL) install(PROGRAMS ${OpenVINOPython_SOURCE_DIR}/src/openvino/test_utils/__init__.py - DESTINATION ${OV_CPACK_PYTHONDIR}/openvino/test_utils + DESTINATION tests/${OV_CPACK_PYTHONDIR}/openvino/test_utils COMPONENT tests EXCLUDE_FROM_ALL) diff --git a/src/frontends/onnx/tests/CMakeLists.txt 
b/src/frontends/onnx/tests/CMakeLists.txt index f0f8891b4a4945..76e7893efef623 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -165,6 +165,14 @@ target_include_directories(ov_onnx_frontend_tests PRIVATE install(TARGETS ov_onnx_frontend_tests RUNTIME DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DESTINATION tests/onnx + COMPONENT tests EXCLUDE_FROM_ALL + FILES_MATCHING + PATTERN "*.py" + PATTERN "*.onnx" + PATTERN "*.data") + add_custom_command(TARGET ov_onnx_frontend_tests POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/unit_test.manifest" "${TEST_MODEL_ZOO_OUTPUT_DIR}/onnx/unit_test.manifest" diff --git a/tools/ovc/CMakeLists.txt b/tools/ovc/CMakeLists.txt index cea078768604f7..959cc5d00bc720 100644 --- a/tools/ovc/CMakeLists.txt +++ b/tools/ovc/CMakeLists.txt @@ -35,6 +35,16 @@ install(DIRECTORY ${OpenVINOConverter_SOURCE_DIR}/openvino ${OV_CPACK_COMP_OVC_EXCLUDE_ALL} USE_SOURCE_PERMISSIONS) +# +# Tests +# +if(ENABLE_TESTS) + install(DIRECTORY unit_tests + DESTINATION tests/ovc + COMPONENT tests + EXCLUDE_FROM_ALL) +endif() + # # Cpack # From 00e2381d04febc49afd43e88206a8a155c1d4d7a Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 25 Oct 2023 22:39:31 +0200 Subject: [PATCH 064/275] [GHA] Jobs timeouts (#20601) * set linux timeouts * set win timeouts * other timeouts * reverted pugixml * fixed conformance timeout * reset omz --- .github/workflows/android_arm64.yml | 1 + .github/workflows/fedora.yml | 2 ++ .github/workflows/linux.yml | 13 ++++++++++++- .github/workflows/linux_conditional_compilation.yml | 5 ++++- .github/workflows/linux_riscv.yml | 1 + .github/workflows/mac.yml | 13 +++++++++---- .github/workflows/windows.yml | 9 +++++++-- .../workflows/windows_conditional_compilation.yml | 1 + 8 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.github/workflows/android_arm64.yml 
b/.github/workflows/android_arm64.yml index fb4b36c69f5a55..34487b04903d70 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -27,6 +27,7 @@ concurrency: jobs: Build: + timeout-minutes: 15 defaults: run: shell: bash diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index f398b1a3623fc1..3bb6b69c76d1f1 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -28,6 +28,7 @@ concurrency: jobs: Build: + timeout-minutes: 150 defaults: run: shell: bash @@ -172,6 +173,7 @@ jobs: RPM_Packages: needs: Build + timeout-minutes: 5 defaults: run: shell: bash diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 6732e5fac33f7e..0f0a791b700f15 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -35,6 +35,7 @@ env: jobs: Build: + timeout-minutes: 150 defaults: run: shell: bash @@ -252,6 +253,7 @@ jobs: Debian_Packages: name: Debian Packages needs: Build + timeout-minutes: 5 defaults: run: shell: bash @@ -305,6 +307,7 @@ jobs: Samples: needs: Build + timeout-minutes: 10 defaults: run: shell: bash @@ -404,6 +407,7 @@ jobs: Conformance: needs: Build + timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 15 }} defaults: run: shell: bash @@ -500,6 +504,7 @@ jobs: ONNX_Runtime: name: ONNX Runtime Integration needs: Build + timeout-minutes: 10 defaults: run: shell: bash @@ -635,6 +640,7 @@ jobs: CXX_Unit_Tests: name: C++ unit tests needs: Build + timeout-minutes: 15 defaults: run: shell: bash @@ -857,6 +863,7 @@ jobs: Python_Unit_Tests: name: Python unit tests needs: Build + timeout-minutes: 40 defaults: run: shell: bash @@ -1079,6 +1086,7 @@ jobs: CPU_Functional_Tests: name: CPU functional tests needs: Build + timeout-minutes: 25 defaults: run: shell: bash @@ -1138,7 +1146,7 @@ jobs: run: | source ${INSTALL_DIR}/setupvars.sh python3 ${PARALLEL_TEST_SCRIPT} -e ${INSTALL_TEST_DIR}/ov_cpu_func_tests -c ${PARALLEL_TEST_CACHE} -w ${INSTALL_TEST_DIR} -s 
suite -rf 0 -- --gtest_print_time=1 --gtest_filter=*smoke* - timeout-minutes: 40 + timeout-minutes: 20 - name: Save tests execution time uses: actions/cache/save@v3 @@ -1170,6 +1178,7 @@ jobs: run: shell: bash runs-on: ${{ github.event_name == 'schedule' && 'ubuntu-20.04-16-cores' || 'ubuntu-20.04-8-cores'}} + timeout-minutes: ${{ github.event_name == 'schedule' && 300 || 5 }} # TODO: Switch back to self-hosted runners # container: # image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 @@ -1246,6 +1255,7 @@ jobs: PyTorch_Models_Tests: name: PyTorch Models tests needs: Build + timeout-minutes: ${{ github.event_name == 'schedule' && 300 || 30 }} defaults: run: shell: bash @@ -1339,6 +1349,7 @@ jobs: NVIDIA_Plugin: name: NVIDIA plugin needs: Build + timeout-minutes: 15 defaults: run: shell: bash diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 15acba9d441696..5baf92c143d4a1 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -31,6 +31,7 @@ env: jobs: Build: + timeout-minutes: 150 defaults: run: shell: bash @@ -201,6 +202,7 @@ jobs: CC_Build: name: Conditional Compilation needs: Build + timeout-minutes: 10 defaults: run: shell: bash @@ -296,6 +298,7 @@ jobs: CPU_Functional_Tests: name: CPU functional tests needs: Build + timeout-minutes: 25 defaults: run: shell: bash @@ -341,7 +344,7 @@ jobs: - name: Intel CPU plugin func tests (parallel) run: python3 ${PARALLEL_TEST_SCRIPT} -e ${INSTALL_TEST_DIR}/bin/intel64/Release/ov_cpu_func_tests -c ${PARALLEL_TEST_CACHE} -w ${INSTALL_TEST_DIR} -s suite -rf 0 -- --gtest_print_time=1 --gtest_filter=*smoke* - timeout-minutes: 40 + timeout-minutes: 20 - name: Upload Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 927b10ea80120c..7dda15f3552bc3 100644 --- a/.github/workflows/linux_riscv.yml 
+++ b/.github/workflows/linux_riscv.yml @@ -31,6 +31,7 @@ concurrency: jobs: Build: + timeout-minutes: 15 defaults: run: shell: bash diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 487536f615a8a6..c39df9691fd78b 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -34,6 +34,7 @@ env: jobs: Build: + timeout-minutes: 150 defaults: run: shell: bash @@ -142,7 +143,7 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | @@ -187,6 +188,7 @@ jobs: Samples: needs: Build + timeout-minutes: 5 defaults: run: shell: bash @@ -227,7 +229,7 @@ jobs: pushd ${INSTALL_DIR} tar -xzf openvino_package.tar.gz -C ${INSTALL_DIR} popd - + pushd ${INSTALL_TEST_DIR} tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd @@ -279,6 +281,7 @@ jobs: CXX_Unit_Tests: name: C++ Unit tests needs: Build + timeout-minutes: 20 defaults: run: shell: bash @@ -510,6 +513,7 @@ jobs: Python_Unit_Tests: name: Python unit tests needs: Build + timeout-minutes: 55 defaults: run: shell: bash @@ -652,9 +656,9 @@ jobs: - name: ONNX Layer Tests run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - + export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH - + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx.xml env: TEST_DEVICE: CPU @@ -740,6 +744,7 @@ jobs: CPU_Functional_Tests: name: CPU functional tests needs: Build + 
timeout-minutes: 25 defaults: run: shell: bash diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index df6e7612a0646b..c1617b4c3415ed 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -33,6 +33,7 @@ env: jobs: Build: + timeout-minutes: 180 defaults: run: shell: pwsh @@ -180,6 +181,7 @@ jobs: Samples: needs: Build + timeout-minutes: 10 defaults: run: shell: pwsh @@ -248,6 +250,7 @@ jobs: Python_Unit_Tests: name: Python unit tests needs: Build + timeout-minutes: 75 defaults: run: shell: pwsh @@ -448,6 +451,7 @@ jobs: CXX_Unit_Tests: name: C++ unit tests needs: Build + timeout-minutes: 15 defaults: run: shell: pwsh @@ -641,7 +645,8 @@ jobs: CPU_Functional_Tests: name: CPU functional tests - needs: Build + needs: Build + timeout-minutes: 30 defaults: run: shell: pwsh @@ -694,7 +699,7 @@ jobs: shell: cmd run: | call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 ${{ env.PARALLEL_TEST_SCRIPT }} -e ${{ env.INSTALL_TEST_DIR }}\ov_cpu_func_tests.exe -c ${{ env.PARALLEL_TEST_CACHE }} -w ${{ env.INSTALL_TEST_DIR }} -s suite -- --gtest_filter=*smoke*" - timeout-minutes: 45 + timeout-minutes: 25 - name: Save tests execution time uses: actions/cache/save@v3 diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index f0a9741aee9537..d506272d0ad510 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -33,6 +33,7 @@ env: jobs: Build: + timeout-minutes: 180 defaults: run: shell: pwsh From 63299ec2179a6cfe2c6f3f1633452ee9d472542a Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Thu, 26 Oct 2023 01:08:07 +0400 Subject: [PATCH 065/275] [CPU] FullyConnected acceleration with 4bit weights decompression (#20607) --- src/inference/dev_api/ie_ngraph_utils.hpp | 2 + .../intel_cpu/src/dnnl_extension_utils.cpp | 21 +++-- .../intel_cpu/src/dnnl_postops_composer.cpp | 64 ++++++++------- 
.../intel_cpu/src/dnnl_postops_composer.h | 6 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 73 +++++++++++++++-- .../intel_cpu/src/nodes/fullyconnected.cpp | 39 ++++------ .../intel_cpu/src/nodes/fullyconnected.h | 6 +- .../transformation_pipeline.cpp | 27 ++++--- .../src/matmul_weights_decompression.cpp | 78 ++++++++----------- src/plugins/intel_cpu/thirdparty/onednn | 2 +- .../ov_models/include/ov_models/builders.hpp | 2 + 11 files changed, 194 insertions(+), 126 deletions(-) diff --git a/src/inference/dev_api/ie_ngraph_utils.hpp b/src/inference/dev_api/ie_ngraph_utils.hpp index aeaa0b4d9b0ba2..2786fc4abf4198 100644 --- a/src/inference/dev_api/ie_ngraph_utils.hpp +++ b/src/inference/dev_api/ie_ngraph_utils.hpp @@ -53,6 +53,8 @@ INFERENCE_ENGINE_1_0_DEPRECATED inline ::ngraph::element::Type convertPrecision( return ::ngraph::element::Type(::ngraph::element::Type_t::boolean); case Precision::BIN: return ::ngraph::element::Type(::ngraph::element::Type_t::u1); + case Precision::NF4: + return ::ngraph::element::Type(::ngraph::element::Type_t::nf4); case Precision::Q78: case Precision::MIXED: case Precision::CUSTOM: diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 1cef0551d1eb08..1185a79f31c086 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -21,19 +21,18 @@ namespace intel_cpu { uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) { switch (dataType) { case dnnl::memory::data_type::f32: - return 4; case dnnl::memory::data_type::s32: return 4; case dnnl::memory::data_type::bf16: + case dnnl::memory::data_type::f16: return 2; case dnnl::memory::data_type::s8: - return 1; case dnnl::memory::data_type::u8: - return 1; case dnnl::memory::data_type::bin: + case dnnl::memory::data_type::nf4: + case dnnl::memory::data_type::s4: + case dnnl::memory::data_type::u4: return 1; - case 
dnnl::memory::data_type::f16: - return 2; case dnnl::memory::data_type::undef: return 0; default: @@ -58,6 +57,12 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin return memory::data_type::bin; case InferenceEngine::Precision::FP16: return memory::data_type::f16; + case InferenceEngine::Precision::NF4: + return memory::data_type::nf4; + case InferenceEngine::Precision::I4: + return memory::data_type::s4; + case InferenceEngine::Precision::U4: + return memory::data_type::u4; case InferenceEngine::Precision::UNSPECIFIED: return memory::data_type::undef; default: { @@ -82,6 +87,12 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat return InferenceEngine::Precision::BIN; case memory::data_type::f16: return InferenceEngine::Precision::FP16; + case memory::data_type::nf4: + return InferenceEngine::Precision::NF4; + case memory::data_type::s4: + return InferenceEngine::Precision::I4; + case memory::data_type::u4: + return InferenceEngine::Precision::U4; case memory::data_type::undef: return InferenceEngine::Precision::UNSPECIFIED; default: { diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 1f85dd1f3ffbc6..865a1033a2c6e7 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -251,48 +251,58 @@ void DnnlPostOpsComposer::appendClip(const std::vector& low, const std::v } } -MemoryPtr DnnlPostOpsComposer::prepackDecompressionParams(const MemoryCPtr& params_ptr, size_t icBlock) { - // Prepacking params from [oc] to [oc, icBlock] layout, where for each icBlock corresponding parameter is duplicated +MemoryPtr DnnlPostOpsComposer::prepackDecompressionParams(const MemoryCPtr& params_ptr, bool needTranspose) { const auto shape = params_ptr->getShape().getStaticDims(); - const size_t elements_count = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - 
DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({icBlock * elements_count})); - auto mem = std::make_shared(engine, memoryDesc); - size_t dstIdx = 0; - auto decomp_scales_data = static_cast(params_ptr->getData()); - auto decomp_scales_buf = static_cast(mem->getData()); - for (size_t oc = 0; oc < elements_count; oc++) { - for (size_t intIdx = 0; intIdx < icBlock; intIdx++) { - decomp_scales_buf[dstIdx] = decomp_scales_data[oc]; + MemoryPtr mem; + + auto params_data = static_cast(params_ptr->getData()); + + if (needTranspose) { + VectorDims dnnlShape = {shape[0], shape[1]}; + DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape(dnnlShape)); + mem = std::make_shared(engine, memoryDesc); + auto memory_buf = static_cast(mem->getData()); + + // oi -> io + for (size_t oc = 0; oc < dnnlShape[0]; oc++) { + for (size_t ic = 0; ic < dnnlShape[1]; ic++) { + memory_buf[ic * dnnlShape[0] + oc] = params_data[oc * dnnlShape[1] + ic]; + } + } + } else { + VectorDims dnnlShape = {shape[shape.size() - 1], shape[0]}; + DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape(dnnlShape)); + mem = std::make_shared(engine, memoryDesc); + auto memory_buf = static_cast(mem->getData()); + const size_t elements_count = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + + // io -> io + size_t dstIdx = 0; + for (size_t oc = 0; oc < elements_count; oc++) { + memory_buf[dstIdx] = params_data[oc]; dstIdx++; } } + return mem; } -void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr, size_t icBlock) { +void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose) { if (scales_ptr == nullptr) return; - const auto shape = scales_ptr->getShape().getStaticDims(); - const auto elements_count = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - int mask = elements_count > 1 ? 
weightScaleMaskPerChannel : 0; - DEBUG_LOG("Set weights scales mask ", "DNNL_ARG: ", DNNL_ARG_WEIGHTS, " mask: ", mask); - attr.set_scales_mask(DNNL_ARG_WEIGHTS, mask); - - args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = prepackDecompressionParams(scales_ptr, icBlock); + auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose); + attr.set_scales_dims(DNNL_ARG_WEIGHTS, DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims())); + args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = scalesMem; } -void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, size_t icBlock) { +void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, bool needTranspose) { if (zero_points_ptr == nullptr) return; - const auto shape = zero_points_ptr->getShape().getStaticDims(); - const auto elements_count = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - int mask = elements_count > 1 ? weightScaleMaskPerChannel : 0; - DEBUG_LOG("Set weights zero points mask ", "DNNL_ARG: ", DNNL_ARG_WEIGHTS, " mask: ", mask); - attr.set_zero_points_mask(DNNL_ARG_WEIGHTS, mask); - - args[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = prepackDecompressionParams(zero_points_ptr, icBlock); + auto zeroPointsMem = prepackDecompressionParams(zero_points_ptr, needTranspose); + attr.set_zero_points_dims(DNNL_ARG_WEIGHTS, DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims())); + args[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = zeroPointsMem; } } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.h b/src/plugins/intel_cpu/src/dnnl_postops_composer.h index fd52863ed7a9bd..d31fe6bc852fce 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.h +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.h @@ -42,8 +42,8 @@ class DnnlPostOpsComposer { bool appendLinear(const std::vector& scale, const std::vector& shift, bool isLastPostOp, bool allowBinary = true); 
void appendClip(const std::vector& low, const std::vector& high); - void appendDecompressionScales(const MemoryCPtr& scales_ptr, size_t icBlock); - void appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, size_t icBlock); + void appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose); + void appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, bool needTranspose); const VectorDims& getOutputDims() { return outputDims; @@ -69,7 +69,7 @@ class DnnlPostOpsComposer { void updateWeiScales(); void updateDestScales(); - MemoryPtr prepackDecompressionParams(const MemoryCPtr& params_ptr, size_t icBlock); + MemoryPtr prepackDecompressionParams(const MemoryCPtr& params_ptr, bool needTranspose); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 770527c09aa334..ea52e353a982aa 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -286,7 +286,8 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) { } void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { - std::set supportedWeightsPrecisions{InferenceEngine::Precision::U8, InferenceEngine::Precision::NF4}; + std::set supportedWeightsPrecisions{InferenceEngine::Precision::U8, InferenceEngine::Precision::NF4, + InferenceEngine::Precision::U4, InferenceEngine::Precision::I4}; const std::set supportedDataPrecisions{InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16}; auto expectedNode = [](NodePtr node, Type expectedType) { return node->getType() == expectedType && node->getChildEdges().size() == 1; @@ -335,7 +336,28 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { continue; } - const auto convertNode = withSubtract ? 
subtractNode->getParentEdgesAtPort(0)[0]->getParent() : mulParent; + const bool withPowerStatic = mulParent->getAlgorithm() == Algorithm::EltwisePowerStatic; + NodePtr powerStaticNode; + if (withPowerStatic) { + powerStaticNode = mulParent; + if (auto *eltwiseNode = dynamic_cast(powerStaticNode.get())) { + if (eltwiseNode->getAlpha() != 1 || eltwiseNode->getBeta() != 1) + continue; + } else { + continue; + } + } + + // Both operations fallbacks on IP zero-point attribute and cannot be combined + if (withSubtract && withPowerStatic) + continue; + + auto convertNode = mulParent; + if (withSubtract) + convertNode = subtractNode->getParentEdgesAtPort(0)[0]->getParent(); + if (withPowerStatic) + convertNode = powerStaticNode->getParentEdgesAtPort(0)[0]->getParent(); + if (!expectedNode(convertNode, Type::Convert)) continue; const auto weightsNode = convertNode->getParentEdgesAtPort(0)[0]->getParent(); @@ -347,6 +369,8 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { continue; if (withSubtract && subtractConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32) continue; + if (withPowerStatic && powerStaticNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32) + continue; if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end()) continue; if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end()) @@ -361,6 +385,7 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { VectorDims decompressionConstShape; const auto fcInputWeightsShape = fcNode->getInputShapeAtPort(1); + int groupNum = 1; // Ordinary case: one decompression group if (fcInputWeightsShape.getRank() == weightsShape.getRank()) { const auto& out_channels = fcInputWeightsShape.getDims()[0]; @@ -377,6 +402,7 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { const auto& O = withTranspose ? 
*weights_dims.rbegin() : *(weights_dims.rbegin() + 2); // Group decompression is applied by O and N dims decompressionConstShape = withTranspose ? VectorDims{N, 1, O} : VectorDims{O, N, 1}; + groupNum = N; } if (multiplyConstNode->getOutputShapeAtPort(0).getDims() != decompressionConstShape) continue; @@ -384,7 +410,8 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { continue; // HW specific shape limitations - if (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx)) { + if (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) && + fcNode->getOriginalInputPrecisionAtPort(0) == InferenceEngine::Precision::BF16) { // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a current solution conditions below are copied // from OneDNN to make sure correct IP impl will be used since fallback one doesn't support weights decompression feature. size_t OC = fcInputWeightsShape.getDims()[0]; @@ -398,10 +425,38 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { continue; } + size_t IC = fcInputWeightsShape.getDims()[1]; + // OneDNN IP primitive provides limited decompression params support + if (IC % groupNum != 0 || IC / groupNum < 4) { + continue; + } + // Fusion processing - fcNode->fuseDecompressionMultiply(multiplyConstNode); - if (withSubtract) - fcNode->fuseDecompressionSubtract(subtractConstNode); + auto *multiplyInputNode = dynamic_cast(multiplyConstNode.get()); + if (!multiplyInputNode) { + IE_THROW() << "Cannot cast " << multiplyInputNode->getName() << " to Input node"; + } + fcNode->fuseDecompressionMultiply(multiplyInputNode->getMemoryPtr()); + + if (withSubtract) { + auto *subtractInputNode = dynamic_cast(subtractConstNode.get()); + if (!subtractInputNode) { + IE_THROW() << "Cannot cast " << subtractInputNode->getName() << " to Input node"; + } + fcNode->fuseDecompressionSubtract(subtractInputNode->getMemoryPtr()); + } + if (withPowerStatic) { + auto *eltwiseNode = 
dynamic_cast(powerStaticNode.get()); + if (!eltwiseNode) { + IE_THROW() << "Cannot cast " << eltwiseNode->getName() << " to Eltwise node"; + } + + VectorDims memoryDims(decompressionConstShape.size(), 1); + CpuBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(memoryDims)); + auto memory = std::make_shared(graph.getEngine(), memoryDesc, nullptr, false); + (static_cast(memory->getData()))[0] = -1.f * eltwiseNode->getGamma(); + fcNode->fuseDecompressionSubtract(memory); + } fcNode->addOriginalLayer(multiplyNode->getOriginalLayers()); fcNode->addOriginalLayer(convertNode->getOriginalLayers()); @@ -411,12 +466,18 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { auto subtractConstEdge = subtractConstNode->getChildEdges()[0].lock(); graph.RemoveEdge(subtractConstEdge); } + if (withPowerStatic) { + fcNode->addOriginalLayer(powerStaticNode->getOriginalLayers()); + } + auto multiplyConstEdge = multiplyConstNode->getChildEdges()[0].lock(); graph.RemoveEdge(multiplyConstEdge); graph.DropNode(convertNode); if (withSubtract) graph.DropNode(subtractNode); + if (withPowerStatic) + graph.DropNode(powerStaticNode); graph.DropNode(multiplyNode); const auto& weightsPrecision = weightsNode->getOriginalOutputPrecisionAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 3add267195ae34..d90cbfd8017321 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -208,7 +208,8 @@ void FullyConnected::getSupportedDescriptors() { useSparseWeights = useSparseWeightsDecompression(); useWeightsDecompressionImpl = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) && one_of(inputDataType, memory::data_type::f32, memory::data_type::bf16) && - weightsDataType == memory::data_type::u8; + one_of(weightsDataType, memory::data_type::u8, memory::data_type::nf4, + memory::data_type::u4, memory::data_type::s4); // revert back 
outputDataType on special cases if (inputDataType == memory::data_type::f32) { @@ -724,15 +725,10 @@ void FullyConnected::setPostOps(dnnl::primitive_attr& attr, const VectorDims& di NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; - // OneDNN API doesn't provide an abilitiy to query optimal layout for runtime attributes - // As workaround we assume that all AMX IP implementations use equal internal IC block size for weights layout - // and prepack runtime attributes accordingly for better performance - bool withAMX = selected_pd->getImplementationType() & impl_desc_type::amx; - int icBlock = withAMX ? 2 : 1; if (decompressionMultiplyPtr) - dnnlpoc.appendDecompressionScales(decompressionMultiplyPtr, icBlock); + dnnlpoc.appendDecompressionScales(decompressionMultiplyPtr, !weightsNonTransposed); if (decompressionSubtractPtr) - dnnlpoc.appendDecompressionZeroPoints(decompressionSubtractPtr, icBlock); + dnnlpoc.appendDecompressionZeroPoints(decompressionSubtractPtr, !weightsNonTransposed); for (size_t i = 0; i < fusedWith.size(); ++i) { auto& node = fusedWith[i]; @@ -1132,30 +1128,25 @@ bool FullyConnected::useSparseWeightsDecompression() { return true; } -void FullyConnected::fuseDecompressionMultiply(const NodePtr& constData) { - fuseDecompressionConstant(constData, decompressionMultiplyPtr); +void FullyConnected::fuseDecompressionMultiply(const MemoryCPtr& memory) { + fuseDecompressionConstant(memory, decompressionMultiplyPtr); } -void FullyConnected::fuseDecompressionSubtract(const NodePtr& constData) { - fuseDecompressionConstant(constData, decompressionSubtractPtr); +void FullyConnected::fuseDecompressionSubtract(const MemoryCPtr& memory) { + fuseDecompressionConstant(memory, decompressionSubtractPtr); } -void FullyConnected::fuseDecompressionConstant(const NodePtr& constData, MemoryCPtr& decompressionValuesPtr) { - auto *constInputNode = 
dynamic_cast(constData.get()); - if (!constInputNode) { - IE_THROW() << "Cannot cast " << constData->getName() << " to Input"; - } +void FullyConnected::fuseDecompressionConstant(const MemoryCPtr& memory, MemoryCPtr& decompressionValuesPtr) { const auto decompression_prc = InferenceEngine::Precision::FP32; - if (constInputNode->getOriginalOutputPrecisionAtPort(0) == decompression_prc) { - decompressionValuesPtr = constInputNode->getMemoryPtr(); + if (memory->getDesc().getPrecision() == decompression_prc) { + decompressionValuesPtr = memory; } else { - const auto constBlob = constInputNode->getMemoryPtr(); - DnnlBlockedMemoryDesc memoryDesc(decompression_prc, constBlob->getShape()); + DnnlBlockedMemoryDesc memoryDesc(decompression_prc, memory->getShape()); decompressionValuesPtr = std::make_shared(getEngine(), memoryDesc, nullptr, false); - const auto elementsCount = constBlob->getDescWithType()->getPaddedElementsCount(); - cpu_convert(constBlob->getData(), + const auto elementsCount = memory->getDescWithType()->getPaddedElementsCount(); + cpu_convert(memory->getData(), decompressionValuesPtr->getData(), - DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->getDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(memory->getDataType()), Precision::FP32, elementsCount); } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index 956767bcea1219..5bbdbc141742a3 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -60,8 +60,8 @@ class FullyConnected : public Node { this->weightsNonTransposed = weightsNonTransposed; } - void fuseDecompressionMultiply(const NodePtr& constData); - void fuseDecompressionSubtract(const NodePtr& constData); + void fuseDecompressionMultiply(const MemoryCPtr& memory); + void fuseDecompressionSubtract(const MemoryCPtr& memory); private: void createDescriptorInternal(const dnnl::memory::desc &inputDesc, @@ -99,7 
+99,7 @@ class FullyConnected : public Node { const dnnl::engine& engine); bool canBeExecutedInConv1x1() const; - void fuseDecompressionConstant(const NodePtr& constData, MemoryCPtr& decompressionValuesPtr); + void fuseDecompressionConstant(const MemoryCPtr& memory, MemoryCPtr& decompressionValuesPtr); // sparse weights bool useSparseWeights = false; diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index f87bfb4f1b055f..d67c5047b992e0 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -201,11 +201,16 @@ void Transformations::PreLpt(const std::vector& defaultPrecis } else { // We need to fuse Transpose to MatMul to have a simpler callback for the next transformation CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeMatMul); - const ov::element::TypeVector decompression_precisions{ - ov::element::u8, - // TODO: Uncomment when group decompression is supported - // ov::element::nf4 + ov::element::TypeVector decompression_precisions{ + ov::element::u8 }; + // We don't have BF16/FP16 FullyConnected kernels to work with 4bits compressed weights + // Convert node doesn't support 4bit precisions -> fallback on constant folding + if (inferencePrecision == ov::element::f32) { + decompression_precisions.push_back(ov::element::u4); + decompression_precisions.push_back(ov::element::i4); + decompression_precisions.push_back(ov::element::nf4); + } // MarkDequantizationSubgraph is used even in non-LPT pipeline on X64 platforms // in order to keep compressed MatMul weights with decompression operations as is CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, true); @@ -223,15 +228,13 @@ void Transformations::PreLpt(const std::vector& defaultPrecis if (ov::is_type(consumer)) { return false; + } else if 
(ov::is_type(consumer)) { + consumer = get_single_consumer(consumer); + if (consumer != nullptr && ov::is_type(consumer)) { + return false; + } } - // TODO: Uncomment when group decompression is supported - // if (ov::is_type(consumer)) { - // consumer = get_single_consumer(consumer); - // if (consumer != nullptr && ov::is_type(consumer)) { - // return false; - // } - // } - if (ov::is_type(consumer)) { + if (consumer != nullptr && ov::is_type(consumer)) { consumer = get_single_consumer(consumer); if (consumer != nullptr && ov::is_type(consumer)) { return false; diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp index b107b406cd833a..35eb91d3fff04e 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp @@ -142,7 +142,8 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_precision, transformed_weights_shape, {}, true); + + auto weights = ngraph::builder::makeConstant(weights_precision, transformed_weights_shape, {}, true, 7); weights->set_friendly_name("Compressed_weights"); auto weights_convert = std::make_shared(weights, decompression_precision); @@ -164,7 +165,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_precision, scaleshift_const_shape, {}, true); + auto shift_const = ngraph::builder::makeConstant(weights_precision, scaleshift_const_shape, {}, true, 7); std::shared_ptr shift_convert = std::make_shared(shift_const, decompression_precision); if (reshape_on_decompression_constant) { auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); @@ -268,10 +269,7 @@ class MatmulWeightsDecompression : public 
testing::WithParamInterface(test_param); - // TODO: remove this condition when group decompression is supported - if (weights_precision == ov::element::nf4 || std::get<0>(test_param).weights_group_size != -1) { - return; - } + bool weights_found = false; for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) { if (n->get_friendly_name() == "Compressed_weights") { @@ -301,48 +299,37 @@ std::vector> filterAdditionalConfigBasic() { std::vector> additional_config = {CPUTestUtils::cpuEmptyPluginConfig}; return additional_config; } -std::vector> filterAdditionalConfigBig() { - std::vector> additional_config = {CPUTestUtils::cpuEmptyPluginConfig}; +std::vector> filterAdditionalConfigAMX() { + std::vector> additional_config = {}; if (with_cpu_x86_avx512_core_amx()) additional_config.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}); return additional_config; } -bool shouldUseDecompressionKernelBig() { - // No decompression support on non-avx systems - if (!with_cpu_x86_avx2()) - return false; - - return true; -} - -bool shouldUseDecompressionKernelBasic() { - // AMX decompression support has shape limitations - if (with_cpu_x86_avx512_core_amx()) - return false; - - return shouldUseDecompressionKernelBig(); -} - -const std::vector weights_precisions = {ov::element::u8, ov::element::nf4}; const std::vector decompression_precisions = {ov::element::f32}; +const std::vector weights_precisions_basic = {ov::element::u8, + ov::element::u4, + ov::element::i4, + ov::element::nf4}; +const std::vector weights_precisions_amx = {ov::element::u8}; + const std::vector input_shapes_basic = { {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, - {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, + {{{}, {{1, 8, 16}}}, {16, 32}, 4ul}, {{{}, {{1, 4, 16}}}, {1, 16, 32}}, {{{}, {{10, 40, 496}}}, {1, 496, 240}}, {{{}, {{1, 4, 48}}}, {48, 256}}, - {{{}, {{11, 339, 377}}}, {377, 335}}, + {{{}, {{1, 11, 154}}}, {154, 77}, 154ul}, + {{{-1, -1, -1}, {{10, 40, 
480}, {11, 40, 480}}}, {1, 480, 256}}, }; -const std::vector input_shapes_big = { +const std::vector input_shapes_amx = { {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}}, - {{{-1, 1, 4096}, {{1, 1, 4096}}}, {4096, 3840}, 128ul}, {{{}, {{1, 4, 32}}}, {32, 256}}, - {{{}, {{1, 4, 512}}}, {512, 256}}, {{{}, {{1, 16, 32}}}, {32, 64}}, {{{}, {{2, 4, 32}}}, {32, 65}}, {{{}, {{3, 12, 768}}}, {768, 1024}}, {{{}, {{11, 339, 577}}}, {577, 335}}, + {{{}, {{1, 1, 256}}}, {256, 128}, 64ul}, }; const std::vector fusingParamsSet { emptyFusingSpec, @@ -352,35 +339,36 @@ const std::vector fusingParamsSet { INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_basic), - ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(weights_precisions_basic), ::testing::ValuesIn(decompression_precisions), ::testing::Values(true), ::testing::Values(true), ::testing::Values(true), ::testing::ValuesIn(filterAdditionalConfigBasic()), ::testing::ValuesIn(fusingParamsSet), - ::testing::Values(shouldUseDecompressionKernelBasic())), + ::testing::Values(true)), MatmulWeightsDecompression::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_big, +INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_amx, MatmulWeightsDecompression, - ::testing::Combine(::testing::ValuesIn(input_shapes_big), - ::testing::ValuesIn(weights_precisions), + ::testing::Combine(::testing::ValuesIn(input_shapes_amx), + ::testing::ValuesIn(weights_precisions_amx), ::testing::ValuesIn(decompression_precisions), ::testing::Values(true), ::testing::Values(true), ::testing::Values(true), - ::testing::ValuesIn(filterAdditionalConfigBig()), + ::testing::ValuesIn(filterAdditionalConfigAMX()), ::testing::ValuesIn(fusingParamsSet), - ::testing::Values(shouldUseDecompressionKernelBig())), + ::testing::Values(true)), MatmulWeightsDecompression::getTestCaseName); const std::vector 
input_shapes_corner_cases_basic = { {{{-1, -1, -1}, {{1, 4, 16}}}, {1, 16, 32}}, {{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}}, {{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}, 4ul}, + {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}, 128ul}, }; -const std::vector input_shapes_corner_cases_big = { +const std::vector input_shapes_corner_cases_amx = { {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}}, {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}, 128ul}, }; @@ -393,27 +381,27 @@ const std::vector decompression_precisions_corner_cases = INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_basic), - ::testing::ValuesIn(weights_precisions), + ::testing::ValuesIn(weights_precisions_basic), ::testing::ValuesIn(decompression_precisions_corner_cases), ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), ::testing::ValuesIn(filterAdditionalConfigBasic()), ::testing::Values(emptyFusingSpec), - ::testing::Values(shouldUseDecompressionKernelBasic())), + ::testing::Values(true)), MatmulWeightsDecompression::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_big, +INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_amx, MatmulWeightsDecompression, - ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_big), - ::testing::ValuesIn(weights_precisions), + ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_amx), + ::testing::ValuesIn(weights_precisions_amx), ::testing::ValuesIn(decompression_precisions_corner_cases), ::testing::ValuesIn(transpose_weights), ::testing::ValuesIn(add_decompression_sub), ::testing::ValuesIn(reshape_on_decompression), - ::testing::ValuesIn(filterAdditionalConfigBig()), + ::testing::ValuesIn(filterAdditionalConfigAMX()), ::testing::Values(emptyFusingSpec), - 
::testing::Values(shouldUseDecompressionKernelBig())), + ::testing::Values(true)), MatmulWeightsDecompression::getTestCaseName); } // namespace } // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index 36c2060a0dc85b..ff9205a8b42238 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit 36c2060a0dc85b4def72ea30823936c2ef861b82 +Subproject commit ff9205a8b42238e1fba992fad2429b722c4cfed0 diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp index 7fa2e675372f26..15588fe27465fb 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp @@ -71,6 +71,8 @@ std::shared_ptr makeConstant(const ov::element::Type& type, makeNode(ov::element::Type_t::u64); makeNode(ov::element::Type_t::boolean); makeNode(ov::element::Type_t::nf4); + makeNode(ov::element::Type_t::u4); + makeNode(ov::element::Type_t::i4); #undef makeNode default: throw std::runtime_error("Unhandled precision"); From d9c4ca302146856835c93db7bad9d20fc2d6aaf8 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 26 Oct 2023 09:09:54 +0400 Subject: [PATCH 066/275] [GPU] Allow setting remote output for dynamic model (#20608) --- .../intel_gpu/plugin/sync_infer_request.hpp | 9 + .../src/plugin/sync_infer_request.cpp | 31 +- .../gpu_remote_tensor_tests.cpp | 447 +++++++++++++++++- 3 files changed, 460 insertions(+), 27 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp index 3050846e2c2354..24109144496df6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp @@ -25,8 +25,17 
@@ enum class TensorOwner : uint8_t { }; struct TensorWrapper { + TensorWrapper(const std::shared_ptr& _ptr, TensorOwner _owner) + : ptr(_ptr) + , owner(_owner) + , actual_size(_ptr ? _ptr->get_byte_size() : 0) {} + + TensorWrapper(const TensorWrapper& other) = default; + TensorWrapper() = default; + std::shared_ptr ptr; TensorOwner owner; + size_t actual_size; }; class SyncInferRequest : public ov::ISyncInferRequest { diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 9c097d222fdc1b..574d78e3a5332d 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -3,6 +3,8 @@ // #include "intel_gpu/plugin/usm_host_tensor.hpp" +#include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/memory_caps.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/core/preprocess/input_tensor_info.hpp" #include "openvino/core/parallel.hpp" @@ -415,11 +417,13 @@ void SyncInferRequest::wait() { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait"); OPENVINO_ASSERT(!m_internal_outputs.empty(), "[GPU] Inference was not started!\n"); + auto& network = *m_graph->get_network(); + // wait for completion & collect outputs as requested by the model // for in_order_queue, it is enough to call finish only once - bool do_sync_per_output = (m_graph->get_network()->get_stream().get_queue_type() == QueueTypes::in_order) ? false : true; + bool do_sync_per_output = (network.get_stream().get_queue_type() == QueueTypes::in_order) ? 
false : true; if (!do_sync_per_output) - m_graph->get_network()->get_stream().finish(); + network.get_stream().finish(); std::vector copy_events; @@ -442,6 +446,7 @@ void SyncInferRequest::wait() { auto output_tensor = output_tensor_wrapper.ptr; auto remote_ptr = std::dynamic_pointer_cast(output_tensor); bool is_remote = remote_ptr != nullptr; + bool is_dynamic = port.get_partial_shape().is_dynamic(); if (is_remote) { GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (remote): " << remote_ptr->get_original_memory()->buffer_ptr() << std::endl; @@ -449,6 +454,10 @@ void SyncInferRequest::wait() { GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (host): " << output_tensor->data() << std::endl; } + OPENVINO_ASSERT(output_tensor_wrapper.owner == TensorOwner::PLUGIN || output_tensor_wrapper.actual_size >= output_memory->size(), + "[GPU] Output tensor set by user has smaller size (", output_tensor->get_byte_size(), ") ", + "than required (", output_memory->size(), ")"); + bool need_output_update = output_layout.bytes_count() == 0 || (output_memory && output_tensor->get_byte_size() != output_memory->size()); if (need_output_update) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::update_output"); @@ -460,7 +469,7 @@ void SyncInferRequest::wait() { OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); } - if (port.get_partial_shape().is_dynamic()) { + if (is_dynamic) { bool need_reallocate = true; auto usm_host_tensor = std::dynamic_pointer_cast(output_tensor); if (usm_host_tensor && output_memory) @@ -488,11 +497,23 @@ void SyncInferRequest::wait() { copy_events.push_back(ev); } } + } else if (is_remote && is_dynamic) { + auto& stream = m_graph->get_network()->get_stream(); + auto user_mem = remote_ptr->get_original_memory(); + if (user_mem->get_allocation_type() == cldnn::allocation_type::cl_mem && 
output_memory->get_allocation_type() != cldnn::allocation_type::cl_mem) { + // WA: Copy between cl_mem and usm memory may fail for some reason (driver bug?) + // so this explicit memcpy is used to provide correct output for cl_mem output in dynamic cases + cldnn::mem_lock lock_dst(user_mem, stream); + cldnn::mem_lock lock_src(output_memory, stream); + std::memcpy(lock_dst.data(), lock_src.data(), output_memory->size()); + } else { + copy_events.push_back(output_memory->copy_to(stream, *user_mem, false)); + } } } if (!copy_events.empty()) { - auto& stream = m_graph->get_network()->get_stream(); + auto& stream = network.get_stream(); if (stream.get_queue_type() == QueueTypes::in_order) { // wait only the last one stream.wait_for_events({copy_events.back()}); @@ -831,7 +852,7 @@ std::vector SyncInferRequest::prepare_output(const std::strin auto device_tensor_et = convert_to_supported_device_type(element_type); bool convert_needed = is_convert_required(device_tensor_et, element_type); cldnn::primitive_id internal_name = m_output_names_map.at(name); - if (is_remote && !convert_needed) { + if (is_remote && !convert_needed && !is_dynamic) { m_plugin_outputs[name] = user_tensor_wrapper; } diff --git a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp index f4b16858551ea4..6b98f98d44a228 100644 --- a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp @@ -7,21 +7,25 @@ #include #include -#include "common_test_utils/test_assertions.hpp" +#include "openvino/core/dimension.hpp" #include "openvino/core/except.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/runtime/intel_gpu/ocl/ocl.hpp" #include "openvino/runtime/core.hpp" #include 
"openvino/runtime/intel_gpu/properties.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/runtime/remote_tensor.hpp" -#include -#include -#include +#include "remote_blob_tests/remote_blob_helpers.hpp" +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/test_common.hpp" #include "base/ov_behavior_test_utils.hpp" #include "ov_models/subgraph_builders.hpp" #include "functional_test_utils/blob_utils.hpp" -#include "openvino/core/preprocess/pre_post_process.hpp" +#include "subgraphs_builders.hpp" #include "transformations/utils/utils.hpp" -#include "common_test_utils/ov_tensor_utils.hpp" using namespace ::testing; @@ -35,6 +39,7 @@ class OVRemoteTensor_Test : public ov::test::TestsCommon { }; namespace { +std::vector ov_dynamic {true, false}; std::vector ov_with_auto_batching {true, false}; enum class RemoteTensorSharingType { USER_CL_TENSOR = 0, @@ -61,7 +66,7 @@ std::ostream& operator<<(std::ostream& stream, RemoteTensorSharingType sharing_t } } // namespace -using RemoteTensorSharingTestOptionsParams = std::tuple; +using RemoteTensorSharingTestOptionsParams = std::tuple; class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test, public testing::WithParamInterface { @@ -75,7 +80,8 @@ class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test, deviceName = ov::test::utils::DEVICE_GPU; RemoteTensorSharingType sharing_type; bool with_auto_batching; - std::tie(sharing_type, with_auto_batching) = this->GetParam(); + bool is_dynamic; + std::tie(sharing_type, with_auto_batching, is_dynamic) = this->GetParam(); if (with_auto_batching) { config = {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), @@ -84,17 +90,24 @@ class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test, }; } fn_ptr = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(); + if (is_dynamic) { + std::map target_shape = {{0, ov::PartialShape::dynamic(4)}}; + 
fn_ptr->reshape(target_shape); + } } static std::string getTestCaseName(const testing::TestParamInfo& obj) { RemoteTensorSharingType sharing_type; bool with_auto_batching; - std::tie(sharing_type, with_auto_batching) = obj.param; + bool is_dynamic; + std::tie(sharing_type, with_auto_batching, is_dynamic) = obj.param; std::ostringstream result; result << "OVRemoteTensorInputBlob_Test_"; result << sharing_type; if (with_auto_batching) result << "_WITH_AUTO_BATCHING"; + if (is_dynamic) + result << "_DYNAMIC"; return result.str(); } }; @@ -102,8 +115,9 @@ class OVRemoteTensorInputBlob_Test : public OVRemoteTensor_Test, TEST_P(OVRemoteTensorInputBlob_Test, smoke_cantCreateBlobWithInvalidSize) { RemoteTensorSharingType sharing_type; bool with_auto_batching; - std::tie(sharing_type, with_auto_batching) = GetParam(); - if (with_auto_batching) + bool is_dynamic; + std::tie(sharing_type, with_auto_batching, is_dynamic) = GetParam(); + if (with_auto_batching || is_dynamic) GTEST_SKIP(); if (sharing_type == RemoteTensorSharingType::PLUGIN_CL_TENSOR || @@ -164,7 +178,8 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { auto function = p.build(); RemoteTensorSharingType sharing_type; bool with_auto_batching; - std::tie(sharing_type, with_auto_batching) = GetParam(); + bool is_dynamic; + std::tie(sharing_type, with_auto_batching, is_dynamic) = GetParam(); // auto-batching relies on availability of the lock() for the tensor (and the *USM_DEVICE is not lockable) if (with_auto_batching @@ -173,12 +188,13 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { GTEST_SKIP(); auto exec_net = ie.compile_model(function, deviceName, config); + ov::Shape input_shape{1, 2, 32, 32}; // regular inference auto inf_req_regular = exec_net.create_infer_request(); auto input = function->get_parameters().at(0); auto output = function->get_results().at(0); - auto fakeImageData = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape()); + 
auto fakeImageData = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input_shape); inf_req_regular.set_tensor(input, fakeImageData); @@ -192,7 +208,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { auto ocl_instance = std::make_shared(ctx); cl_int err; - auto imSize = ov::shape_size(input->get_shape()); + auto imSize = ov::shape_size(input_shape); switch (sharing_type) { case RemoteTensorSharingType::USER_CL_TENSOR: { @@ -202,7 +218,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, imSize, buffer); } - auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input->get_shape(), shared_buffer); + auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input_shape, shared_buffer); inf_req_shared.set_tensor(input, cldnn_tensor); inf_req_shared.infer(); @@ -220,7 +236,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { FAIL() << "Failed to copy data from host buffer to USM device"; } - auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input->get_shape(), shared_buffer); + auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input_shape, shared_buffer); inf_req_shared.set_tensor(input, cldnn_tensor); inf_req_shared.infer(); @@ -238,7 +254,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { std::memcpy(shared_buffer, buffer, imSize); } - auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input->get_shape(), shared_buffer); + auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input_shape, shared_buffer); inf_req_shared.set_tensor(input, cldnn_tensor); inf_req_shared.infer(); @@ -247,7 +263,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { break; } case RemoteTensorSharingType::PLUGIN_CL_TENSOR: { - auto cldnn_tensor = 
cldnn_context.create_tensor(input->get_element_type(), input->get_shape()); + auto cldnn_tensor = cldnn_context.create_tensor(input->get_element_type(), input_shape); ASSERT_TRUE(cldnn_tensor.is()); auto cl_tensor = cldnn_tensor.as(); { @@ -263,7 +279,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { if (!ocl_instance->supports_usm()) GTEST_SKIP(); - auto cldnn_tensor = cldnn_context.create_usm_host_tensor(input->get_element_type(), input->get_shape()); + auto cldnn_tensor = cldnn_context.create_usm_host_tensor(input->get_element_type(), input_shape); ASSERT_TRUE(cldnn_tensor.is()); { auto cl_tensor = cldnn_tensor.as(); @@ -282,7 +298,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { if (!ocl_instance->supports_usm()) GTEST_SKIP(); - auto cldnn_tensor = cldnn_context.create_usm_device_tensor(input->get_element_type(), input->get_shape()); + auto cldnn_tensor = cldnn_context.create_usm_device_tensor(input->get_element_type(), input_shape); ASSERT_TRUE(cldnn_tensor.is()); { auto cl_tensor = cldnn_tensor.as(); @@ -300,7 +316,7 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { break; } case RemoteTensorSharingType::PLUGIN_HOST_TENSOR: { - auto cldnn_tensor = cldnn_context.create_host_tensor(input->get_element_type(), input->get_shape()); + auto cldnn_tensor = cldnn_context.create_host_tensor(input->get_element_type(), input_shape); { ASSERT_NO_THROW(cldnn_tensor.data()); void* shared_buffer = cldnn_tensor.data(); @@ -331,6 +347,277 @@ TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputRemoteTensor) { } } +TEST_P(OVRemoteTensorInputBlob_Test, smoke_canInputOutputRemoteTensor) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto ie = ov::Core(); + + using namespace ov::preprocess; + auto p = PrePostProcessor(fn_ptr); + p.input().tensor().set_element_type(ov::element::i8); + p.input().preprocess().convert_element_type(ov::element::f32); + + auto model = p.build(); + RemoteTensorSharingType sharing_type; + 
bool with_auto_batching; + bool is_dynamic; + std::tie(sharing_type, with_auto_batching, is_dynamic) = GetParam(); + + // auto-batching relies on availability of the lock() for the tensor (and the *USM_DEVICE is not lockable) + if (with_auto_batching) + GTEST_SKIP(); + + auto compiled_model = ie.compile_model(model, deviceName, config); + + ov::Shape input_shape{1, 2, 32, 32}; + ov::Shape output_shape{1, 2, 32, 32}; + // regular inference + auto inf_req_regular = compiled_model.create_infer_request(); + auto input = model->get_parameters().at(0); + auto output = model->get_results().at(0); + + auto input_data = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input_shape); + + inf_req_regular.set_tensor(input, input_data); + + inf_req_regular.infer(); + auto output_tensor_regular = inf_req_regular.get_tensor(output); + + // inference using remote tensor + auto inf_req_shared = compiled_model.create_infer_request(); + auto gpu_context = compiled_model.get_context().as(); + cl_context ctx = gpu_context; + auto ocl_instance = std::make_shared(ctx); + cl_int err; + + auto allocated_out_shape = output_shape; + if (is_dynamic) { + // In dynamic case we allocate more than required to check that out tensor is reshaped correctly + allocated_out_shape[1]++; + } + + auto in_size = ov::shape_size(input_shape); + auto out_size = ov::shape_size(output_shape) * output->get_output_element_type(0).bitwidth() / 8; + auto allocated_out_size = ov::shape_size(allocated_out_shape) * output->get_output_element_type(0).bitwidth() / 8; + auto output_tensor_shared = ov::test::utils::create_and_fill_tensor(output->get_output_element_type(0), output_shape); + + switch (sharing_type) { + case RemoteTensorSharingType::USER_CL_TENSOR: { + cl::Buffer shared_input_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, in_size, NULL, &err); + cl::Buffer shared_output_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, allocated_out_size, NULL, &err); + { + void* buffer = 
input_data.data(); + ocl_instance->_queue.enqueueWriteBuffer(shared_input_buffer, true, 0, in_size, buffer); + } + + auto input_remote_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape, shared_input_buffer); + auto output_remote_tensor = gpu_context.create_tensor(output->get_output_element_type(0), allocated_out_shape, shared_output_buffer); + inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + ocl_instance->_queue.enqueueReadBuffer(shared_output_buffer, true, 0, out_size, buffer); + } + + break; + } + case RemoteTensorSharingType::USER_USM_DEVICE_TENSOR: { + if (!ocl_instance->supports_usm()) + GTEST_SKIP(); + + void* shared_input_buffer = ocl_instance->allocate_usm_device_buffer(in_size); + void* shared_output_buffer = ocl_instance->allocate_usm_device_buffer(allocated_out_size); + { + void* buffer = input_data.data(); + err = ocl_instance->memcpy(ocl_instance->_queue, shared_input_buffer, buffer, in_size, true, nullptr, nullptr); + if (err != CL_SUCCESS) + FAIL() << "Failed to copy data from host buffer to USM device"; + } + + auto input_remote_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape, shared_input_buffer); + auto output_remote_tensor = gpu_context.create_tensor(output->get_output_element_type(0), allocated_out_shape, shared_output_buffer); + inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + err = ocl_instance->memcpy(ocl_instance->_queue, buffer, shared_output_buffer, out_size, true, nullptr, nullptr); + if (err != 
CL_SUCCESS) + FAIL() << "Failed to copy data from USM device to host buffer"; + } + + + ocl_instance->free_mem(shared_input_buffer); + ocl_instance->free_mem(shared_output_buffer); + + break; + } + case RemoteTensorSharingType::USER_USM_HOST_TENSOR: { + if (!ocl_instance->supports_usm()) + GTEST_SKIP(); + + void* shared_input_buffer = ocl_instance->allocate_usm_host_buffer(in_size); + void* shared_output_buffer = ocl_instance->allocate_usm_host_buffer(allocated_out_size); + { + void* buffer = input_data.data(); + std::memcpy(shared_input_buffer, buffer, in_size); + } + + auto input_remote_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape, shared_input_buffer); + auto output_remote_tensor = gpu_context.create_tensor(output->get_output_element_type(0), allocated_out_shape, shared_output_buffer); + inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + err = ocl_instance->memcpy(ocl_instance->_queue, buffer, shared_output_buffer, out_size, true, nullptr, nullptr); + if (err != CL_SUCCESS) + FAIL() << "Failed to copy data from USM host to host buffer"; + } + + ocl_instance->free_mem(shared_input_buffer); + ocl_instance->free_mem(shared_output_buffer); + + break; + } + case RemoteTensorSharingType::PLUGIN_CL_TENSOR: { + auto input_remote_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape); + auto output_remote_tensor = gpu_context.create_tensor(output->get_output_element_type(0), allocated_out_shape); + ASSERT_TRUE(input_remote_tensor.is()); + auto cl_tensor = input_remote_tensor.as(); + { + cl::Buffer shared_buffer = cl_tensor; + void* buffer = input_data.data(); + ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, in_size, buffer); + } + 
inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + auto out_cl_tensor = output_remote_tensor.as(); + + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + ocl_instance->_queue.enqueueReadBuffer(out_cl_tensor, true, 0, out_size, buffer); + } + + break; + } + case RemoteTensorSharingType::PLUGIN_USM_HOST_TENSOR: { + if (!ocl_instance->supports_usm()) + GTEST_SKIP(); + + auto input_remote_tensor = gpu_context.create_usm_host_tensor(input->get_element_type(), input_shape); + auto output_remote_tensor = gpu_context.create_usm_host_tensor(output->get_output_element_type(0), allocated_out_shape); + ASSERT_TRUE(input_remote_tensor.is()); + { + auto cl_tensor = input_remote_tensor.as(); + void* shared_buffer = cl_tensor.get(); + ASSERT_EQ(ocl_instance->get_allocation_type(shared_buffer), CL_MEM_TYPE_HOST_INTEL); + void* buffer = input_data.data(); + std::memcpy(shared_buffer, buffer, in_size); + } + + inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + auto cl_tensor = out_tensor.as(); + void* shared_output_buffer = cl_tensor.get(); + ASSERT_EQ(ocl_instance->get_allocation_type(shared_output_buffer), CL_MEM_TYPE_HOST_INTEL); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + std::memcpy(buffer, shared_output_buffer, out_size); + } + + break; + } + case RemoteTensorSharingType::PLUGIN_USM_DEVICE_TENSOR: { + if (!ocl_instance->supports_usm()) + GTEST_SKIP(); + + auto input_remote_tensor = gpu_context.create_usm_device_tensor(input->get_element_type(), input_shape); + auto output_remote_tensor = gpu_context.create_usm_device_tensor(output->get_output_element_type(0), 
allocated_out_shape); + ASSERT_TRUE(input_remote_tensor.is()); + { + auto cl_tensor = input_remote_tensor.as(); + void* shared_buffer = cl_tensor.get(); + ASSERT_EQ(ocl_instance->get_allocation_type(shared_buffer), CL_MEM_TYPE_DEVICE_INTEL); + void* buffer = input_data.data(); + err = ocl_instance->memcpy(ocl_instance->_queue, shared_buffer, buffer, in_size, true, nullptr, nullptr); + if (err != CL_SUCCESS) + FAIL() << "Failed to copy data from host buffer to USM device"; + } + + inf_req_shared.set_tensor(input, input_remote_tensor); + inf_req_shared.set_tensor(output, output_remote_tensor); + inf_req_shared.infer(); + + { + auto cl_tensor = output_remote_tensor.as(); + void* shared_output_buffer = cl_tensor.get(); + + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + err = ocl_instance->memcpy(ocl_instance->_queue, buffer, shared_output_buffer, out_size, true, nullptr, nullptr); + } + + break; + } + case RemoteTensorSharingType::PLUGIN_HOST_TENSOR: { + auto input_tensor = gpu_context.create_host_tensor(input->get_element_type(), input_shape); + auto output_tensor = gpu_context.create_host_tensor(output->get_output_element_type(0), allocated_out_shape); + { + ASSERT_NO_THROW(input_tensor.data()); + void* shared_buffer = input_tensor.data(); + if (ocl_instance->supports_usm()) { + ASSERT_EQ(ocl_instance->get_allocation_type(shared_buffer), CL_MEM_TYPE_HOST_INTEL); + } + void* buffer = input_data.data(); + std::memcpy(shared_buffer, buffer, in_size); + } + + inf_req_shared.set_tensor(input, input_tensor); + inf_req_shared.set_tensor(output, output_tensor); + inf_req_shared.infer(); + + { + void* buffer = output_tensor_shared.data(); + auto out_tensor = inf_req_shared.get_output_tensor(); + ASSERT_EQ(out_tensor.get_shape(), output_shape); + err = ocl_instance->memcpy(ocl_instance->_queue, buffer, output_tensor.data(), out_size, true, nullptr, nullptr); + } + break; + 
} + } + + // compare results + { + ASSERT_EQ(output->get_element_type(), ov::element::f32); + ASSERT_EQ(output_tensor_regular.get_size(), output_tensor_shared.get_size()); + auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32); + ASSERT_NO_THROW(output_tensor_regular.data()); + ASSERT_NO_THROW(output_tensor_shared.data()); + ov::test::utils::compare(output_tensor_regular, output_tensor_shared, thr); + } +} + INSTANTIATE_TEST_SUITE_P( smoke_GPU, OVRemoteTensorInputBlob_Test, @@ -342,9 +629,125 @@ INSTANTIATE_TEST_SUITE_P( RemoteTensorSharingType::PLUGIN_USM_HOST_TENSOR, RemoteTensorSharingType::PLUGIN_USM_DEVICE_TENSOR, RemoteTensorSharingType::PLUGIN_HOST_TENSOR}), - ::testing::ValuesIn(ov_with_auto_batching)), + ::testing::ValuesIn(ov_with_auto_batching), + ::testing::ValuesIn(ov_dynamic)), OVRemoteTensorInputBlob_Test::getTestCaseName); +TEST(OVRemoteTensorTests, smoke_MixedTensorTypes) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + auto model = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(); + std::map dynamic_shape = {{0, ov::PartialShape::dynamic(4)}}; + model->reshape(dynamic_shape); + + auto dynamic_compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU); + + auto input = model->get_parameters().at(0); + auto output = model->get_results().at(0); + + auto gpu_context = dynamic_compiled_model.get_context().as(); + cl_context ctx = gpu_context; + auto ocl_instance = std::make_shared(ctx); + + ov::Shape output_shape_allocated{1, 3, 32, 32}; + auto user_output_tensor = gpu_context.create_tensor(output->get_element_type(), output_shape_allocated); + ov::Tensor output_tensor_copy_0(output->get_element_type(), output_shape_allocated); + ov::Tensor output_tensor_copy_1(output->get_element_type(), output_shape_allocated); + + { + auto infer_request = dynamic_compiled_model.create_infer_request(); + { + // Run infer request with user's input & output tensor + // Output tensor size is 
larger than required + ov::Shape input_shape{1, 2, 32, 32}; + auto input_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape); + ov::Shape output_shape_actual{1, 2, 32, 32}; + + infer_request.set_tensor(input, input_tensor); + infer_request.set_tensor(output, user_output_tensor); + infer_request.infer(); + auto output_tensor = infer_request.get_tensor(output); + + ASSERT_TRUE(output_tensor.is()); + ASSERT_TRUE(user_output_tensor.is()); + auto t1 = output_tensor.as(); + auto t2 = user_output_tensor.as(); + + ASSERT_EQ(t1.get(), t2.get()); + ASSERT_EQ(output_tensor.get_shape(), output_shape_actual); + } + + { + // Keep same output, but use larger input + // In that case user tensor is not enough to store the result and the plugin throws exception + ov::Shape input_shape{1, 4, 32, 32}; + auto input_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape); + + infer_request.set_tensor(input, input_tensor); + OV_EXPECT_THROW(infer_request.infer(), ov::Exception, HasSubstr("Output tensor set by user has smaller size")); + } + + { + // Now try to increase buffer size comparing to the 1st run + // User output buffer is supposed to be the same + ov::Shape input_shape{1, 3, 32, 32}; + ov::Shape output_shape_actual{1, 3, 32, 32}; + auto input_tensor_1 = gpu_context.create_tensor(input->get_element_type(), input_shape); + auto data = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input_shape); + ASSERT_TRUE(input_tensor_1.is()); + auto cl_tensor = input_tensor_1.as(); + cl::Buffer shared_buffer = cl_tensor; + void* buffer = data.data(); + ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, ov::shape_size(input_shape), buffer); + + infer_request.set_tensor(input, input_tensor_1); + infer_request.infer(); + auto output_tensor = infer_request.get_tensor(output); + ASSERT_TRUE(output_tensor.is()); + ASSERT_TRUE(user_output_tensor.is()); + auto t1 = output_tensor.as(); + auto t2 = user_output_tensor.as(); + + 
// inference result of this iteration is stored to output_tensor_copy_0 for further values check + ocl_instance->_queue.enqueueReadBuffer(t2, true, 0, user_output_tensor.get_byte_size(), output_tensor_copy_0.data()); + ASSERT_EQ(t1.get(), t2.get()); + ASSERT_EQ(output_tensor.get_shape(), output_shape_actual); + } + } + + { + auto infer_request = dynamic_compiled_model.create_infer_request(); + ov::Shape input_shape_0{1, 2, 32, 32}; + ov::Shape output_shape_actual_0{1, 2, 32, 32}; + auto input_tensor_0 = gpu_context.create_tensor(input->get_element_type(), input_shape_0); + auto data = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input_shape_0); + ASSERT_TRUE(input_tensor_0.is()); + auto cl_tensor = input_tensor_0.as(); + cl::Buffer shared_buffer = cl_tensor; + void* buffer = data.data(); + ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, ov::shape_size(input_shape_0), buffer); + + infer_request.set_tensor(input, input_tensor_0); + infer_request.infer(); + + auto output_tensor = infer_request.get_tensor(output); + + ASSERT_FALSE(output_tensor.is()); + ASSERT_EQ(output_tensor.get_shape(), output_shape_actual_0); + } + + // Finally, check that last result stored in user output tensor is not corrupted when we run after one more iteration with another output buffer + ASSERT_TRUE(user_output_tensor.is()); + auto t2 = user_output_tensor.as(); + ocl_instance->_queue.enqueueReadBuffer(t2, true, 0, user_output_tensor.get_byte_size(), output_tensor_copy_1.data()); + + for (size_t i = 0; i < output_tensor_copy_0.get_size(); i++) { + ASSERT_EQ(output_tensor_copy_0.data()[i], output_tensor_copy_1.data()[i]) << " i = " << i; + } +} + class OVRemoteTensor_TestsWithContext : public OVRemoteTensor_Test, public testing::WithParamInterface { protected: std::shared_ptr fn_ptr; From b9c64370fbe2dcb817352b76726c60d01126ad26 Mon Sep 17 00:00:00 2001 From: Sun Xiaoxia Date: Thu, 26 Oct 2023 13:42:50 +0800 Subject: [PATCH 067/275] Fix memory leak on 
windows (#20590) --- src/inference/src/dev/threading/thread_affinity.cpp | 4 ++-- src/inference/src/dev/threading/thread_affinity.hpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/inference/src/dev/threading/thread_affinity.cpp b/src/inference/src/dev/threading/thread_affinity.cpp index 887e89237b5dd7..a91052f893858b 100644 --- a/src/inference/src/dev/threading/thread_affinity.cpp +++ b/src/inference/src/dev/threading/thread_affinity.cpp @@ -114,7 +114,7 @@ bool pin_current_thread_to_socket(int socket) { std::tuple get_process_mask() { DWORD_PTR pro_mask, sys_mask; if (0 != GetProcessAffinityMask(GetCurrentProcess(), &pro_mask, &sys_mask)) { - CpuSet mask(new DWORD_PTR(pro_mask)); + CpuSet mask = std::make_unique(pro_mask); return std::make_tuple(std::move(mask), 0); } return std::make_tuple(nullptr, 0); @@ -130,7 +130,7 @@ bool pin_thread_to_vacant_core(int thrIdx, return 0 != SetThreadAffinityMask(GetCurrentThread(), DWORD_PTR(1) << cpu_ids[thrIdx]); } bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) { - DWORD_PTR mask = static_cast(*procMask.get()); + DWORD_PTR mask = *procMask.get(); return 0 != SetThreadAffinityMask(GetCurrentThread(), mask); } bool pin_current_thread_to_socket(int socket) { diff --git a/src/inference/src/dev/threading/thread_affinity.hpp b/src/inference/src/dev/threading/thread_affinity.hpp index 20c0f7d513a59b..6d31989148de92 100644 --- a/src/inference/src/dev/threading/thread_affinity.hpp +++ b/src/inference/src/dev/threading/thread_affinity.hpp @@ -53,7 +53,11 @@ struct ReleaseProcessMaskDeleter { * @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter * @ingroup ov_dev_api_threading */ +#if defined(_WIN32) +using CpuSet = std::unique_ptr; +#else using CpuSet = std::unique_ptr; +#endif /** * @brief Get the cores affinity mask for the current process From 301ea0beb07ba468fd2f737401b1f79cdbedf4a1 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Thu, 26 Oct 
2023 08:35:38 +0200 Subject: [PATCH 068/275] [core] Migrate SpaceToDepth operator to new API (#20530) * Drop HostTensor and move to ov namespace * Drop HostTensor --------- Co-authored-by: Michal Lukaszewski --- .../include/openvino/op/space_to_depth.hpp | 4 +- src/core/src/op/space_to_depth.cpp | 81 ++++++++----------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/src/core/include/openvino/op/space_to_depth.hpp b/src/core/include/openvino/op/space_to_depth.hpp index 3b5515503502b6..41582981d39ae4 100644 --- a/src/core/include/openvino/op/space_to_depth.hpp +++ b/src/core/include/openvino/op/space_to_depth.hpp @@ -55,9 +55,7 @@ class OPENVINO_API SpaceToDepth : public Op { void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; protected: diff --git a/src/core/src/op/space_to_depth.cpp b/src/core/src/op/space_to_depth.cpp index ccaa4d43c0d0b9..b6ba80403445be 100644 --- a/src/core/src/op/space_to_depth.cpp +++ b/src/core/src/op/space_to_depth.cpp @@ -2,40 +2,40 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/space_to_depth.hpp" +#include "openvino/op/space_to_depth.hpp" #include #include #include #include -#include #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/builder/reshape.hpp" -#include "ngraph/shape.hpp" +#include "openvino/core/attribute_visitor.hpp" +#include "openvino/core/shape.hpp" #include "openvino/reference/space_to_depth.hpp" +#include "space_to_depth_shape_inference.hpp" -using namespace ngraph; - -ov::op::v0::SpaceToDepth::SpaceToDepth(const Output& data, const SpaceToDepthMode& mode, size_t block_size) +namespace ov { +namespace op 
{ +namespace v0 { +SpaceToDepth::SpaceToDepth(const Output& data, const SpaceToDepthMode& mode, size_t block_size) : Op({data}), m_blocksize(block_size), m_mode(mode) { constructor_validate_and_infer_types(); } -ov::op::v0::SpaceToDepth::SpaceToDepth(const Output& data, const std::string& mode, size_t block_size) +SpaceToDepth::SpaceToDepth(const Output& data, const std::string& mode, size_t block_size) : SpaceToDepth(data, as_enum(mode), block_size) {} -bool ngraph::op::v0::SpaceToDepth::visit_attributes(AttributeVisitor& visitor) { +bool SpaceToDepth::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_SpaceToDepth_visit_attributes); visitor.on_attribute("block_size", m_blocksize); visitor.on_attribute("mode", m_mode); return true; } -std::shared_ptr ov::op::v0::SpaceToDepth::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr SpaceToDepth::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_SpaceToDepth_clone_with_new_inputs); if (new_args.size() != 1) { OPENVINO_THROW("Incorrect number of new arguments"); @@ -43,7 +43,7 @@ std::shared_ptr ov::op::v0::SpaceToDepth::clone_with_new_inputs(const Outp return std::make_shared(new_args.at(0), m_mode, m_blocksize); } -void ngraph::op::v0::SpaceToDepth::validate_and_infer_types() { +void SpaceToDepth::validate_and_infer_types() { OV_OP_SCOPE(v0_SpaceToDepth_validate_and_infer_types); OPENVINO_SUPPRESS_DEPRECATED_START @@ -52,61 +52,48 @@ void ngraph::op::v0::SpaceToDepth::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), output_shape); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace { -bool evaluate_space_to_depth(const HostTensorVector& outputs, - const HostTensorVector& inputs, - const std::size_t block_size, - const ov::op::v0::SpaceToDepth::SpaceToDepthMode mode) { +bool SpaceToDepth::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_SpaceToDepth_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + 
const auto& in = inputs[0]; const auto& out = outputs[0]; - size_t elem_size = in->get_element_type().size(); - - if (in->get_partial_shape().is_dynamic()) { - return false; - } - - ov::reference::space_to_depth(in->get_data_ptr(), - in->get_shape(), - out->get_data_ptr(), - out->get_shape(), - block_size, - mode, - elem_size); + reference::space_to_depth(static_cast(in.data()), + in.get_shape(), + static_cast(out.data()), + out.get_shape(), + m_blocksize, + m_mode, + in.get_element_type().size()); return true; } -} // namespace -bool ngraph::op::v0::SpaceToDepth::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_SpaceToDepth_evaluate); - return evaluate_space_to_depth(outputs, inputs, m_blocksize, m_mode); -} - -bool ngraph::op::v0::SpaceToDepth::has_evaluate() const { +bool SpaceToDepth::has_evaluate() const { OV_OP_SCOPE(v0_SpaceToDepth_has_evaluate); return !get_input_partial_shape(0).is_dynamic(); } -void op::v0::SpaceToDepth::set_block_size(size_t block_size) { +void SpaceToDepth::set_block_size(size_t block_size) { m_blocksize = block_size; } -void op::v0::SpaceToDepth::set_mode(SpaceToDepthMode mode) { +void SpaceToDepth::set_mode(SpaceToDepthMode mode) { m_mode = mode; } +} // namespace v0 +} // namespace op -std::ostream& ov::operator<<(std::ostream& s, const op::v0::SpaceToDepth::SpaceToDepthMode& type) { +std::ostream& operator<<(std::ostream& s, const op::v0::SpaceToDepth::SpaceToDepthMode& type) { return s << as_string(type); } -namespace ov { template <> -NGRAPH_API EnumNames& -EnumNames::get() { - static auto enum_names = EnumNames( +OPENVINO_API EnumNames& +EnumNames::get() { + static auto enum_names = EnumNames( "op::v0::SpaceToDepth::SpaceToDepthMode", - {{"blocks_first", ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST}, - {"depth_first", ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST}}); + {{"blocks_first", op::v0::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST}, + 
{"depth_first", op::v0::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST}}); return enum_names; } } // namespace ov From d532d14540d3358f4b50c90b94189f4826fa2412 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Thu, 26 Oct 2023 11:07:23 +0400 Subject: [PATCH 069/275] `Range`, `RDFT`, `RandormUniform`, `ReduceOps` layer tests to API2.0 (#20692) * `Range` to API2.0 * `RDFTLayerTest` to API2.0 * `RandomUniformLayerTest` to API2.0 * `ReduceOpsLayerTest` to API2.0 --- .../single_layer_tests/range.cpp | 16 +- .../single_layer_tests/rdft.cpp | 111 ++++----- .../single_layer_tests/reduce_ops.cpp | 224 ++++++++---------- .../skip_tests_config.cpp | 4 +- .../single_layer_tests/random_uniform.cpp | 14 +- .../single_layer_tests/range.cpp | 26 +- .../skip_tests_config.cpp | 4 +- .../single_op_tests/random_uniform.hpp | 16 ++ .../shared/include/single_op_tests/range.hpp | 15 ++ .../shared/include/single_op_tests/rdft.hpp | 15 ++ .../include/single_op_tests/reduce_ops.hpp | 19 ++ .../shared_test_classes/base/utils/ranges.hpp | 2 +- .../single_op/random_uniform.hpp | 40 ++++ .../shared_test_classes/single_op/range.hpp | 33 +++ .../shared_test_classes/single_op/rdft.hpp | 33 +++ .../single_op/reduce_ops.hpp | 41 ++++ .../src/single_op/random_uniform.cpp | 78 ++++++ .../src/single_op/range.cpp | 41 ++++ .../src/single_op/rdft.cpp | 42 ++++ .../src/single_op/reduce_ops.cpp | 79 ++++++ 20 files changed, 632 insertions(+), 221 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/random_uniform.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/range.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/rdft.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/reduce_ops.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/random_uniform.hpp create mode 100644 
src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/range.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rdft.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reduce_ops.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/random_uniform.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/range.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/rdft.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/reduce_ops.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp index 35c017ebd2bfac..9a7b29484d3c9b 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp @@ -4,10 +4,10 @@ #include -#include "single_layer_tests/range.hpp" +#include "single_op_tests/range.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RangeLayerTest; namespace { @@ -15,9 +15,9 @@ const std::vector start = { 1.0f, 1.2f }; const std::vector stop = { 5.0f, 5.2f }; const std::vector step = { 1.0f, 0.1f }; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; INSTANTIATE_TEST_SUITE_P(smoke_Basic, RangeLayerTest, @@ -25,11 +25,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Basic, RangeLayerTest, ::testing::ValuesIn(start), ::testing::ValuesIn(stop), ::testing::ValuesIn(step), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_CPU)), RangeLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rdft.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rdft.cpp index a9c97e92f15954..05131c5eca3e5e 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rdft.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/rdft.cpp @@ -4,42 +4,43 @@ #include -#include "single_layer_tests/rdft.hpp" +#include "single_op_tests/rdft.hpp" +#include "common_test_utils/test_enums.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RDFTLayerTest; -const std::vector opTypes = { - ngraph::helpers::DFTOpType::FORWARD, - ngraph::helpers::DFTOpType::INVERSE +const std::vector op_types = { + ov::test::utils::DFTOpType::FORWARD, + ov::test::utils::DFTOpType::INVERSE }; -static const std::vector inputPrecision = { - InferenceEngine::Precision::FP32, +static const std::vector model_types = { + ov::element::f32, }; -const std::vector> shapesForward1d = { +const std::vector> shapes_forward_1d = { {10}, {64}, {100}, }; -const std::vector> signalSizes1d = { +const std::vector> signal_sizes_1d = { {}, {10}, }; //1D case doesn't work yet on reference implementation INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_RDFT_1d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesForward1d), - ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(shapes_forward_1d), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector{0}), - ::testing::ValuesIn(signalSizes1d), - ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + 
::testing::ValuesIn(signal_sizes_1d), + ::testing::Values(ov::test::utils::DFTOpType::FORWARD), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); -const std::vector> shapesInverse1d = { +const std::vector> shapes_inverse_1d = { {10, 2}, {64, 2}, {100, 2}, @@ -47,38 +48,38 @@ const std::vector> shapesInverse1d = { INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_IRDFT_1d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesInverse1d), - ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(shapes_inverse_1d), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector{0}), - ::testing::ValuesIn(signalSizes1d), - ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::ValuesIn(signal_sizes_1d), + ::testing::Values(ov::test::utils::DFTOpType::INVERSE), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); -const std::vector> shapesForward2d = { +const std::vector> shapes_forward_2d = { {10, 15}, {64, 32}, {100, 16}, }; -const std::vector> axes2d = { +const std::vector> axes_2d = { {0, 1}, {1, 0}, {-2, -1}, }; -const std::vector> signalSizes2d = { +const std::vector> signal_sizes_2d = { {}, {10, 10}, }; INSTANTIATE_TEST_SUITE_P(smoke_RDFT_2d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesForward2d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes2d), - ::testing::ValuesIn(signalSizes2d), - ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::ValuesIn(shapes_forward_2d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_2d), + ::testing::ValuesIn(signal_sizes_2d), + ::testing::Values(ov::test::utils::DFTOpType::FORWARD), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); -const std::vector> shapesInverse2d = { +const std::vector> shapes_inverse_2d = { {10, 15, 2}, {64, 32, 2}, {100, 32, 2}, @@ -86,71 +87,71 @@ const std::vector> shapesInverse2d = { INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_2d, 
RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesInverse2d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes2d), - ::testing::ValuesIn(signalSizes2d), - ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::ValuesIn(shapes_inverse_2d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_2d), + ::testing::ValuesIn(signal_sizes_2d), + ::testing::Values(ov::test::utils::DFTOpType::INVERSE), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); -const std::vector> shapesForward4d = { +const std::vector> shapes_forward_4d = { {1, 3, 10, 15}, {1, 4, 64, 32}, }; -const std::vector> axes4d = { +const std::vector> axes_4d = { {0, 1, 2, 3}, {1, 0, -2, -1} }; -const std::vector> signalSizes4d = { +const std::vector> signal_sizes_4d = { {}, }; INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesForward4d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes4d), - ::testing::ValuesIn(signalSizes4d), - ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::ValuesIn(shapes_forward_4d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_4d), + ::testing::ValuesIn(signal_sizes_4d), + ::testing::Values(ov::test::utils::DFTOpType::FORWARD), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); -const std::vector> axes4d_2d = { +const std::vector> axes_4d_2d = { {2, 3}, {1, -1} }; INSTANTIATE_TEST_SUITE_P(smoke_RDFT_4d_axes_2d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesForward4d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes4d_2d), + ::testing::ValuesIn(shapes_forward_4d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_4d_2d), ::testing::Values(std::vector{}), - ::testing::Values(ngraph::helpers::DFTOpType::FORWARD), + ::testing::Values(ov::test::utils::DFTOpType::FORWARD), ::testing::Values(ov::test::utils::DEVICE_CPU)), 
RDFTLayerTest::getTestCaseName); -const std::vector> shapesInverse4d = { +const std::vector> shapes_inverse_4d = { {1, 3, 10, 15, 2}, {1, 4, 64, 32, 2}, }; INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesInverse4d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes4d), - ::testing::ValuesIn(signalSizes4d), - ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::ValuesIn(shapes_inverse_4d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_4d), + ::testing::ValuesIn(signal_sizes_4d), + ::testing::Values(ov::test::utils::DFTOpType::INVERSE), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_IRDFT_4d_axes_2d, RDFTLayerTest, ::testing::Combine( - ::testing::ValuesIn(shapesInverse4d), - ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(axes4d_2d), + ::testing::ValuesIn(shapes_inverse_4d), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(axes_4d_2d), ::testing::Values(std::vector{}), - ::testing::Values(ngraph::helpers::DFTOpType::INVERSE), + ::testing::Values(ov::test::utils::DFTOpType::INVERSE), ::testing::Values(ov::test::utils::DEVICE_CPU)), RDFTLayerTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp index 27ff1b475e38c3..1079a43b8fc792 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp @@ -4,31 +4,32 @@ #include -#include "single_layer_tests/reduce_ops.hpp" +#include "single_op_tests/reduce_ops.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReduceOpsLayerTest; +using 
ov::test::ReduceOpsLayerWithSpecificInputTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::U64 +const std::vector model_types = { + ov::element::f32, + ov::element::f16, + ov::element::i64, + ov::element::i32, + ov::element::u64 }; -const std::vector keepDims = { +const std::vector keep_dims = { true, false, }; -const std::vector> inputShapes = { +const std::vector> input_shapes = { std::vector{10, 20, 30, 40}, std::vector{3, 5, 7, 9}, }; -const std::vector> inputShapesOneAxis = { +const std::vector> input_shapes_one_axis = { std::vector{10, 20, 30, 40}, std::vector{3, 5, 7, 9}, std::vector{10}, @@ -53,108 +54,90 @@ const std::vector> axes = { {1, -1} }; -std::vector opTypes = { +std::vector op_types = { ov::test::utils::OpType::SCALAR, ov::test::utils::OpType::VECTOR, }; -const std::vector reductionTypes = { - ngraph::helpers::ReductionType::Mean, - ngraph::helpers::ReductionType::Min, - ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::Sum, - ngraph::helpers::ReductionType::Prod, - ngraph::helpers::ReductionType::L1, - ngraph::helpers::ReductionType::L2, +const std::vector reduction_types = { + ov::test::utils::ReductionType::Mean, + ov::test::utils::ReductionType::Min, + ov::test::utils::ReductionType::Max, + ov::test::utils::ReductionType::Sum, + ov::test::utils::ReductionType::Prod, + ov::test::utils::ReductionType::L1, + ov::test::utils::ReductionType::L2, }; -const std::vector reductionLogicalTypes = { - ngraph::helpers::ReductionType::LogicalOr, - ngraph::helpers::ReductionType::LogicalAnd +const std::vector reduction_logical_types = { + ov::test::utils::ReductionType::LogicalOr, + ov::test::utils::ReductionType::LogicalAnd }; -const auto paramsOneAxis = testing::Combine( +const auto params_one_axis = testing::Combine( testing::Values(std::vector{0}), - 
testing::ValuesIn(opTypes), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionTypes), - testing::Values(netPrecisions[0]), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapesOneAxis), + testing::ValuesIn(op_types), + testing::ValuesIn(keep_dims), + testing::ValuesIn(reduction_types), + testing::Values(model_types[0]), + testing::ValuesIn(input_shapes_one_axis), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto paramsOneAxisLogical = testing::Combine( +const auto params_one_axis_logical = testing::Combine( testing::Values(std::vector{0}), - testing::ValuesIn(opTypes), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionLogicalTypes), - testing::Values(InferenceEngine::Precision::BOOL), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapesOneAxis), + testing::ValuesIn(op_types), + testing::ValuesIn(keep_dims), + testing::ValuesIn(reduction_logical_types), + testing::Values(ov::element::boolean), + testing::ValuesIn(input_shapes_one_axis), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_Precisions = testing::Combine( +const auto params_model_types = testing::Combine( testing::Values(std::vector{1, 3}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::Mean, - ngraph::helpers::ReductionType::Min, - ngraph::helpers::ReductionType::Sum, - ngraph::helpers::ReductionType::Prod), - testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + 
testing::ValuesIn(keep_dims), + testing::Values(ov::test::utils::ReductionType::Max, + ov::test::utils::ReductionType::Mean, + ov::test::utils::ReductionType::Min, + ov::test::utils::ReductionType::Sum, + ov::test::utils::ReductionType::Prod), + testing::ValuesIn(model_types), testing::Values(std::vector{2, 2, 2, 2}), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_Precisions_ReduceL1 = testing::Combine( +const auto params_model_types_ReduceL1 = testing::Combine( testing::Values(std::vector{1, 3}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::L1), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::Values(ov::test::utils::ReductionType::L1), + testing::Values(ov::element::f32, + ov::element::f16, + ov::element::i64, + ov::element::i32), testing::Values(std::vector{2, 2, 2, 2}), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_Precisions_ReduceL2 = testing::Combine( +const auto params_model_types_ReduceL2 = testing::Combine( testing::Values(std::vector{1, 3}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::L2), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::Values(ov::test::utils::ReductionType::L2), + testing::Values(ov::element::f32, + ov::element::f16), 
testing::Values(std::vector{2, 2, 2, 2}), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_InputShapes = testing::Combine( +const auto params_input_shapes = testing::Combine( testing::Values(std::vector{0}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Mean), - testing::Values(netPrecisions[0]), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::Values(ov::test::utils::ReductionType::Mean), + testing::Values(model_types[0]), testing::Values(std::vector{3}, std::vector{3, 5}, std::vector{2, 4, 6}, @@ -164,54 +147,42 @@ const auto params_InputShapes = testing::Combine( testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_Axes = testing::Combine( +const auto params_axes = testing::Combine( testing::ValuesIn(axes), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Mean), - testing::Values(netPrecisions[0]), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::Values(ov::test::utils::ReductionType::Mean), + testing::Values(model_types[0]), + testing::ValuesIn(input_shapes), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_ReductionTypes = testing::Combine( +const auto params_reduction_types = testing::Combine( testing::Values(std::vector{0, 1, 3}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionTypes), - testing::Values(netPrecisions[0]), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::ValuesIn(reduction_types), + testing::Values(model_types[0]), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(ov::test::utils::DEVICE_CPU) ); -const auto params_ReductionTypesLogical = testing::Combine( +const auto params_reduction_types_logical = testing::Combine( testing::Values(std::vector{0, 1, 3}), - testing::Values(opTypes[1]), - testing::ValuesIn(keepDims), - testing::ValuesIn(reductionLogicalTypes), - testing::Values(InferenceEngine::Precision::BOOL), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::ValuesIn(reduction_logical_types), + testing::Values(ov::element::boolean), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(ov::test::utils::DEVICE_CPU) ); const auto params_ReduceSum_accuracy = testing::Combine( testing::Values(std::vector{0}), - testing::Values(opTypes[1]), + testing::Values(op_types[1]), testing::Values(true), - testing::Values(ngraph::helpers::ReductionType::Sum), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Sum), + testing::Values(ov::element::f32), testing::Values(std::vector{1000000}), testing::Values(ov::test::utils::DEVICE_CPU) ); @@ -226,63 +197,63 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( smoke_ReduceOneAxis, ReduceOpsLayerTest, - paramsOneAxis, + params_one_axis, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_ReduceLogicalOneAxis, ReduceOpsLayerTest, - paramsOneAxisLogical, + 
params_one_axis_logical, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_Precisions, ReduceOpsLayerTest, - params_Precisions, + params_model_types, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_Precisions_L1, ReduceOpsLayerTest, - params_Precisions_ReduceL1, + params_model_types_ReduceL1, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_Precisions_L2, ReduceOpsLayerTest, - params_Precisions_ReduceL2, + params_model_types_ReduceL2, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_InputShapes, ReduceOpsLayerTest, - params_InputShapes, + params_input_shapes, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_Axes, ReduceOpsLayerTest, - params_Axes, + params_axes, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_Reduce_ReductionTypes, ReduceOpsLayerTest, - params_ReductionTypes, + params_reduction_types, ReduceOpsLayerTest::getTestCaseName ); INSTANTIATE_TEST_SUITE_P( smoke_ReduceLogical_ReductionTypes, ReduceOpsLayerTest, - params_ReductionTypesLogical, + params_reduction_types_logical, ReduceOpsLayerTest::getTestCaseName ); @@ -291,14 +262,11 @@ INSTANTIATE_TEST_SUITE_P( ReduceOpsLayerWithSpecificInputTest, testing::Combine( testing::ValuesIn(decltype(axes) {{0}, {1}}), - testing::Values(opTypes[1]), + testing::Values(op_types[1]), testing::Values(true), - testing::Values(ngraph::helpers::ReductionType::Sum), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Sum), + testing::Values(ov::element::f32, + ov::element::i32), testing::Values(std::vector {2, 10}), testing::Values(ov::test::utils::DEVICE_CPU)), 
ReduceOpsLayerWithSpecificInputTest::getTestCaseName diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4a3f3f8ddffa35..c60fbe478af286 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -22,8 +22,6 @@ std::vector disabledTestPatterns() { // TODO: Issue 33886 R"(.*(QuantGroupConv2D).*)", R"(.*(QuantGroupConv3D).*)", - // TODO: Issue: 34518 - R"(.*RangeLayerTest.*)", R"(.*(RangeAddSubgraphTest).*Start=1.2.*Stop=(5.2|-5.2).*Step=(0.1|-0.1).*ET=f16.*)", R"(.*(RangeNumpyAddSubgraphTest).*ET=f16.*)", // TODO: Issue: 43793 @@ -195,6 +193,8 @@ std::vector disabledTestPatterns() { R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", // Issue: 123321 R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)", + // Issue: 123427 + R"(.*RDFTLayerTest.*SignalSize=().*)", }; #if defined(OPENVINO_ARCH_X86) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/random_uniform.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/random_uniform.cpp index cd3937f9681a21..eb95642e301023 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/random_uniform.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/random_uniform.cpp @@ -2,19 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include #include "common_test_utils/test_constants.hpp" -#include "single_layer_tests/random_uniform.hpp" +#include "single_op_tests/random_uniform.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RandomUniformLayerTest; namespace { -const std::vector random_uniform_type_specific_params = { - {InferenceEngine::Precision::I32, -100, 
100}, - {InferenceEngine::Precision::FP32, 0.0f, 1.0f}, - {InferenceEngine::Precision::FP16, -10.0, 10.0} +const std::vector random_uniform_type_specific_params = { + {ov::element::i32, -100, 100}, + {ov::element::f32, 0.0f, 1.0f}, + {ov::element::f16, -10.0, 10.0} }; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp index d648cb18fa649c..12eab5a0039f06 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/range.cpp @@ -2,34 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include -#include -#include +#include "single_op_tests/range.hpp" +#include "common_test_utils/test_constants.hpp" +using ov::test::RangeLayerTest; namespace { -const std::initializer_list start { 1.0, 1.2 }; -const std::initializer_list stop { 5.0, 5.2 }; -const std::initializer_list step { 1.0, 0.1 }; +const std::vector start = { 1.0f, 1.2f }; +const std::vector stop = { 5.0f, 5.2f }; +const std::vector step = { 1.0f, 0.1f }; -const std::initializer_list netPrecisions { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; -using LayerTestsDefinitions::RangeLayerTest; INSTANTIATE_TEST_SUITE_P(smoke_Basic, RangeLayerTest, ::testing::Combine( ::testing::ValuesIn(start), ::testing::ValuesIn(stop), ::testing::ValuesIn(step), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), RangeLayerTest::getTestCaseName); } // 
namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 23699ee8aa75e9..70e397e665b886 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -56,8 +56,6 @@ std::vector disabledTestPatterns() { R"(smoke_PrePostProcess.*resize_dynamic.*)", // Dynamic batch allocates output using upper bound R"(.*smoke_BehaviorTests.*InferUpperBoundNetworkWithGetTensor.*)", - // need dynamic shapes - R"(.*RangeLayerTest.*)", // need dynamic rank R"(.*smoke.*BehaviorTests.*InferFullyDynamicNetworkWith(S|G)etTensor.*)", R"(.*smoke.*BehaviorTests.*DynamicOutputToDynamicInput.*)", @@ -119,5 +117,7 @@ std::vector disabledTestPatterns() { R"(.*GroupNormalizationTest.*CompareWithRefs.*NetType=f16.*)", // Issue: 123507 R"(.*ReverseLayerTest.*mask.*f16.*)", + // Issue: 123516 + R"(.*RandomUniformLayerTest.*f16.*)", }; } diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/random_uniform.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/random_uniform.hpp new file mode 100644 index 00000000000000..8c396788ff0ccf --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/random_uniform.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/random_uniform.hpp" + +namespace ov { +namespace test { +TEST_P(RandomUniformLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov + diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/range.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/range.hpp new file mode 100644 index 00000000000000..bd919c46790a13 --- /dev/null +++ 
b/src/tests/functional/plugin/shared/include/single_op_tests/range.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/range.hpp" + +namespace ov { +namespace test { +TEST_P(RangeLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/rdft.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/rdft.hpp new file mode 100644 index 00000000000000..e40bc93700d6ba --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/rdft.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/rdft.hpp" + +namespace ov { +namespace test { +TEST_P(RDFTLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/reduce_ops.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/reduce_ops.hpp new file mode 100644 index 00000000000000..df8cee8dc950d4 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/reduce_ops.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/reduce_ops.hpp" + +namespace ov { +namespace test { +TEST_P(ReduceOpsLayerTest, Inference) { + run(); +} + +TEST_P(ReduceOpsLayerWithSpecificInputTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp index 339c2a98845d54..e134bd7f018fd9 100644 --- 
a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp @@ -117,7 +117,7 @@ static std::map>> i { ov::op::v1::FloorMod::get_type_info_static(), {{{2, 4}}, {{2, 2, 128}}} }, { ov::op::v1::Mod::get_type_info_static(), {{{2, 4}}, {{2, 2, 128}}} }, { ov::op::v1::ReduceMax::get_type_info_static(), {{{0, 5}}, {{-5, 5, 1000}}} }, - { ov::op::v1::ReduceMean::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, + { ov::op::v1::ReduceMean::get_type_info_static(), {{{0, 5, 1000}}, {{0, 5, 1000}}} }, { ov::op::v1::ReduceMin::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, { ov::op::v1::ReduceProd::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, { ov::op::v1::ReduceSum::get_type_info_static(), {{{0, 5}}, {{0, 5, 1000}}} }, diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/random_uniform.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/random_uniform.hpp new file mode 100644 index 00000000000000..d3ce9a087170c8 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/random_uniform.hpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +struct RandomUniformTypeSpecificParams { + ov::element::Type model_type; // Model type + double min_value; // Min value constant, will be cast to the needed precision + double max_value; // Max value constant, will be cast to the needed precision +}; + +using RandomUniformParamsTuple = typename std::tuple< + ov::Shape, // Input shape + RandomUniformTypeSpecificParams, // Parameters which depends on output type + int64_t, // Global seed + int64_t, // Operation seed + 
ov::test::TargetDevice // Device name +>; + +class RandomUniformLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov + diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/range.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/range.hpp new file mode 100644 index 00000000000000..acfd8fbc6496bc --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/range.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using RangeParams = std::tuple< + float, // Start + float, // Stop + float, // Step + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class RangeLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rdft.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rdft.hpp new file mode 100644 index 00000000000000..83675c0e0fb0a4 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/rdft.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + 
+namespace ov { +namespace test { +using RDFTParams = std::tuple< + std::vector, // Input shape + ov::element::Type, // Model type + std::vector, // Axes + std::vector, // Signal size + ov::test::utils::DFTOpType, + ov::test::TargetDevice // Device name +>; + +class RDFTLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reduce_ops.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reduce_ops.hpp new file mode 100644 index 00000000000000..10d82a526b7296 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/reduce_ops.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { +using reduceOpsParams = std::tuple< + std::vector, // Axis to reduce order + ov::test::utils::OpType, // Scalar or vector type axis + bool, // Keep dims + ov::test::utils::ReductionType, // Reduce operation type + ov::element::Type, // Model type + std::vector, // Input shape + ov::test::TargetDevice // Device name +>; + +class ReduceOpsLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +class ReduceOpsLayerWithSpecificInputTest : public ReduceOpsLayerTest { +protected: + void generate_inputs(const std::vector& targetInputStaticShapes) override; +}; +} // namespace test +} // namespace ov diff --git 
a/src/tests/functional/shared_test_classes/src/single_op/random_uniform.cpp b/src/tests/functional/shared_test_classes/src/single_op/random_uniform.cpp new file mode 100644 index 00000000000000..e5abec2a7d0ff7 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/random_uniform.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/random_uniform.hpp" +#include "openvino/core/type/element_type_traits.hpp" + + +namespace ov { +namespace test { +std::string RandomUniformLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + RandomUniformTypeSpecificParams random_uniform_params; + ov::Shape input_shape; + int64_t global_seed; + int64_t op_seed; + std::string target_device; + std::tie(input_shape, random_uniform_params, global_seed, op_seed, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "global_seed=" << global_seed << "_"; + result << "op_seed=" << op_seed << "_"; + result << "min_val=" << random_uniform_params.min_value << "_"; + result << "max_val=" << random_uniform_params.max_value << "_"; + result << "modelType=" << random_uniform_params.model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void RandomUniformLayerTest::SetUp() { + RandomUniformTypeSpecificParams random_uniform_params; + ov::Shape input_shape; + int64_t global_seed; + int64_t op_seed; + std::tie(input_shape, random_uniform_params, global_seed, op_seed, targetDevice) = this->GetParam(); + auto model_type = random_uniform_params.model_type; + + // Use Parameter as input with desired model_type to properly configure execution configuration + // in CoreConfiguration() function + auto input = std::make_shared(model_type, input_shape); + auto shape_of = std::make_shared(input); + + std::shared_ptr min_value, max_value; + if (model_type == 
ov::element::f32) { + auto min_val = static_cast>(random_uniform_params.min_value); + auto max_val = static_cast>(random_uniform_params.max_value); + min_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{min_val}); + max_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{max_val}); + } else if (model_type == ov::element::f16) { + auto min_val = static_cast>(random_uniform_params.min_value); + auto max_val = static_cast>(random_uniform_params.max_value); + min_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{min_val}); + max_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{max_val}); + } else if (model_type == ov::element::i32) { + auto min_val = static_cast>(random_uniform_params.min_value); + auto max_val = static_cast>(random_uniform_params.max_value); + min_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{min_val}); + max_value = std::make_shared(model_type, ov::Shape{1}, + std::vector>{max_val}); + } else { + GTEST_FAIL() << model_type << " type isn't supported by the test"; + } + auto random_uniform = std::make_shared(shape_of, + min_value, + max_value, + model_type, + global_seed, + op_seed); + + function = std::make_shared(random_uniform->outputs(), ov::ParameterVector{input}, "random_uniform"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/range.cpp b/src/tests/functional/shared_test_classes/src/single_op/range.cpp new file mode 100644 index 00000000000000..5bb4917e3ec636 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/range.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/range.hpp" + + +namespace ov { +namespace test { +std::string RangeLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type model_type; + float start, stop, step; + std::string 
target_device; + std::tie(start, stop, step, model_type, target_device) = obj.param; + + std::ostringstream result; + const char separator = '_'; + result << "Start=" << start << separator; + result << "Stop=" << stop << separator; + result << "Step=" << step << separator; + result << "modelType=" << model_type.to_string() << separator; + result << "trgDev=" << target_device; + return result.str(); +} + +void RangeLayerTest::SetUp() { + ov::element::Type model_type; + float start, stop, step; + tie(start, stop, step, model_type, targetDevice) = GetParam(); + + ov::ParameterVector params{std::make_shared(model_type, ov::Shape()), + std::make_shared(model_type, ov::Shape()), + std::make_shared(model_type, ov::Shape())}; + params[0]->set_friendly_name("start"); + params[1]->set_friendly_name("stop"); + params[2]->set_friendly_name("step"); + auto range = std::make_shared(params[0], params[1], params[2], model_type); + function = std::make_shared(range->outputs(), params, "Range"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp b/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp new file mode 100644 index 00000000000000..d54b9961613e7d --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/rdft.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/rdft.hpp" +#include "ov_models/builders.hpp" + +namespace ov { +namespace test { +std::string RDFTLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shape; + ov::element::Type model_type; + std::vector axes; + std::vector signal_size; + ov::test::utils::DFTOpType op_type; + std::string target_device; + std::tie(input_shape, model_type, axes, signal_size, op_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + 
result << "modelType=" << model_type.to_string() << "_"; + result << "Axes=" << ov::test::utils::vec2str(axes) << "_"; + result << "SignalSize=" << ov::test::utils::vec2str(signal_size) << "_"; + result << "Inverse=" << (op_type == ov::test::utils::DFTOpType::INVERSE) << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void RDFTLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + std::vector axes; + std::vector signal_size; + ov::test::utils::DFTOpType op_type; + std::tie(input_shape, model_type, axes, signal_size, op_type, targetDevice) = this->GetParam(); + + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + auto rdft = ngraph::builder::makeRDFT(param, axes, signal_size, op_type); + function = std::make_shared(rdft->outputs(), ov::ParameterVector{param}, "RDFT"); +} +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/src/single_op/reduce_ops.cpp b/src/tests/functional/shared_test_classes/src/single_op/reduce_ops.cpp new file mode 100644 index 00000000000000..af23a65322fb5a --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/reduce_ops.cpp @@ -0,0 +1,79 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/reduce_ops.hpp" +#include "common_test_utils/data_utils.hpp" +#include "ov_models/builders.hpp" + +namespace ov { +namespace test { +std::string ReduceOpsLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shape; + ov::element::Type model_type; + bool keep_dims; + ov::test::utils::ReductionType reduction_type; + std::vector axes; + ov::test::utils::OpType op_type; + std::string target_device; + std::tie(axes, op_type, keep_dims, reduction_type, model_type, input_shape, target_device) = obj.param; + std::ostringstream result; + result << "IS=" << ov::test::utils::vec2str(input_shape) << 
"_"; + result << "axes=" << ov::test::utils::vec2str(axes) << "_"; + result << "opType=" << op_type << "_"; + result << "type=" << reduction_type << "_"; + if (keep_dims) result << "KeepDims_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ReduceOpsLayerTest::SetUp() { + std::vector input_shape; + ov::element::Type model_type; + bool keep_dims; + ov::test::utils::ReductionType reduction_type; + std::vector axes; + ov::test::utils::OpType op_type; + std::tie(axes, op_type, keep_dims, reduction_type, model_type, input_shape, targetDevice) = GetParam(); + + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + + std::vector shape_axes; + switch (op_type) { + case ov::test::utils::OpType::SCALAR: { + if (axes.size() > 1) + FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element"; + break; + } + case ov::test::utils::OpType::VECTOR: { + shape_axes.push_back(axes.size()); + break; + } + default: + FAIL() << "Reduce op doesn't support operation type: " << op_type; + } + auto reduction_axes_node = std::make_shared(ov::element::i64, ov::Shape(shape_axes), axes); + + const auto reduce = ngraph::builder::makeReduce(param, reduction_axes_node, keep_dims, reduction_type); + function = std::make_shared(reduce->outputs(), ov::ParameterVector{param}, "Reduce"); +} + +void ReduceOpsLayerWithSpecificInputTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + auto param = function->get_parameters()[0]; + auto axes = std::get<0>(GetParam()); + auto axis = axes[0]; + auto dims = targetInputStaticShapes[0]; + + // Slice of tensor through axis is {1, 0, 0, ....}, the mean value is 1/slice_size + auto raw_values = std::vector(dims[axis], 0); + raw_values[0] = 1; + + auto tensor = ov::Tensor(param->get_element_type(), dims); + ov::test::utils::fill_data_with_broadcast(tensor, axis, raw_values); + + inputs.insert({param, tensor}); 
+} +} // namespace test +} // namespace ov From 3a67d2a722d51c89ec195aaa621f8b9f7853dff7 Mon Sep 17 00:00:00 2001 From: cecilia peng Date: Thu, 26 Oct 2023 16:41:34 +0800 Subject: [PATCH 070/275] =?UTF-8?q?[CPU]=20Fix=20mergeTransposeAndReorder?= =?UTF-8?q?=20when=20Transpose=20and=20Reorder=20do=20oppos=E2=80=A6=20(#2?= =?UTF-8?q?0537)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [CPU]fix mergeTransposeAndReorder when Transpose and Reorder do opposite permutation to each other as expected, but isOptimized is false due to some reason like inPlace memory. The permuation is asked because the dnnl reorder primitive requires the source and destination tensors to have the same shape. * add unit test * Update src/plugins/intel_cpu/src/graph_optimizer.cpp Co-authored-by: Maksim Kutakov * simplify unit test * update comments --------- Co-authored-by: Maksim Kutakov --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 10 +- src/plugins/intel_cpu/src/nodes/reorder.h | 4 + .../graph/merge_transpose_reorder_test.cpp | 226 ++++++++++++++++++ 3 files changed, 237 insertions(+), 3 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index ea52e353a982aa..cf79a5b79f299d 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -2542,6 +2542,8 @@ void GraphOptimizer::MergeTransposeAndReorder(Graph &graph) { std::string reorderlayerName = parentParentNode->getName() + "_" + Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; + DEBUG_LOG("mergeTransposeAndReorder ", parentNode->getName(), " and ", childNode->getName(), " -> ", reorderlayerName); + EdgePtr edge; for (auto &childEdge : parentParentNode->getChildEdges()) { if (childEdge.lock()->getChild() == childChildNode) { @@ -2555,16 +2557,18 @@ void 
GraphOptimizer::MergeTransposeAndReorder(Graph &graph) { std::vector srcPerm; auto configReorder = [&]() { - // transposeNode support blocked input & non-blocked output, in the case, the reorder + // case 1. transposeNode support blocked input & non-blocked output, in the case, the reorder // cannot be optimized + // case 2. Transpose and Reorder do opposite permutation to each other as expected, but isOptimized is already set false + // due to some preliminarily checks. We need to reinterpret layout Transpose input without physical change of the memory. auto* transposeNode = dynamic_cast(parentNode.get()); if (transposeNode == nullptr) { IE_THROW() << "[CPU] parent node of type:" << parentNode->getTypeStr() << " with name: " << parentNode->getName() << " is not a transpose node"; } auto inOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].getMemDesc()->as()->getOrder(); - - if (inOrder.size() > reorderOutDesc->as()->getOrder().size()) { + auto outOrder = reorderOutDesc->as()->getOrder(); + if (!isOptimized || inOrder.size() > outOrder.size()) { isOptimized = false; // inDesc should be permuted before calling reorder auto & ord = transposeNode->getOrder(); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index 5e6a125ca94346..29bab1ecdf72f1 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -53,6 +53,10 @@ class Reorder : public Node { this->isOptimized = isOptimized; } + bool getOptimized() const { + return isOptimized; + } + bool canBeInPlace() const override { return false; } diff --git a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp new file mode 100644 index 00000000000000..fa2ece2195a25f --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp @@ -0,0 +1,226 @@ +// Copyright (C) 2018-2023 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include + +#include "node.h" +#include "nodes/reorder.h" +#include "nodes/input.h" +#include "nodes/transpose.h" +#include "graph.h" +#include "edge.h" + +#include "ov_models/builders.hpp" +#include +#include "ie_ngraph_utils.hpp" + +using namespace ov::intel_cpu; + +namespace MergeTransposeReorderCPUTest { +class DummyNode : public Node { +public: + DummyNode(const std::shared_ptr& op, const GraphContext::CPtr context) : + Node(op, context, PassThroughShapeInferFactory()) { + OPENVINO_THROW("Can't create DummyNode from ngraph node"); + } + DummyNode(const ov::Shape& shape, + const ov::element::Type_t& prc, + const std::string& name, + const std::string& type, + const GraphContext::CPtr context) : + Node(type, name, context) { + // dummy node of the same shape and precision to both input and output. + outputShapes.emplace_back(shape); + inputShapes.emplace_back(shape); + addOriginalOutputPrecision(InferenceEngine::details::convertPrecision(prc)); + addOriginalInputPrecision(InferenceEngine::details::convertPrecision(prc)); + } + + void getSupportedDescriptors() override { + if (getParentEdges().size() != 1) + OPENVINO_THROW("Incorrect number of input edges for layer " + getName()); + if (getChildEdges().empty()) + OPENVINO_THROW("Incorrect number of output edges for layer " + getName()); + } + + void initSupportedPrimitiveDescriptors() override { + if (!supportedPrimitiveDescriptors.empty()) + return; + + NodeConfig config; + config.inConfs.resize(1); + config.outConfs.resize(1); + + config.inConfs[0].inPlace(m_inplace & Edge::LOOK::LOOK_DOWN ? 0 : -1); + config.inConfs[0].constant(false); + config.outConfs[0].inPlace(m_inplace & Edge::LOOK::LOOK_UP ? 
0 : -1); + config.outConfs[0].constant(false); + + auto layoutCreator = BlockedDescCreator::getCommonCreators().at(m_layout); + auto& originInputPrecisions = getOriginalInputPrecisions(); + config.inConfs[0].setMemDesc(layoutCreator->createSharedDesc(originInputPrecisions[0], getInputShapeAtPort(0))); + config.outConfs[0].setMemDesc(layoutCreator->createSharedDesc(originInputPrecisions[0], getOutputShapeAtPort(0))); + + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::undef); + }; + + void setLayout(LayoutType layout) {m_layout = layout;} + void setInplaceDirection(Edge::LOOK look) {m_inplace = look;} + + bool isExecutable() const override {return false;} + void execute(dnnl::stream strm) override {}; + bool created() const override {return true;} + +private: + LayoutType m_layout = LayoutType::ncsp; + Edge::LOOK m_inplace = Edge::LOOK::LOOK_UP; +}; +} // namespace MergeTransposeReorderCPUTest + +using namespace MergeTransposeReorderCPUTest; + +/* + * MergeTransposeReorderIsOptimizedCPUTest to test the CPU plugin-in MergeTransposeReorder graph optimizer + * under the circumstance that the upstream node or downstream node is inPlaced thereby the inserted Reorder + * cannot be optimized. 
+ */ +class MergeTransposeReorderIsOptimizedCPUTest : public ::testing::Test { +public: + void Validate() const { + CheckTransposeCount(0); + CheckReorderOptimized(std::string("_fake"), false); // the fused node is of name "reshape_abcd_acdb_fake" + } + + void SetUp() override { + CreateGraph(); + } + +protected: + /* graph typology + --------- + |Input | + --------- + | + ---------- + | Dummy | <*NOTE: fake node with laytout NCSP, and inplace from upstream*> + ---------- + | + |---------------| + | ---------- | + | |Transpose| | + | --------- | + | | | + | --------- | + | |Reorder | | <*NOTE: Reorder is inheristically inserted since Multiply is asking NSPC input.*> + | --------- | + |---------------| + | + ----------- + | Dummy | <*NOTE: fake node with laytout NSPC, and inplace from downstream*> + ----------- + | + --------- + |Output | + --------- + */ + void CreateGraph() { + // + Config conf; + conf.rtCacheCapacity = 100; + auto context = std::make_shared(conf, nullptr, nullptr, false); + const dnnl::engine cpuEngine = context->getEngine(); + + m_graph = std::unique_ptr(new Graph()); + + // ov::Model with only a transpose node + ov::ParameterVector params{std::make_shared(testPrec, ov::Shape(testShape))}; + auto order = std::vector{0, 3, 1, 2}; + auto constOrder = ngraph::builder::makeConstant(ngraph::element::i32, {order.size()}, order); + auto transpose = std::make_shared(params[0], constOrder); + ov::ResultVector results{std::make_shared(transpose)}; + + // Replicate + auto replicate = [&](std::vector &nodes, std::vector &edges) -> void { + std::unordered_set nodesSet; + + auto addEdge = [&](const NodePtr& parent, const NodePtr& child, size_t parentPort, size_t childPort) -> void { + auto edge = std::make_shared(parent, child, parentPort, childPort); + child->addEdge(edge); + edges.push_back(edge); + nodesSet.insert(parent); + nodesSet.insert(child); + }; + + auto inputNode = std::make_shared(params[0], context); + + // dummy ncsp + inPlace LOOK_UP + auto 
dummyNode1 = std::make_shared(testShape, testPrec, "reshape", "DummyNode", context); + dummyNode1->setLayout(LayoutType::ncsp); + dummyNode1->setInplaceDirection(Edge::LOOK::LOOK_UP); + + auto orderNode = std::make_shared(constOrder, context); // const order + auto transposeNode = std::make_shared(transpose, context); + transposeNode->filterSupportedPrimitiveDescriptors(); + + // dummy nspc + inPlace LOOK_DOWN + const ov::Shape shape_tranpose{testShape[0], testShape[3], testShape[1], testShape[2]}; // shape after transpose + auto dummyNode2 = std::make_shared(shape_tranpose, testPrec, "multiply", "DummyNode", context); + dummyNode2->setLayout(LayoutType::nspc); + dummyNode2->setInplaceDirection(Edge::LOOK::LOOK_DOWN); + + auto outputNode = std::make_shared(results[0], context); + + addEdge(inputNode, dummyNode1, 0, 0); + addEdge(dummyNode1, transposeNode, 0, 0); + addEdge(orderNode, transposeNode, 0, 1); + addEdge(transposeNode, dummyNode2, 0, 0); + addEdge(dummyNode2, outputNode, 0, 0); + + for (auto &node : nodesSet) nodes.emplace_back(node); + }; + + std::vector graphNodes; + std::vector graphEdges; + replicate(graphNodes, graphEdges); + + m_graph->CreateGraph(graphNodes, graphEdges, context, "fused_graph"); + } + + // helper to check if Transpose node is fused. 
+ void CheckTransposeCount(const size_t expectedTransposeCount) const { + const std::vector& graph_nodes = m_graph->GetNodes(); + size_t actualTransposeCount = 0; + for (auto &node : graph_nodes) { + if (node->getType() == Type::Transpose) { + actualTransposeCount++; + } + } + + ASSERT_EQ(expectedTransposeCount, actualTransposeCount); + } + + // helper to check isOptimized of Reorder node with a part of its name + void CheckReorderOptimized(const std::string &patial_name, const bool expectedOptimized) const { + const std::vector& graph_nodes = m_graph->GetNodes(); + size_t actualCount = 0; + for (auto &node : graph_nodes) { + auto reorder_node = std::dynamic_pointer_cast(node); + if (reorder_node && node->getName().find(patial_name) != std::string::npos) { + ASSERT_EQ(expectedOptimized, reorder_node->getOptimized()); + actualCount++; + } + } + + ASSERT_EQ(1, actualCount); + } + +private: + const ov::element::Type_t testPrec = ov::element::Type_t::f32; + const ov::Shape testShape{1, 3, 8, 16}; + + std::unique_ptr m_graph; +}; // class MergeTransposeReorderIsOptimizedCPUTest + +TEST_F(MergeTransposeReorderIsOptimizedCPUTest, smoke_Run_MergeTransposeReorder_isOptimized) { + Validate(); +} \ No newline at end of file From 4078bd9c19af54974e5a0abf2c2b0e507d202b07 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Thu, 26 Oct 2023 11:10:51 +0200 Subject: [PATCH 071/275] [GHA] Speed up PyTorch Layer unit tests (#20613) * test * fixed tests * typo * fixed tests * rest of the tests * fixed rsub test * tmp fix * Revert "tmp fix" This reverts commit b8bf1e9492e13497895da488612c9a137ef840bc. * fixed test params * reset thirdparty/pugixml * Revert "fixed rsub test" This reverts commit 9b6be34b8666936e8124b6622fcc5185b640de92. * fixed typo * fixed test data * reset test_rsub * removed unused param * reverrted runner * simplified call * fixed random * changed logical to auto mode * Revert "fixed random" This reverts commit 8a4f20b24641144f823a7e1f1ff92038634acf32. 
* fixed test_all * replaced random_sample with randn * fixed rebase issue * reverted logical splitting * Update tests/layer_tests/pytorch_tests/test_repeat_interleave.py Co-authored-by: Maxim Vafin * Update tests/layer_tests/pytorch_tests/test_all.py Co-authored-by: Maxim Vafin * Apply suggestions from code review Co-authored-by: Maxim Vafin * fixed merge conflict --------- Co-authored-by: Maxim Vafin --- .github/workflows/linux.yml | 6 +- .../pytorch_tests/test_adaptive_avg_pool.py | 29 ++-- .../pytorch_tests/test_adaptive_max_pool.py | 38 +++--- tests/layer_tests/pytorch_tests/test_add.py | 13 +- tests/layer_tests/pytorch_tests/test_all.py | 62 +++++---- .../layer_tests/pytorch_tests/test_argsort.py | 44 ++++--- .../pytorch_tests/test_floor_divide.py | 68 ++++++---- .../pytorch_tests/test_index_put_.py | 33 ++--- tests/layer_tests/pytorch_tests/test_len.py | 10 +- .../layer_tests/pytorch_tests/test_narrow.py | 8 +- .../pytorch_tests/test_remainder.py | 16 +-- .../pytorch_tests/test_repeat_interleave.py | 6 +- .../pytorch_tests/test_roi_align.py | 8 +- tests/layer_tests/pytorch_tests/test_rsub.py | 34 +++-- tests/layer_tests/pytorch_tests/test_sort.py | 20 +-- tests/layer_tests/pytorch_tests/test_stack.py | 41 +++--- tests/layer_tests/pytorch_tests/test_sub.py | 23 ++-- tests/layer_tests/pytorch_tests/test_topk.py | 10 +- .../pytorch_tests/test_unary_ops.py | 124 ++++++++++++------ .../layer_tests/pytorch_tests/test_unfold.py | 11 +- tests/layer_tests/pytorch_tests/test_view.py | 76 ++++++++--- 21 files changed, 414 insertions(+), 266 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0f0a791b700f15..ae512a64393a19 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -931,6 +931,8 @@ jobs: - name: Install Python API tests dependencies run: | + # To enable pytest parallel features + python3 -m pip install pytest-xdist[psutil] # For torchvision to OpenVINO preprocessing converter python3 -m pip install 
-r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt @@ -1006,7 +1008,7 @@ jobs: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - name: PyTorch Layer Tests - run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml + run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -n logical -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 @@ -1327,7 +1329,7 @@ jobs: - name: PyTorch Models Tests run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/torch_tests/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/torch_tests -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v env: TYPE: ${{ github.event_name == 'schedule' && 'nightly' || 'precommit'}} TEST_DEVICE: CPU diff --git a/tests/layer_tests/pytorch_tests/test_adaptive_avg_pool.py b/tests/layer_tests/pytorch_tests/test_adaptive_avg_pool.py index f50dc8a4051519..bedd9beb0d4bcf 100644 --- a/tests/layer_tests/pytorch_tests/test_adaptive_avg_pool.py +++ b/tests/layer_tests/pytorch_tests/test_adaptive_avg_pool.py @@ -8,13 +8,12 @@ from pytorch_layer_test_class import PytorchLayerTest -@pytest.mark.parametrize('input_tensor', (np.random.randn(1, 2, 8, 9, 10).astype(np.float32), - np.random.randn(2, 8, 9, 10).astype(np.float32))) -@pytest.mark.parametrize('output_size', ([5, 7, 9], 7)) +@pytest.mark.parametrize('input_tensor', [[1, 2, 8, 9, 10], [2, 8, 9, 10]]) +@pytest.mark.parametrize('output_size', [[5, 7, 9], 7]) class TestAdaptiveAvgPool3D(PytorchLayerTest): def _prepare_input(self): - return (self.input_tensor,) + return (self.input_tensor, ) def create_model(self, output_size): class 
aten_adaptive_avg_pool3d(torch.nn.Module): @@ -35,16 +34,16 @@ def forward(self, input_tensor): @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend def test_adaptive_avg_pool3d(self, ie_device, precision, ir_version, input_tensor, output_size): - self.input_tensor = input_tensor + self.input_tensor = np.random.randn(*input_tensor).astype(np.float32) self._test(*self.create_model(output_size), ie_device, precision, ir_version) -@pytest.mark.parametrize('input_tensor', [np.random.randn(2, 8, 9, 10).astype(np.float32), np.random.randn(8, 9, 10).astype(np.float32)]) -@pytest.mark.parametrize('output_size', ([7, 9], 7)) +@pytest.mark.parametrize('input_shape', [[2, 8, 9, 10], [8, 9, 10]]) +@pytest.mark.parametrize('output_size', [[7, 9], 7]) class TestAdaptiveAvgPool2D(PytorchLayerTest): def _prepare_input(self): - return (self.input_tensor,) + return (self.input_tensor, ) def create_model(self, output_size): class aten_adaptive_avg_pool2d(torch.nn.Module): @@ -64,17 +63,17 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend - def test_adaptive_avg_pool2d(self, ie_device, precision, ir_version, input_tensor, output_size): - self.input_tensor = input_tensor + def test_adaptive_avg_pool2d(self, ie_device, precision, ir_version, input_shape, output_size): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(output_size), ie_device, precision, ir_version) -@pytest.mark.parametrize('input_tensor', [np.random.randn(8, 9, 10).astype(np.float32), np.random.randn(9, 10).astype(np.float32)] ) -@pytest.mark.parametrize('output_size', ( 7, )) +@pytest.mark.parametrize('input_shape', [[8, 9, 10], [9, 10]]) +@pytest.mark.parametrize('output_size', [7, ]) class TestAdaptiveAvgPool1D(PytorchLayerTest): def _prepare_input(self): - return (self.input_tensor,) + return (self.input_tensor, ) def create_model(self, output_size): class 
aten_adaptive_avg_pool1d(torch.nn.Module): @@ -94,8 +93,8 @@ def forward(self, input_tensor): @pytest.mark.precommit @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend - def test_adaptive_avg_pool1d(self, ie_device, precision, ir_version, input_tensor, output_size): - self.input_tensor = input_tensor + def test_adaptive_avg_pool1d(self, ie_device, precision, ir_version, input_shape, output_size): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(output_size), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py index 09be641a0fb96e..b80a82a4a9dee7 100644 --- a/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py +++ b/tests/layer_tests/pytorch_tests/test_adaptive_max_pool.py @@ -34,11 +34,9 @@ def forward(self, input_tensor): return aten_adaptive_max_pool3d(output_size, return_indices), ref_net, "aten::adaptive_max_pool3d" - @pytest.mark.parametrize('input_tensor', ([ - np.random.randn(2, 1, 1, 4, 4).astype(np.float32), - np.random.randn(4, 1, 3, 32, 32).astype(np.float32), - np.random.randn(1, 3, 32, 32).astype(np.float32) - ])) + @pytest.mark.parametrize('input_shape', [[2, 1, 1, 4, 4], + [4, 1, 3, 32, 32], + [1, 3, 32, 32]]) @pytest.mark.parametrize('output_size', ([ [2, 2, 2], [4, 4, 4], @@ -53,8 +51,8 @@ def forward(self, input_tensor): @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_adaptive_max_pool3d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): - self.input_tensor = input_tensor + def test_adaptive_max_pool3d(self, ie_device, precision, ir_version, input_shape, output_size, return_indices): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(output_size, return_indices), 
ie_device, precision, ir_version) @@ -81,11 +79,9 @@ def forward(self, input_tensor): return aten_adaptive_max_pool2d(output_size, return_indices), ref_net, "aten::adaptive_max_pool2d" - @pytest.mark.parametrize('input_tensor', ([ - np.random.randn(2, 1, 4, 4).astype(np.float32), - np.random.randn(1, 3, 32, 32).astype(np.float32), - np.random.randn(3, 32, 32).astype(np.float32) - ])) + @pytest.mark.parametrize('input_shape', [[2, 1, 4, 4], + [1, 3, 32, 32], + [3, 32, 32]]) @pytest.mark.parametrize('output_size', ([ [2, 2], [4, 4], @@ -100,8 +96,8 @@ def forward(self, input_tensor): @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_adaptive_max_pool2d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): - self.input_tensor = input_tensor + def test_adaptive_max_pool2d(self, ie_device, precision, ir_version, input_shape, output_size, return_indices): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) @@ -128,11 +124,11 @@ def forward(self, input_tensor): return aten_adaptive_max_pool1d(output_size, return_indices), ref_net, "aten::adaptive_max_pool1d" - @pytest.mark.parametrize('input_tensor', ([ - np.random.randn(1, 4, 4).astype(np.float32), - np.random.randn(3, 32, 32).astype(np.float32), - np.random.randn(16, 8).astype(np.float32), - ])) + @pytest.mark.parametrize('input_shape', [ + [1, 4, 4], + [3, 32, 32], + [16, 8] + ]) @pytest.mark.parametrize('output_size', ([ 2, 4, @@ -147,6 +143,6 @@ def forward(self, input_tensor): @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_adaptive_max_pool1d(self, ie_device, precision, ir_version, input_tensor, output_size, return_indices): - self.input_tensor = 
input_tensor + def test_adaptive_max_pool1d(self, ie_device, precision, ir_version, input_shape, output_size, return_indices): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(output_size, return_indices), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index d89a05420eb7e1..f2b3897a68ed47 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -9,10 +9,11 @@ @pytest.mark.parametrize('alpha', (-0.5, 0, 0.5, 1, 2)) -@pytest.mark.parametrize('input_rhs', (np.random.randn(2, 5, 3, 4).astype(np.float32), - np.random.randn( - 1, 5, 3, 4).astype(np.float32), - np.random.randn(1).astype(np.float32))) +@pytest.mark.parametrize('input_shape_rhs', [ + [2, 5, 3, 4], + [1, 5, 3, 4], + [1] +]) class TestAdd(PytorchLayerTest): def _prepare_input(self): @@ -41,8 +42,8 @@ def forward2(self, lhs, rhs): @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("op_type", ["add", "add_"]) - def test_add(self, ie_device, precision, ir_version, alpha, input_rhs, op_type): - self.input_rhs = input_rhs + def test_add(self, ie_device, precision, ir_version, alpha, input_shape_rhs, op_type): + self.input_rhs = np.random.randn(*input_shape_rhs).astype(np.float32) self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_all.py b/tests/layer_tests/pytorch_tests/test_all.py index ca9b734c1ad1dd..c8b79ee0ff5e3b 100644 --- a/tests/layer_tests/pytorch_tests/test_all.py +++ b/tests/layer_tests/pytorch_tests/test_all.py @@ -24,10 +24,10 @@ def __init__(self, dim, keepdim) -> None: def forward(self, input_tensor): return torch.all( - input_tensor, + input_tensor, dim = self.dim ) if self.keepdim is None else torch.all( - input_tensor, + input_tensor, 
dim = self.dim, keepdim = self.keepdim ) @@ -36,32 +36,35 @@ class TestAll(PytorchLayerTest): def _prepare_input(self): return (self.input_tensor,) - @pytest.mark.parametrize("input_tensor", [ - np.eye(5,5), - np.zeros((5, 5)), - np.zeros((9,8)) + 1, - np.random.randint(0, 2, (5, 9, 7)), - np.random.randint(0, 2, (10, 13, 11)), - np.random.randint(0, 2, (8, 7, 6, 5, 4)), - np.random.randint(0, 2, (11, 11), dtype=np.uint8), - np.random.randint(0, 2, (7, 7), dtype=np.uint8), + @pytest.mark.parametrize("input_shape, d_type", [ + (np.eye(5,5), np.int64), + (np.zeros((5, 5)), np.int64), + (np.zeros((9,8)) + 1, np.int64), + ([5, 9, 7], np.int64), + ([10, 13, 11], np.int64), + ([8, 7, 6, 5, 4], np.int64), + ([11, 11], np.uint8), + ([7, 7], np.uint8) ]) @pytest.mark.nightly @pytest.mark.precommit - def test_all_noparams(self, input_tensor, ie_device, precision, ir_version): - self.input_tensor = input_tensor - self._test(aten_all_noparam(), None, "aten::all", + def test_all_noparams(self, input_shape, d_type, ie_device, precision, ir_version): + if type(input_shape) is list: + self.input_tensor = np.random.randint(0, 2, input_shape, dtype=d_type) + else: + self.input_tensor = input_shape + self._test(aten_all_noparam(), None, "aten::all", ie_device, precision, ir_version, trace_model=True, freeze_model=False) - - @pytest.mark.parametrize("input_tensor", [ - np.eye(5,5), - np.zeros((5, 5)), - np.zeros((9,8)) + 1, - np.random.randint(0, 2, (5, 9, 7)), - np.random.randint(0, 2, (10, 13, 11)), - np.random.randint(0, 2, (8, 7, 6, 5, 4)), - np.random.randint(0, 2, (11, 11), dtype=np.uint8), - np.random.randint(0, 2, (7, 7), dtype=np.uint8), + + @pytest.mark.parametrize("input_shape, d_type", [ + (np.eye(5,5), np.int64), + (np.zeros((5, 5)), np.int64), + (np.zeros((9,8)) + 1, np.int64), + ([5, 9, 7], np.int64), + ([10, 13, 11], np.int64), + ([8, 7, 6, 5, 4], np.int64), + ([11, 11], np.uint8), + ([7, 7], np.uint8) ]) @pytest.mark.parametrize("keepdim", [ True, @@ -72,8 +75,11 @@ 
def test_all_noparams(self, input_tensor, ie_device, precision, ir_version): @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_all(self, input_tensor, keepdim, ie_device, precision, ir_version): - self.input_tensor = input_tensor - for dim in range(len(input_tensor.shape)): - self._test(aten_all(dim, keepdim), None, "aten::all", + def test_all(self, input_shape, d_type, keepdim, ie_device, precision, ir_version): + if type(input_shape) is list: + self.input_tensor = np.random.randint(0, 2, input_shape, dtype=d_type) + else: + self.input_tensor = input_shape + for dim in range(len(self.input_tensor.shape)): + self._test(aten_all(dim, keepdim), None, "aten::all", ie_device, precision, ir_version, trace_model=True, freeze_model=False) diff --git a/tests/layer_tests/pytorch_tests/test_argsort.py b/tests/layer_tests/pytorch_tests/test_argsort.py index 667edc5f8a0091..6468e553381513 100644 --- a/tests/layer_tests/pytorch_tests/test_argsort.py +++ b/tests/layer_tests/pytorch_tests/test_argsort.py @@ -22,24 +22,24 @@ def __init__(self, dim, descending, stable) -> None: def forward(self, input_tensor): if self.stable is not None: - return torch.argsort(input_tensor, - dim = self.dim, - descending = self.descending, + return torch.argsort(input_tensor, + dim = self.dim, + descending = self.descending, stable = self.stable ) else: - return torch.argsort(input_tensor, - dim = self.dim, + return torch.argsort(input_tensor, + dim = self.dim, descending = self.descending - ) + ) ref_net = None return aten_argsort(dim, descending, stable), ref_net, "aten::argsort" @pytest.mark.parametrize("tensor_stable_pair", [ - (np.random.rand(1, 4), False), - (np.random.rand(4, 4), False), - (np.random.rand(4, 4, 4), False), + ([1, 4], False), + ([4, 4], False), + ([4, 4, 4], False), (np.array([1, 2, 4, 6, 5, 8, 7]), False), (np.array([6, 5, 4, 2, 3, 0, 1]), False), (np.array([1, 1, 1, 2, 1, 
3, 1, 4, 2, 5, 1, 2, 4, 4, 0]), True), @@ -49,20 +49,20 @@ def forward(self, input_tensor): (np.array([[9, 8, 8], [8, 7, 7], [7, 5, 6], [8, 8, 9], [7, 7, 8], [6, 5, 7], [8, 9, 8], [7, 8, 7], [5, 6, 7]]), True), - (np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[5, 2, 4], [4, 9, 0], [7, 7, 9]], + (np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[5, 2, 4], [4, 9, 0], [7, 7, 9]], [[5, 2, 4], [4, 9, 0], [7, 7, 9]]]), True), - (np.array([[[3, 2, 2], [1, 2, 1], [3, 2, 2]], - [[1, 2, 1], [4, 3, 4], [3, 2, 2]], + (np.array([[[3, 2, 2], [1, 2, 1], [3, 2, 2]], + [[1, 2, 1], [4, 3, 4], [3, 2, 2]], [[3, 2, 2], [1, 2, 1], [7, 9, 9]]]), True), - (np.array([[[2, 1, 3], [3, 2, 1], [1, 2, 3]], - [[2, 0, 2], [1, 2, 1], [3, 2, 8]], + (np.array([[[2, 1, 3], [3, 2, 1], [1, 2, 3]], + [[2, 0, 2], [1, 2, 1], [3, 2, 8]], [[3, 2, 2], [3, 2, 1], [1, 2, 3]], - [[2, 1, 3], [3, 2, 1], [1, 2, 3]], - [[2, 0, 2], [1, 2, 1], [3, 2, 8]], + [[2, 1, 3], [3, 2, 1], [1, 2, 3]], + [[2, 0, 2], [1, 2, 1], [3, 2, 8]], [[3, 2, 2], [3, 2, 1], [1, 2, 3]], - [[2, 1, 3], [3, 2, 1], [1, 2, 3]], - [[2, 0, 2], [1, 2, 1], [3, 2, 8]], + [[2, 1, 3], [3, 2, 1], [1, 2, 3]], + [[2, 0, 2], [1, 2, 1], [3, 2, 8]], [[3, 2, 2], [3, 2, 1], [1, 2, 3]]]), True) ]) @pytest.mark.parametrize("descending", [ @@ -72,7 +72,11 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit def test_argsort(self, tensor_stable_pair, descending, ie_device, precision, ir_version): - self.input_tensor, stable = tensor_stable_pair + input_shape, stable = tensor_stable_pair + if type(input_shape) is list: + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) + else: + self.input_tensor = input_shape dims = len(self.input_tensor.shape) for dim in range(-dims, dims): stable_values = [True] if stable else [True, False, None] diff --git a/tests/layer_tests/pytorch_tests/test_floor_divide.py b/tests/layer_tests/pytorch_tests/test_floor_divide.py index 0cdc46333b2651..40d1eb5afa7fed 100644 --- 
a/tests/layer_tests/pytorch_tests/test_floor_divide.py +++ b/tests/layer_tests/pytorch_tests/test_floor_divide.py @@ -41,41 +41,59 @@ def forward(self, input_tensor, other_tensor): return aten_floor_divide(), ref_net, "aten::floor_divide" - @pytest.mark.parametrize('input_tensor', ([ - np.random.randn(5).astype(np.float32), - np.random.randn(5, 5, 1).astype(np.float32), - np.random.randn(1, 1, 5, 5).astype(np.float32), + @pytest.mark.parametrize('input_tensor', + ([ + [5], [5, 5, 1], [1, 1, 5, 5], ])) - @pytest.mark.parametrize('other_tensor', ([ - np.array([[0.5]]).astype(np.float32), - np.random.randn(5).astype(np.float32), - np.random.randn(5, 1).astype(np.float32), - np.random.randn(1, 5).astype(np.float32), + @pytest.mark.parametrize('other_tensor', + ([ + np.array([[0.5]]).astype(np.float32), [5], [5, 1], [1, 5] ])) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_floor_divide(self, input_tensor, other_tensor, ie_device, precision, ir_version): - self.input_tensor = input_tensor - self.other_tensor = other_tensor + if type(input_tensor) is list: + self.input_tensor = np.random.randn(*input_tensor).astype(np.float32) + else: + self.input_tensor = input_tensor + if type(other_tensor) is list: + self.other_tensor = np.random.randn(*other_tensor).astype(np.float32) + else: + self.other_tensor = other_tensor self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) - @pytest.mark.parametrize('input_tensor', ([ - np.random.randint(low=0, high=10, size=5).astype(np.float32), - np.random.randint(low=1, high=10, size=(5, 5, 1)).astype(np.float32), - np.random.randint(low=1, high=10, size=(1, 1, 5, 5)).astype(np.float32), - ])) - @pytest.mark.parametrize('other_tensor', ([ - np.array([[2]]).astype(np.float32), - np.random.randint(low=1, high=10, size=5).astype(np.float32), - 
np.random.randint(low=1, high=10, size=(5, 1)).astype(np.float32), - np.random.randint(low=1, high=10, size=(1, 5)).astype(np.float32), - ])) + @pytest.mark.parametrize('input_data', + [ + { "tensor": [5], "low": 0, "high": 10 }, + { "tensor": [5, 5, 1], "low": 1, "high": 10 }, + { "tensor": [1, 1, 5, 5], "low": 1, "high": 10 } + ]) + @pytest.mark.parametrize('other_data', + [ + { "tensor": np.array([[2]]).astype(np.float32) }, + { "tensor": [5], "low": 1, "high": 10 }, + { "tensor": [5, 1], "low": 1, "high": 10 }, + { "tensor": [5, 1], "low": 1, "high": 10 } + ]) @pytest.mark.nightly @pytest.mark.precommit - def test_floor_divide_int(self, input_tensor, other_tensor, ie_device, precision, ir_version): - self.input_tensor = input_tensor - self.other_tensor = other_tensor + def test_floor_divide_int(self, input_data, other_data, ie_device, precision, ir_version): + input_tensor = input_data["tensor"] + if type(input_tensor) is list: + self.input_tensor = np.random.randint(low=input_data["low"], + high=input_data["high"], + size=input_tensor).astype(np.float32) + else: + self.input_tensor = input_tensor + + other_tensor = other_data["tensor"] + if type(other_tensor) is list: + self.other_tensor = np.random.randint(low=other_data["low"], + high=other_data["high"], + size=other_tensor).astype(np.float32) + else: + self.other_tensor = other_tensor self.create_model = self.create_model_int self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_index_put_.py b/tests/layer_tests/pytorch_tests/test_index_put_.py index 6f94a0912d4df2..68eaed216261d6 100644 --- a/tests/layer_tests/pytorch_tests/test_index_put_.py +++ b/tests/layer_tests/pytorch_tests/test_index_put_.py @@ -32,10 +32,10 @@ def forward(self, input_tensor, values): "input_data", ( { - "input_tensor": np.random.randn(5).astype(np.float32), + "input_shape": [5], "values": np.array(11).astype(np.float32)}, { - "input_tensor": np.random.randn(3, 
3).astype(np.float32), + "input_shape": [3, 3], "values": np.array([10, 11, 12]).astype(np.float32), }, ), @@ -54,7 +54,7 @@ def forward(self, input_tensor, values): @pytest.mark.nightly @pytest.mark.precommit def test_index_put_single_indices(self, ie_device, precision, ir_version, input_data, indices, accumulate): - self.input_tensor = input_data["input_tensor"] + self.input_tensor = np.random.randn(*input_data["input_shape"]).astype(np.float32) self.values = input_data["values"] self._test(*self.create_model(indices, accumulate), ie_device, precision, ir_version) @@ -83,11 +83,11 @@ def forward(self, input_tensor, values): "input_data", ( { - "input_tensor": np.random.randn(3, 3).astype(np.float32), + "input_shape": [3, 3], "values": np.array(12).astype(np.float32) }, { - "input_tensor": np.random.randn(3, 3, 3).astype(np.float32), + "input_shape": [3, 3, 3], "values": np.array([10, 11, 12]).astype(np.float32), }, ), @@ -107,7 +107,7 @@ def forward(self, input_tensor, values): @pytest.mark.nightly @pytest.mark.precommit def test_index_put_many_indices(self, ie_device, precision, ir_version, input_data, indices, accumulate): - self.input_tensor = input_data["input_tensor"] + self.input_tensor = np.random.randn(*input_data["input_shape"]).astype(np.float32) self.values = input_data["values"] self._test(*self.create_model(indices, accumulate), ie_device, precision, ir_version) @@ -135,11 +135,11 @@ def forward(self, input_tensor, values, indices_0, indices_1): "input_data", ( { - "input_tensor": np.random.randn(3).astype(np.float32), + "input_shape": [3], "values": np.array(11).astype(np.float32), }, { - "input_tensor": np.random.randn(3, 3).astype(np.float32), + "input_shape": [3, 3], "values": np.array([10, 11, 12]).astype(np.float32), }, ), @@ -147,19 +147,22 @@ def forward(self, input_tensor, values, indices_0, indices_1): @pytest.mark.parametrize( "indices", ( - (np.random.randint(low=0, high=2, size=(1,)), np.random.randint(low=0, high=2, size=(1,))), - 
(np.random.randint(low=0, high=2, size=(2,)), np.random.randint(low=0, high=2, size=(2,))), - (np.array([0, 1, 0]), np.array([1, 1, 0])), - (np.ones(shape=(3,)), np.ones(shape=(3,))), - (np.ones(shape=(3,)), np.zeros(shape=(3,))), + [[1, ], [1, ]], + [[2, ], [2, ]], + [np.array([0, 1, 0]), np.array([1, 1, 0])], + [np.ones(shape=(3,)), np.ones(shape=(3,))], + [np.ones(shape=(3,)), np.zeros(shape=(3,))], ), ) @pytest.mark.parametrize("accumulate", (False, True)) @pytest.mark.nightly @pytest.mark.precommit def test_nonzero_index_put_(self, ie_device, precision, ir_version, input_data, indices, accumulate): - self.input_tensor = input_data["input_tensor"] + self.input_tensor = np.random.randn(*input_data["input_shape"]).astype(np.float32) self.values = input_data["values"] + for i in range(len(indices)): + if type(indices[i]) is list: + indices[i] = np.random.randint(0, 2, indices[i]) self.indices_0 = indices[0] self.indices_1 = indices[1] self._test(*self.create_model(accumulate), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) @@ -167,7 +170,7 @@ def test_nonzero_index_put_(self, ie_device, precision, ir_version, input_data, class TestMask_IndexPut(PytorchLayerTest): def _prepare_input(self): return (np.random.randn(100, 5).astype(np.float32),np.random.randn(100, 5).astype(np.float32)) - + def create_model(self): class aten_index_put_mask(torch.nn.Module): def forward(self, x, y): diff --git a/tests/layer_tests/pytorch_tests/test_len.py b/tests/layer_tests/pytorch_tests/test_len.py index 7aa5f020b9c7fc..9443f2576efc77 100644 --- a/tests/layer_tests/pytorch_tests/test_len.py +++ b/tests/layer_tests/pytorch_tests/test_len.py @@ -8,8 +8,10 @@ from pytorch_layer_test_class import PytorchLayerTest -@pytest.mark.parametrize('input_tensor', (np.random.randn(2, 1, 3), np.random.randn(3, 7), - np.random.randn(1, 1, 4, 4))) +@pytest.mark.parametrize('input_tensor', +[ + [2, 1, 3], [3, 7], [1, 1, 4, 4] +]) class TestLen(PytorchLayerTest): def 
_prepare_input(self): @@ -40,13 +42,13 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit def test_len(self, ie_device, precision, ir_version, input_tensor): - self.input_tensor = input_tensor + self.input_tensor = np.random.randn(*input_tensor).astype(np.float32) self._test(*self.create_model(), ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit def test_len_int_list(self, ie_device, precision, ir_version, input_tensor): - self.input_tensor = input_tensor + self.input_tensor = np.random.randn(*input_tensor).astype(np.float32) self._test(*self.create_model_int_list(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_narrow.py b/tests/layer_tests/pytorch_tests/test_narrow.py index 9197492553e5af..c472e4f225a1cf 100644 --- a/tests/layer_tests/pytorch_tests/test_narrow.py +++ b/tests/layer_tests/pytorch_tests/test_narrow.py @@ -23,8 +23,8 @@ def forward(self, input_tensor, dim: int, start, length: int): return aten_narrow(), ref_net, "aten::narrow" - @pytest.mark.parametrize("input_tensor", [ - np.random.randn(3, 3), np.random.randn(3, 4, 5) + @pytest.mark.parametrize("input_shape", [ + [3, 3], [3, 4, 5] ]) @pytest.mark.parametrize("dim", [ np.array(0).astype(np.int32), np.array(1).astype(np.int32), np.array(-1).astype(np.int32) @@ -37,8 +37,8 @@ def forward(self, input_tensor, dim: int, start, length: int): ]) @pytest.mark.nightly @pytest.mark.precommit - def test_narrow(self, input_tensor, dim, start, length, ie_device, precision, ir_version): - self.input_tensor = input_tensor + def test_narrow(self, input_shape, dim, start, length, ie_device, precision, ir_version): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self.dim = dim self.start = start self.length = length diff --git a/tests/layer_tests/pytorch_tests/test_remainder.py b/tests/layer_tests/pytorch_tests/test_remainder.py index 05ad2d3b1387fe..d295fe3ad0fe6d 100644 --- 
a/tests/layer_tests/pytorch_tests/test_remainder.py +++ b/tests/layer_tests/pytorch_tests/test_remainder.py @@ -8,12 +8,12 @@ @pytest.mark.parametrize( - "input_rhs", - ( - np.random.randn(2, 5, 3, 4).astype(np.float32), - np.random.randn(1, 5, 3, 4).astype(np.float32), - np.random.randn(1).astype(np.float32), - ), + "input_shape_rhs", + [ + [2, 5, 3, 4], + [1, 5, 3, 4], + [1] + ] ) class TestRemainder(PytorchLayerTest): def _prepare_input(self): @@ -30,8 +30,8 @@ def forward(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit - def test_remainder(self, ie_device, precision, ir_version, input_rhs): - self.input_rhs = input_rhs + def test_remainder(self, ie_device, precision, ir_version, input_shape_rhs): + self.input_rhs = np.random.randn(*input_shape_rhs).astype(np.float32) self._test(*self.create_model(), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_repeat_interleave.py b/tests/layer_tests/pytorch_tests/test_repeat_interleave.py index dc937a7d27d784..3df1ae1b421c02 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat_interleave.py +++ b/tests/layer_tests/pytorch_tests/test_repeat_interleave.py @@ -12,9 +12,8 @@ {'repeats': 2, 'dim': 2}, {'repeats': [2, 3], 'dim': 1}, {'repeats': [3, 2, 1], 'dim': 3}, - {'repeats': [3, 2, 1], 'dim': 3}, {'repeats': 2, 'dim': None}, - {'repeats': [random.randint(1, 5) for _ in range(36)], 'dim': None})) + {'repeats': [36], 'dim': None})) class TestRepeatInterleaveConstRepeats(PytorchLayerTest): def _prepare_input(self): @@ -39,6 +38,9 @@ def forward(self, input_tensor): @pytest.mark.precommit def test_repeat_interleave_const_repeats(self, ie_device, precision, ir_version, input_data): repeats = input_data['repeats'] + if type(repeats) is list and len(repeats) == 1: + repeats = [random.randint(1, 5) for _ in range(repeats[0])] + dim = input_data['dim'] self._test(*self.create_model_const_repeat(repeats, dim), ie_device, precision, ir_version) diff --git 
a/tests/layer_tests/pytorch_tests/test_roi_align.py b/tests/layer_tests/pytorch_tests/test_roi_align.py index fb03c51b0914e0..574741aaa26db0 100644 --- a/tests/layer_tests/pytorch_tests/test_roi_align.py +++ b/tests/layer_tests/pytorch_tests/test_roi_align.py @@ -39,7 +39,9 @@ def forward(self, input_tensor, rois): return (torchvision_roi_align(output_size, spatial_scale, sampling_ratio, aligned), ref_net, "torchvision::roi_align") - @pytest.mark.parametrize('input_tensor', (np.random.randn(4, 5, 6, 7).astype(np.float32),)) + @pytest.mark.parametrize('input_shape', [ + [4, 5, 6, 7], + ]) @pytest.mark.parametrize('boxes', (np.array([[1, 2, 2, 3, 3]]).astype(np.float32), np.array([[0, 1, 2, 5, 4], [2, 1, 2, 5, 4], @@ -50,9 +52,9 @@ def forward(self, input_tensor, rois): @pytest.mark.parametrize('aligned', (True, False)) @pytest.mark.nightly @pytest.mark.precommit - def test_roi_align(self, ie_device, precision, ir_version, input_tensor, boxes, output_size, + def test_roi_align(self, ie_device, precision, ir_version, input_shape, boxes, output_size, spatial_scale, sampling_ratio, aligned): - self.input_tensor = input_tensor + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self.boxes = boxes self._test(*self.create_model(output_size, spatial_scale, sampling_ratio, aligned), ie_device, precision, ir_version, trace_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 9c144ad4da247b..68e36f8abd2167 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -34,24 +34,34 @@ def forward(self, x, y:int, alpha: float): return model(), ref_net, "aten::rsub" - @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), - np.array(5).astype(np.float32), - np.random.randn(1)),]) - + @pytest.mark.parametrize('input_data', + [ + [[2, 3, 4], np.array(5).astype(np.float32), [1]] + ]) @pytest.mark.nightly 
@pytest.mark.precommit - def test_rsub_f(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_rsub(self, ie_device, precision, ir_version, input_data): + self.input_data = [] + for input in input_data: + if type(input) is list: + self.input_data.append(np.random.randn(*input).astype(np.float32)) + else: + self.input_data.append(input) self._test(*self.create_model(second_type="float"), ie_device, precision, ir_version, use_convert_model=True) - @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), - np.array(5).astype(int), - np.random.randn(1)),]) - + @pytest.mark.parametrize('input_data', + [ + [[2, 3, 4], np.array(5).astype(int), [1]] + ]) @pytest.mark.nightly @pytest.mark.precommit - def test_rsub_i(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_rsub(self, ie_device, precision, ir_version, input_data): + self.input_data = [] + for input in input_data: + if type(input) is list: + self.input_data.append(np.random.randn(*input).astype(np.float32)) + else: + self.input_data.append(input) self._test(*self.create_model(second_type="int"), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_sort.py b/tests/layer_tests/pytorch_tests/test_sort.py index c2f6243bb81a63..28ff2b7d485e56 100644 --- a/tests/layer_tests/pytorch_tests/test_sort.py +++ b/tests/layer_tests/pytorch_tests/test_sort.py @@ -36,11 +36,11 @@ def forward(self, input_tensor): ref_net = None return aten_sort(dim, descending, stable), ref_net, "aten::sort" - @pytest.mark.parametrize("input_tensor", [ - np.random.rand(16), - np.random.rand(1, 4), - np.random.rand(4, 4), - np.random.rand(4, 4, 4), + @pytest.mark.parametrize("input_shape", [ + [16], + [1, 4], + [4, 4], + [4, 4, 4], np.array([1, 2, 4, 6, 5, 8, 7]), np.array([6, 5, 4, 2, 3, 0, 1]), np.array([1, 1, 1, 2, 1, 3, 1, 4, 2, 5, 1, 2, 4, 4, 0]), @@ -78,9 +78,13 @@ def 
forward(self, input_tensor): ]) @pytest.mark.nightly @pytest.mark.precommit - def test_sort(self, input_tensor, descending, stable, ie_device, precision, ir_version): - self.input_tensor = input_tensor - dims = len(input_tensor.shape) + def test_sort(self, input_shape, descending, stable, ie_device, precision, ir_version): + self.input_tensor = [] + if type(input_shape) is list: + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) + else: + self.input_tensor = input_shape + dims = len(self.input_tensor.shape) for dim in range(-dims, dims): self._test(*self.create_model(dim, descending, stable), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_stack.py b/tests/layer_tests/pytorch_tests/test_stack.py index 670033c7b294c1..50b44251e27ada 100644 --- a/tests/layer_tests/pytorch_tests/test_stack.py +++ b/tests/layer_tests/pytorch_tests/test_stack.py @@ -17,7 +17,7 @@ def create_model(self, dim): class aten_stack(torch.nn.Module): def __init__(self, dim): super(aten_stack, self).__init__() - self.dim = dim + self.dim = dim def forward(self, x, y): inputs = [x, y] @@ -27,18 +27,22 @@ def forward(self, x, y): return aten_stack(dim), ref_net, "aten::stack" - @pytest.mark.parametrize("input_tensor", ([ - [np.random.rand(1, 3, 3), np.random.rand(1, 3, 3)], - [np.random.rand(4, 4, 2), np.random.rand(4, 4, 2)], - [np.random.rand(8, 1, 1, 9), np.random.rand(8, 1, 1, 9)] - ])) + @pytest.mark.parametrize("input_shape", + [ + [1, 3, 3], + [4, 4, 2], + [8, 1, 1, 9] + ]) @pytest.mark.parametrize("dim", ([ 0, 1, 2, ])) @pytest.mark.nightly @pytest.mark.precommit - def test_stack2D(self, input_tensor, dim, ie_device, precision, ir_version): - self.input_tensors = input_tensor + def test_stack2D(self, input_shape, dim, ie_device, precision, ir_version): + self.input_tensors = [ + np.random.randn(*input_shape).astype(np.float32), + np.random.randn(*input_shape).astype(np.float32), + ] self._test(*self.create_model(dim), ie_device, 
precision, ir_version) @@ -52,7 +56,7 @@ def create_model(self, dim): class aten_stack(torch.nn.Module): def __init__(self, dim): super(aten_stack, self).__init__() - self.dim = dim + self.dim = dim def forward(self, x, y, z): inputs = [x, y, z] @@ -62,16 +66,21 @@ def forward(self, x, y, z): return aten_stack(dim), ref_net, "aten::stack" - @pytest.mark.parametrize("input_tensor", ([ - [np.random.rand(1, 3, 3), np.random.rand(1, 3, 3), np.random.rand(1, 3, 3)], - [np.random.rand(4, 4, 2), np.random.rand(4, 4, 2), np.random.rand(4, 4, 2)], - [np.random.rand(8, 1, 1, 9), np.random.rand(8, 1, 1, 9), np.random.rand(8, 1, 1, 9)] - ])) + @pytest.mark.parametrize("input_shape", + [ + [1, 3, 3], + [4, 4, 2], + [8, 1, 1, 9] + ]) @pytest.mark.parametrize("dim", ([ 0, 1, 2, ])) @pytest.mark.nightly @pytest.mark.precommit - def test_stack3D(self, input_tensor, dim, ie_device, precision, ir_version): - self.input_tensors = input_tensor + def test_stack3D(self, input_shape, dim, ie_device, precision, ir_version): + self.input_tensors = [ + np.random.randn(*input_shape).astype(np.float32), + np.random.randn(*input_shape).astype(np.float32), + np.random.randn(*input_shape).astype(np.float32) + ] self._test(*self.create_model(dim), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_sub.py b/tests/layer_tests/pytorch_tests/test_sub.py index 381d1672454cbe..5ba3f1a5506053 100644 --- a/tests/layer_tests/pytorch_tests/test_sub.py +++ b/tests/layer_tests/pytorch_tests/test_sub.py @@ -37,19 +37,22 @@ def _forward_inplace(self, x, y, alpha: float): return aten_sub(inplace), ref_net, op_name - @pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 4).astype(np.float32), - np.random.randn( - 2, 3, 4).astype(np.float32), - np.random.randn(1)), - (np.random.randn(4, 2, 3).astype(np.float32), - np.random.randn( - 1, 2, 3).astype(np.float32), - np.random.randn(1)), ]) + @pytest.mark.parametrize('input_shapes', + [ + [ + [2, 3, 4], [2, 3, 4], [1] + ], + [ 
+ [4, 2, 3], [1, 2, 3], [1] + ] + ]) @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit - def test_sub(self, ie_device, precision, ir_version, input_data, inplace): - self.input_data = input_data + def test_sub(self, ie_device, precision, ir_version, input_shapes, inplace): + self.input_data = [] + for input_shape in input_shapes: + self.input_data.append(np.random.randn(*input_shape).astype(np.float32)) self._test(*self.create_model(inplace), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_topk.py b/tests/layer_tests/pytorch_tests/test_topk.py index 675f7e1e98ec56..1b657f25ade1a5 100644 --- a/tests/layer_tests/pytorch_tests/test_topk.py +++ b/tests/layer_tests/pytorch_tests/test_topk.py @@ -32,9 +32,9 @@ def forward(self, input_tensor): return aten_topk(k, dim, largest, sort), ref_net, "aten::topk" - @pytest.mark.parametrize(("input_tensor"), [ - np.random.rand(7, 5, 5, 4), - np.random.rand(5, 6, 6, 7, 8), + @pytest.mark.parametrize(("input_shape"), [ + [7, 5, 5, 4], + [5, 6, 6, 7, 8] ]) @pytest.mark.parametrize(("k"), [ @@ -62,6 +62,6 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.skipif(os.getenv("GITHUB_ACTIONS") == 'true', reason="Ticket - 115085") - def test_topK(self, input_tensor, k, dim, largest, sort, ie_device, precision, ir_version): - self.input_tensor = input_tensor + def test_topK(self, input_shape, k, dim, largest, sort, ie_device, precision, ir_version): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(k, dim, largest, sort), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_unary_ops.py b/tests/layer_tests/pytorch_tests/test_unary_ops.py index 2f1e75753b1ebb..04346bdef48ef5 100644 --- a/tests/layer_tests/pytorch_tests/test_unary_ops.py +++ b/tests/layer_tests/pytorch_tests/test_unary_ops.py @@ -7,6 +7,44 @@ from 
pytorch_layer_test_class import PytorchLayerTest +OPS = { + "aten::rsqrt": torch.rsqrt, + "aten::sqrt": torch.sqrt, + "aten::exp": torch.exp, + "aten::exp_": torch.exp_, + "aten::relu": torch.relu, + "aten::relu_": torch.relu_, + "aten::ceil": torch.ceil, + "aten::ceil_": torch.ceil_, + "aten::floor": torch.floor, + "aten::floor_": torch.floor_, + "aten::sigmoid": torch.sigmoid, + "aten::sigmoid_": torch.sigmoid_, + "aten::cos": torch.cos, + "aten::cos_": torch.cos_, + "aten::sin": torch.sin, + "aten::sin_": torch.sin_, + "aten::tan": torch.tan, + "aten::tan_": torch.tan_, + "aten::cosh": torch.cosh, + "aten::cosh_": torch.cosh_, + "aten::sinh": torch.sinh, + "aten::sinh_": torch.sinh_, + "aten::tanh": torch.tanh, + "aten::tanh_": torch.tanh_, + "aten::acos": torch.acos, + "aten::acos_": torch.acos_, + "aten::asin": torch.asin, + "aten::asin_": torch.asin_, + "aten::atan": torch.atan, + "aten::atan_": torch.atan_, + "aten::acosh": torch.acosh, + "aten::acosh_": torch.acosh_, + "aten::asinh": torch.asinh, + "aten::asinh_": torch.asinh_, + "aten::atanh": torch.atanh, + "aten::atanh_": torch.atanh_ +} class unary_op_net(torch.nn.Module): def __init__(self, op, dtype): @@ -29,60 +67,62 @@ def _prepare_input(self): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.int8, torch.uint8, torch.int32, torch.int64]) - @pytest.mark.parametrize("op,op_type", [ - (torch.rsqrt, "aten::rsqrt"), - (torch.sqrt, "aten::sqrt"), - (torch.exp, "aten::exp"), - (torch.relu, "aten::relu"), - (torch.relu_, "aten::relu_"), - (torch.ceil, "aten::ceil"), - (torch.ceil_, "aten::ceil_"), - (torch.floor, "aten::floor"), - (torch.floor_, "aten::floor_"), - (torch.sigmoid, "aten::sigmoid"), + @pytest.mark.parametrize("op_type", + [ + "aten::rsqrt", + "aten::sqrt", + "aten::exp", + "aten::relu", + "aten::relu_", + "aten::ceil", + "aten::ceil_", + "aten::floor", + "aten::floor_", + "aten::sigmoid", # trigonometry - (torch.cos, 
"aten::cos"), - (torch.sin, "aten::sin"), - (torch.tan, "aten::tan"), - (torch.cosh, "aten::cosh"), - (torch.sinh, "aten::sinh"), - (torch.tanh, "aten::tanh"), - (torch.acos, "aten::acos"), - (torch.asin, "aten::asin"), - (torch.atan, "aten::atan"), - (torch.acosh, "aten::acosh"), - (torch.asinh, "aten::asinh"), - (torch.atanh, "aten::atanh"), + "aten::cos", + "aten::sin", + "aten::tan", + "aten::cosh", + "aten::sinh", + "aten::tanh", + "aten::acos", + "aten::asin", + "aten::atan", + "aten::acosh", + "aten::asinh", + "aten::atanh" ]) - def test_unary_op(self, op, op_type, dtype, ie_device, precision, ir_version): + def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype - self._test(unary_op_net(op, dtype), None, op_type, + self._test(unary_op_net(OPS[op_type], dtype), None, op_type, ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) - @pytest.mark.parametrize("op,op_type", [ + @pytest.mark.parametrize("op_type", + [ # some pytorch inplace ops do not support int - (torch.exp_, "aten::exp_"), - (torch.sigmoid_, "aten::sigmoid_"), + "aten::exp_", + "aten::sigmoid_", # trigonometry - (torch.cos_, "aten::cos_"), - (torch.sin_, "aten::sin_"), - (torch.tan_, "aten::tan_"), - (torch.cosh_, "aten::cosh_"), - (torch.sinh_, "aten::sinh_"), - (torch.tanh_, "aten::tanh_"), - (torch.acos_, "aten::acos_"), - (torch.asin_, "aten::asin_"), - (torch.atan_, "aten::atan_"), - (torch.acosh_, "aten::acosh_"), - (torch.asinh_, "aten::asinh_"), - (torch.atanh_, "aten::atanh_"), + "aten::cos_", + "aten::sin_", + "aten::tan_", + "aten::cosh_", + "aten::sinh_", + "aten::tanh_", + "aten::acos_", + "aten::asin_", + "aten::atan_", + "aten::acosh_", + "aten::asinh_", + "aten::atanh_" ]) - def test_unary_op_float(self, op, op_type, dtype, ie_device, precision, ir_version): + def test_unary_op_float(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = 
dtype - self._test(unary_op_net(op, dtype), None, op_type, + self._test(unary_op_net(OPS[op_type], dtype), None, op_type, ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_unfold.py b/tests/layer_tests/pytorch_tests/test_unfold.py index b5d6ec787470b1..b4138fa772bb02 100644 --- a/tests/layer_tests/pytorch_tests/test_unfold.py +++ b/tests/layer_tests/pytorch_tests/test_unfold.py @@ -11,9 +11,10 @@ @pytest.mark.parametrize('dimension', (0, 1, 2)) @pytest.mark.parametrize('size', (1, 2)) @pytest.mark.parametrize('step', (1, 2, 3, 4)) -@pytest.mark.parametrize('input_tensor', (np.random.randn(2, 2, 5).astype(np.float32), - np.random.randn(3, 3, 3, 3).astype(np.float32), - np.random.randn(2, 3, 4, 5).astype(np.float32))) +@pytest.mark.parametrize('input_shape', +[ + [2, 2, 5], [3, 3, 3, 3], [2, 3, 4, 5] +]) class TestUnfold(PytorchLayerTest): def _prepare_input(self): @@ -37,7 +38,7 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit - def test_unfold(self, ie_device, precision, ir_version, dimension, size, step, input_tensor): - self.input_tensor = input_tensor + def test_unfold(self, ie_device, precision, ir_version, dimension, size, step, input_shape): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model(dimension, size, step), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_view.py b/tests/layer_tests/pytorch_tests/test_view.py index 2d2c80633f7397..2b77c46f184a3f 100644 --- a/tests/layer_tests/pytorch_tests/test_view.py +++ b/tests/layer_tests/pytorch_tests/test_view.py @@ -8,8 +8,15 @@ from pytorch_layer_test_class import PytorchLayerTest -@pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 2), np.array(2), np.array(6)), - (np.random.randn(4), np.array(2), np.array(2))]) +@pytest.mark.parametrize('input_shapes', +[ + [ + [2, 3, 2], np.array(2), np.array(6) + ], + [ + [4], np.array(2), np.array(2) + ] +]) class 
TestViewListConstruct(PytorchLayerTest): def _prepare_input(self): @@ -27,11 +34,21 @@ def forward(self, input_tensor, dim1: int, dim2: int): @pytest.mark.nightly @pytest.mark.precommit - def test_view_list_construct(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_view_list_construct(self, ie_device, precision, ir_version, input_shapes): + self.input_data = [] + for input_shape in input_shapes: + if type(input_shape) is list: + self.input_data.append(np.random.randn(*input_shape).astype(np.float32)) + else: + self.input_data.append(input_shape) self._test(*self.create_model(), ie_device, precision, ir_version) -@pytest.mark.parametrize('input_data', [(np.random.randn(4), np.array(2))]) +@pytest.mark.parametrize('input_shapes', +[ + [ + [4], np.array(2) + ] +]) class TestViewDtype(PytorchLayerTest): def _prepare_input(self): @@ -49,12 +66,22 @@ def forward(self, input_tensor, dtype): @pytest.mark.nightly @pytest.mark.precommit - def test_view_dtype(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_view_dtype(self, ie_device, precision, ir_version, input_shapes): + self.input_data = [] + for input_shape in input_shapes: + if type(input_shape) is list: + self.input_data.append(np.random.randn(*input_shape).astype(np.float32)) + else: + self.input_data.append(input_shape) self._test(*self.create_model(), ie_device, precision, ir_version) -@pytest.mark.parametrize('input_data', [(np.random.randn(4), np.random.randn(2, 2))]) +@pytest.mark.parametrize('input_shapes', +[ + [ + [4], [2, 2] + ] +]) class TestViewSize(PytorchLayerTest): def _prepare_input(self): @@ -72,13 +99,27 @@ def forward(self, input_tensor, input_size): @pytest.mark.nightly @pytest.mark.precommit - def test_view_size(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_view_size(self, ie_device, precision, ir_version, input_shapes): + self.input_data = [] + for 
input_shape in input_shapes: + if type(input_shape) is list: + self.input_data.append(np.random.randn(*input_shape).astype(np.float32)) + else: + self.input_data.append(input_shape) self._test(*self.create_model(), ie_device, precision, ir_version) -@pytest.mark.parametrize('input_data', [(np.random.randn(2, 3, 2), 2, 6), - (np.random.randn(4), 2, 2), - (np.random.randn(4), 2, 2.1)]) +@pytest.mark.parametrize('input_shapes', +[ + [ + [2, 3, 2], 2, 6 + ], + [ + [4], 2, 2 + ], + [ + [4], 2, 2.1 + ] +]) class TestView(PytorchLayerTest): def _prepare_input(self): @@ -101,6 +142,11 @@ def forward(self, input_tensor): @pytest.mark.nightly @pytest.mark.precommit - def test_view(self, ie_device, precision, ir_version, input_data): - self.input_data = input_data + def test_view(self, ie_device, precision, ir_version, input_shapes): + self.input_data = [] + for input_shape in input_shapes: + if type(input_shape) is list: + self.input_data.append(np.random.randn(*input_shape).astype(np.float32)) + else: + self.input_data.append(input_shape) self._test(*self.create_model(), ie_device, precision, ir_version) From 4aa6899b07dd0184a937a93ae34e7f8fe52b6d37 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Thu, 26 Oct 2023 17:14:43 +0800 Subject: [PATCH 072/275] update linux oneTBB with version 2021.2.4 (#20675) --- cmake/dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 257263f663bec6..c90309ab294553 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -104,10 +104,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin-20231012.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.4-lin.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "6f39d18783b37fdcc15ca137fbf70bc78206848af1a510cada806279fae49718" + SHA256 
"6523661559a340e88131472ea9a595582c306af083e55293b7357d11b8015546" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB From f86d80e14c00d44ed792f096c7b9a9708f55d2ad Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Thu, 26 Oct 2023 10:15:52 +0100 Subject: [PATCH 073/275] increase timeout (#20705) --- .github/workflows/linux_riscv.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 7dda15f3552bc3..0b181c3c8ff0ed 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -31,7 +31,7 @@ concurrency: jobs: Build: - timeout-minutes: 15 + timeout-minutes: 30 defaults: run: shell: bash From 5a1b13a069a1ddd53ad0621afa3ff6e16e0f8293 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Thu, 26 Oct 2023 10:17:52 +0100 Subject: [PATCH 074/275] fix parent job name (#20702) --- .github/workflows/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c1617b4c3415ed..84e9209aecb092 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -645,7 +645,7 @@ jobs: CPU_Functional_Tests: name: CPU functional tests - needs: Buildy + needs: Build timeout-minutes: 30 defaults: run: From 0d458f086d94a0bc78712e7e1097370dc3e0ac9c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 26 Oct 2023 14:06:34 +0400 Subject: [PATCH 075/275] Dependabot/pip/src/bindings/python/paddlepaddle 2.5.2 (#20708) * Bump paddlepaddle from 2.5.1 to 2.5.2 in /src/bindings/python Bumps [paddlepaddle](https://github.com/paddlepaddle/paddle) from 2.5.1 to 2.5.2. 
- [Release notes](https://github.com/paddlepaddle/paddle/releases) - [Changelog](https://github.com/PaddlePaddle/Paddle/blob/develop/RELEASE.md) - [Commits](https://github.com/paddlepaddle/paddle/compare/v2.5.1...v2.5.2) --- updated-dependencies: - dependency-name: paddlepaddle dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Removed WA for Ubuntu 22.04 and PDPD --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .ci/azure/windows.yml | 2 +- .github/workflows/linux_conditional_compilation.yml | 3 --- src/bindings/python/constraints.txt | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index 9a6aba6a88971e..144c605995e367 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -243,7 +243,7 @@ jobs: - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_onnx_frontend_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ONNXFrontend.xml displayName: 'ONNX Frontend Tests' - # TODO Reenable PDPD after paddlepaddle==2.5.1 with compliant protobuf is released (ticket 95904) + # TODO Reenable PDPD after paddlepaddle==2.5.2 with compliant protobuf is released (ticket 95904) - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddle_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-Paddle.xml displayName: 'Paddle Frontend UT' enabled: 'false' diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 5baf92c143d4a1..74bfd4e2e203ce 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -116,9 +116,6 @@ jobs: # For running Paddle frontend unit tests python3 -m pip install -r ${OPENVINO_REPO}/src/frontends/paddle/tests/requirements.txt - # see 
https://github.com/PaddlePaddle/Paddle/issues/55597#issuecomment-1718131420 - wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb - apt-get install ./libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb # # Build diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 20e888bde84984..914e48ed444c97 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -16,7 +16,7 @@ patchelf<=0.17.2.1 # Frontends docopt~=0.6.2 -paddlepaddle==2.5.1 +paddlepaddle==2.5.2 tensorflow>=1.15.5,<2.15.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 From bec011cf1df1eeab8f716a22be6ed4c2c9fd2256 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Thu, 26 Oct 2023 12:22:13 +0200 Subject: [PATCH 076/275] Fix TupleUnpackInBodyReplacer transformation to run inside body (#20681) --- src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp index 9352d148e823f7..5884ae6d590eaf 100644 --- a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp @@ -176,7 +176,7 @@ bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model } if (const auto multiSubGraph = ov::as_type_ptr(op)) { for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) - result = result || run_on_model(multiSubGraph->get_function(i)); + result = run_on_model(multiSubGraph->get_function(i)) || result; } } From c482e5f807c97d1e44a7e51f97e65a6d188cad1c Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Thu, 26 Oct 2023 12:57:33 +0100 Subject: [PATCH 077/275] [apiConformance] Add test CheckInferIsNotChangeInput to io_tensor (#20159) --- .../behavior/ov_infer_request/io_tensor.cpp | 18 ++++++++++++++++++ 
.../skip_configs/CPU/expected_failures_API.csv | 1 + 2 files changed, 19 insertions(+) diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/io_tensor.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/io_tensor.cpp index 344f0e57daf5d0..6d23a1f615d95f 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/io_tensor.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_infer_request/io_tensor.cpp @@ -219,6 +219,24 @@ TEST_P(OVInferRequestIOTensorTest, InferStaticNetworkSetChangedOutputTensorThrow ASSERT_ANY_THROW(req.infer()); } +TEST_P(OVInferRequestIOTensorTest, CheckInferIsNotChangeInput) { + ov::Tensor input_tensor = utils::create_and_fill_tensor(input.get_element_type(), input.get_shape()); + OV_ASSERT_NO_THROW(req.set_tensor(input, input_tensor)); + OV_ASSERT_NO_THROW(req.get_tensor(input)); + + OV_ASSERT_NO_THROW(req.infer()); + + ov::Tensor input_after_infer; + OV_ASSERT_NO_THROW(input_after_infer = req.get_tensor(input)); + ov::test::utils::compare(input_tensor, input_after_infer); + + OV_ASSERT_NO_THROW(req.infer()); + + ov::Tensor input_after_several_infer; + OV_ASSERT_NO_THROW(input_after_several_infer = req.get_tensor(input)); + ov::test::utils::compare(input_tensor, input_after_several_infer); +} + std::string OVInferRequestIOTensorSetPrecisionTest::getTestCaseName(const testing::TestParamInfo& obj) { element::Type type; std::string target_device; diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_API.csv b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_API.csv index 7e956981216699..db8f560869e79c 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_API.csv +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_API.csv @@ -124,6 +124,7 @@ 
ov_infer_request_mandatory/OVInferRequestIOTensorTest.canInferWithGetIn/targetDe ov_infer_request_mandatory/OVInferRequestIOTensorTest.canInferAfterIOBlobReallocation/targetDevice=BATCH.CPU_,1.0 ov_infer_request_mandatory/OVInferRequestIOTensorTest.InferStaticNetworkSetChangedOutputTensorThrow/targetDevice=BATCH.CPU_,1.0 ov_infer_request_mandatory/OVInferRequestIOTensorTest.InferStaticNetworkSetChangedInputTensorThrow/targetDevice=BATCH.CPU_,1.0 +ov_infer_request_mandatory/OVInferRequestIOTensorTest.CheckInferIsNotChangeInput/targetDevice=BATCH.CPU_,1.0 ov_infer_request_mandatory/OVInferRequestIOTensorSetPrecisionTest.CanSetOutBlobWithDifferentPrecision/type=u8_target_device=BATCH.CPU_,1.0 ov_infer_request_mandatory/OVInferRequestIOTensorSetPrecisionTest.CanSetOutBlobWithDifferentPrecision/type=u64_target_device=BATCH.CPU_,1.0 ov_infer_request_mandatory/OVInferRequestIOTensorSetPrecisionTest.CanSetOutBlobWithDifferentPrecision/type=u32_target_device=BATCH.CPU_,1.0 From 3b9606f2170dc15a8f80c9d2a790e7a92dddb97c Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Thu, 26 Oct 2023 15:35:57 +0300 Subject: [PATCH 078/275] [PyOV] fix set_node_friendly_name (#20695) --- .../python/src/openvino/runtime/utils/decorators.py | 2 +- src/bindings/python/tests/test_graph/test_reduction.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/src/openvino/runtime/utils/decorators.py b/src/bindings/python/src/openvino/runtime/utils/decorators.py index a4b4d33fe6a00d..b980c4c7e50f06 100644 --- a/src/bindings/python/src/openvino/runtime/utils/decorators.py +++ b/src/bindings/python/src/openvino/runtime/utils/decorators.py @@ -9,7 +9,7 @@ from openvino.runtime.utils.types import NodeInput, as_node, as_nodes -def _set_node_friendly_name(node: Node, **kwargs: Any) -> Node: +def _set_node_friendly_name(node: Node, /, **kwargs: Any) -> Node: if "name" in kwargs: node.friendly_name = kwargs["name"] return node diff --git 
a/src/bindings/python/tests/test_graph/test_reduction.py b/src/bindings/python/tests/test_graph/test_reduction.py index 7480bb2009ae5f..9747af0b926834 100644 --- a/src/bindings/python/tests/test_graph/test_reduction.py +++ b/src/bindings/python/tests/test_graph/test_reduction.py @@ -154,3 +154,9 @@ def test_normalize_l2(): assert node.get_output_size() == 1 assert node.get_type_name() == "NormalizeL2" assert list(node.get_output_shape(0)) == input_shape + + +def test_reduce_with_keywork(): + const = ov.constant([-1], np.int64) + min_op = ov.reduce_min(node=const, reduction_axes=0) + assert min_op.get_output_size() == 1 From 7720135f58d1a0f2b2bb92ba7ff25a8f44e869b1 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 26 Oct 2023 17:03:03 +0400 Subject: [PATCH 079/275] [SUBGRAPHS DUMPER] Fix Application Crash for Subgraphs Dumper (#20698) * [SUBGRAPHS DUMPER] Fix Application Crash for Subgraphs Dumper * Update model.hpp --- .../subgraphs_dumper/include/cache/graph_cache.hpp | 7 +++++-- .../src/matchers/subgraph/repeat_pattern.cpp | 8 +++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp index c3f6ae6aecf971..a07cef86d8d8d0 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/graph_cache.hpp @@ -52,10 +52,13 @@ class GraphCache : public ICache { GraphCache(const std::string& device = "") { ExtractorsManager::ExtractorsMap matchers = { - // temporary disabling according mem leaks in CI and not using swap mem - { "fused_names", FusedNamesExtractor::Ptr(new FusedNamesExtractor(device)) }, { "repeat_pattern", RepeatPatternExtractor::Ptr(new RepeatPatternExtractor) }, }; + try { + matchers.insert({ "fused_names", FusedNamesExtractor::Ptr(new 
FusedNamesExtractor(device)) }); + } catch(const std::exception& e) { + std::cout << "[ GRAPH CACHE ][ WARNING ] Fused names extractor is disabled according: " << e.what() << std::endl; + } m_manager.set_extractors(matchers); m_cache_subdir = "subgraph"; } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp index 006714774cc2a0..0963a401b74ba9 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/repeat_pattern.cpp @@ -82,7 +82,7 @@ RepeatPatternExtractor::update_extractor_cache( const std::map& pattern_in_info) { for (auto& extracted_pattern : extracted_patterns) { auto& pattern_structure = extracted_pattern.front(); - const auto& cached_pattern = std::get<0>(pattern_structure); + const auto cached_pattern = std::get<0>(pattern_structure); if (model_comparator->match(pattern, cached_pattern)) { try { const auto& cached_in_info = std::get<2>(pattern_structure); @@ -99,17 +99,15 @@ void RepeatPatternExtractor::update_extractor_cache( std::list>& extracted_patterns, std::list>& secondary_extracted_patterns) { - auto extern_it = secondary_extracted_patterns.begin(); while (!secondary_extracted_patterns.empty()) { - auto it = extern_it->rbegin(); + auto extern_it = secondary_extracted_patterns.begin(); while (!extern_it->empty()) { - auto& pattern_structure = *it; + auto& pattern_structure = *(extern_it->rbegin()); const auto& pattern = std::get<0>(pattern_structure); const auto& pattern_node_vector = std::get<1>(pattern_structure); const auto& pattern_in_info = std::get<2>(pattern_structure); update_extractor_cache(extracted_patterns, pattern, pattern_node_vector, pattern_in_info); extern_it->pop_back(); - it = extern_it->rbegin(); } 
secondary_extracted_patterns.pop_front(); } From 26632d1cd93e3b3ab77e941a445b76ec4f93dafb Mon Sep 17 00:00:00 2001 From: Karan Jakhar Date: Thu, 26 Oct 2023 19:58:47 +0530 Subject: [PATCH 080/275] [PT FE] Add aten::__xor__ (#20662) * Add __xor__ * Add xor tests * add more xfail tests * Update src/frontends/pytorch/src/op_table.cpp Co-authored-by: Maxim Vafin * Update src/frontends/pytorch/src/op_table.cpp Co-authored-by: Maxim Vafin * fix code style --------- Co-authored-by: Maxim Vafin --- src/frontends/pytorch/src/op/bitwise.cpp | 11 +++ src/frontends/pytorch/src/op_table.cpp | 2 + tests/layer_tests/pytorch_tests/test_xor.py | 87 +++++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 tests/layer_tests/pytorch_tests/test_xor.py diff --git a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp index 8cbae192ca6bef..673ba77dda14ac 100644 --- a/src/frontends/pytorch/src/op/bitwise.cpp +++ b/src/frontends/pytorch/src/op/bitwise.cpp @@ -6,6 +6,7 @@ #include "openvino/op/logical_and.hpp" #include "openvino/op/logical_not.hpp" #include "openvino/op/logical_or.hpp" +#include "openvino/op/logical_xor.hpp" #include "utils.hpp" namespace ov { @@ -45,6 +46,16 @@ OutputVector translate_bitwise_or(const NodeContext& context) { return {or_x}; }; +OutputVector translate_bitwise_xor(const NodeContext& context) { + num_inputs_check(context, 2, 2); + auto x = context.get_input(0); + auto y = context.get_input(1); + FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), + "aten::bitwise_xor supported only for boolean input"); + auto xor_x = context.mark_node(std::make_shared(x, y)); + return {xor_x}; +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 55434a49fd45e4..bd625b9643770c 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -131,6 
+131,7 @@ OP_CONVERTER(translate_one_hot); OP_CONVERTER(translate_ones); OP_CONVERTER(translate_ones_like); OP_CONVERTER(translate_or); +OP_CONVERTER(translate_bitwise_xor); OP_CONVERTER(translate_outer); OP_CONVERTER(translate_pad); OP_CONVERTER(translate_pairwise_distance); @@ -233,6 +234,7 @@ const std::map get_supported_ops_ts() { {"aten::__getitem__", op::translate_getitem}, {"aten::__not__", op::translate_1to1_match_1_inputs}, {"aten::__or__", op::translate_or}, + {"aten::__xor__", op::translate_bitwise_xor}, {"aten::__range_length", op::translate_range_length}, {"aten::_convolution", op::translate_convolution}, {"aten::_convolution_mode", op::translate_convolution_mode}, diff --git a/tests/layer_tests/pytorch_tests/test_xor.py b/tests/layer_tests/pytorch_tests/test_xor.py new file mode 100644 index 00000000000000..6fc9b467a78bf2 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_xor.py @@ -0,0 +1,87 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestXor(PytorchLayerTest): + + def _prepare_input(self): + return self.input_data + + def create_model_tensor_input(self): + class aten_xor_tensor(torch.nn.Module): + + def __init__(self) -> None: + super().__init__() + + def forward(self, tensor_a, tensor_b): + return tensor_a ^ tensor_b + + ref_net = None + + return aten_xor_tensor(), ref_net, "aten::__xor__" + + def create_model_bool_input(self): + class aten_xor_bool(torch.nn.Module): + + def __init__(self) -> None: + super().__init__() + + def forward(self, bool_a: bool, bool_b: bool): + return bool_a ^ bool_b + + ref_net = None + + return aten_xor_bool(), ref_net, "aten::__xor__" + + def create_model_int_input(self): + class aten_xor_int(torch.nn.Module): + + def __init__(self) -> None: + super().__init__() + + def forward(self, int_a: int, int_b: int): + return int_a ^ int_b + + ref_net = None + + 
return aten_xor_int(), ref_net, "aten::__xor__" + + @pytest.mark.nightly + @pytest.mark.precommit + def test_xor_tensor(self, ie_device, precision, ir_version): + self.input_data = (np.array([True, False, False], dtype=np.bool_), np.array( + [True, True, False], dtype=np.bool_)) + self._test(*self.create_model_tensor_input(), + ie_device, precision, ir_version) + + @pytest.mark.nightly + @pytest.mark.precommit + def test_xor_bool(self, ie_device, precision, ir_version): + self.input_data = (np.array(True, dtype=np.bool_), + np.array(True, dtype=np.bool_)) + self._test(*self.create_model_bool_input(), + ie_device, precision, ir_version) + + @pytest.mark.xfail(reason="bitwise_xor is not implemented") + @pytest.mark.nightly + @pytest.mark.precommit + def test_xor_int(self, ie_device, precision, ir_version): + self.input_data = (np.array(3, dtype=np.int), + np.array(4, dtype=np.int)) + self._test(*self.create_model_int_input(), + ie_device, precision, ir_version) + + @pytest.mark.xfail(reason="bitwise_xor is not implemented") + @pytest.mark.nightly + @pytest.mark.precommit + def test_xor_tensor(self, ie_device, precision, ir_version): + self.input_data = (np.array([3, 5, 8], dtype=np.int), np.array( + [7, 11, 2], dtype=np.int)) + self._test(*self.create_model_tensor_input(), + ie_device, precision, ir_version) From 1e4f3f18fe2fe1e991062edc4b6deb56fa720c48 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Thu, 26 Oct 2023 16:40:45 +0200 Subject: [PATCH 081/275] [CPU][ARM] Restore ACL cache variables (#18996) * Update CMakeLists.txt * Update CMakeLists.txt * updated cmake f16 enabling condition * updated condition --- src/plugins/intel_cpu/CMakeLists.txt | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 8276d5a3188970..e21e959fdf441b 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -21,18 +21,33 @@ 
elseif(OV_COMPILER_IS_CLANG) ov_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor) endif() +set(OV_CPU_ARM_TARGET_GENERIC_ARCHS armv8a + armv8.2-a + armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2 + armv8r64 # the same as armv8.4-a +) if(ARM) set(OV_CPU_ARM_TARGET_ARCH_DEFAULT armv7a) + set(OV_CPU_ARM_TARGET_ARCHS armv7a armv7a-hf + # requires estate=32 + ${OV_CPU_ARM_TARGET_GENERIC_ARCHS}) elseif(AARCH64) if(APPLE) # Apple M1 / M2 is assumed set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) - add_compile_definitions(OV_CPU_ARM_ENABLE_FP16) else() set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8a) endif() + set(OV_CPU_ARM_TARGET_ARCHS arm64-v8a + arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2 + # used with estate=64 + ${OV_CPU_ARM_TARGET_GENERIC_ARCHS}) +endif() +set(OV_CPU_ARM_TARGET_ARCH ${OV_CPU_ARM_TARGET_ARCH_DEFAULT} CACHE STRING "Architecture for ARM ComputeLibrary") +set_property(CACHE OV_CPU_ARM_TARGET_ARCH PROPERTY STRINGS ${OV_CPU_ARM_TARGET_ARCHS}) +if(OV_CPU_ARM_TARGET_ARCH MATCHES "(armv|arm64-v)[8-9]\\.") + add_definitions(-DOV_CPU_ARM_ENABLE_FP16) endif() -set(OV_CPU_ARM_TARGET_ARCH ${OV_CPU_ARM_TARGET_ARCH_DEFAULT}) if(X86 OR X86_64 OR AARCH64) # disable mlas with webassembly From 66dca04b453b9496c00572906052fa6bc6caf9d6 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Thu, 26 Oct 2023 17:00:51 +0100 Subject: [PATCH 082/275] [GPU] Refactor CumSum, DeformableConvolution, DeformablePSROIPooling, DepthToSpaceBS (#20499) * CumSum * deformable_convolution * DeformablePSROIPooling * DepthToSpaceBS * smoke_DetectionOutput3In, smoke_DetectionOutput5In * tmp * refactor 3 embedding * extract_image_patches and fake_quantize * gather_nd and gather_tree * gather_elements * Gather * dft * CumSum * deformable_convolution * DeformablePSROIPooling * DepthToSpaceBS * smoke_DetectionOutput3In, smoke_DetectionOutput5In * tmp * refactor 3 embedding * extract_image_patches and fake_quantize * gather_nd and gather_tree * gather_elements * Gather * 
dft * restore dft * fix after review --------- Co-authored-by: Sergeys Shlyapnikov --- .../single_layer_tests/cum_sum.cpp | 27 +- .../deformable_convolution.cpp | 487 +++++++---------- .../deformable_psroi_pooling.cpp | 39 +- .../single_layer_tests/depth_to_space.cpp | 32 +- .../single_layer_tests/detection_output.cpp | 6 +- .../single_layer_tests/eltwise.cpp | 38 +- .../embedding_bag_offsets_sum.cpp | 34 +- .../embedding_bag_packed_sum.cpp | 28 +- .../embedding_segments_sum.cpp | 36 +- .../extract_image_patches.cpp | 29 +- .../single_layer_tests/fake_quantize.cpp | 28 +- .../single_layer_tests/gather.cpp | 502 ++++++++---------- .../single_layer_tests/gather_elements.cpp | 184 ++++--- .../single_layer_tests/gather_nd.cpp | 96 ++-- .../single_layer_tests/gather_tree.cpp | 26 +- 15 files changed, 693 insertions(+), 899 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp index 5ede32199f9062..6d014408fd68f7 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp @@ -4,31 +4,30 @@ #include -#include "single_layer_tests/cum_sum.hpp" +#include "single_op_tests/cum_sum.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::CumSumLayerTest; -std::vector> inShapes = { - {10, 10}, - {10, 10, 10}, - {10, 10, 10, 10}, - {10, 10, 10, 10, 10}, - {10, 10, 10, 10, 10, 10}, +std::vector> inShapes = { + {{10, 10}}, + {{10, 10, 10}}, + {{10, 10, 10, 10}}, + {{10, 10, 10, 10, 10}}, + {{10, 10, 10, 10, 10, 10}}, }; std::vector axes = {-1, 0, 1}; std::vector exclusive = {false, true}; std::vector reverse = {false, true}; -std::vector precisions = {InferenceEngine::Precision::FP32, - 
InferenceEngine::Precision::FP16}; +std::vector precisions = {ov::element::f32, + ov::element::f16}; -std::vector> shape1d = {{10}}; +std::vector> shape1d = {{{10}}}; std::vector axis1d = {0}; INSTANTIATE_TEST_SUITE_P(smoke_CumSum1D, CumSumLayerTest, ::testing::Combine( - ::testing::ValuesIn(shape1d), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shape1d)), ::testing::ValuesIn(precisions), ::testing::ValuesIn(axis1d), ::testing::ValuesIn(exclusive), @@ -38,7 +37,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_CumSum1D, CumSumLayerTest, INSTANTIATE_TEST_SUITE_P(smoke_CumSum, CumSumLayerTest, ::testing::Combine( - ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapes)), ::testing::ValuesIn(precisions), ::testing::ValuesIn(axes), ::testing::ValuesIn(exclusive), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_convolution.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_convolution.cpp index d81199b1eaf2b1..0d72281019ee74 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_convolution.cpp @@ -3,13 +3,14 @@ // #include #include "common_test_utils/test_constants.hpp" -#include "single_layer_tests/deformable_convolution.hpp" -using namespace LayerTestsDefinitions; +#include "single_op_tests/deformable_convolution.hpp" + namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I16 +using ov::test::DeformableConvolutionLayerTest; +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i16 }; /* ============= 2D DeformableConvolution ============= */ @@ -30,57 +31,55 @@ const std::vector 
with_bilinear_interpolation_pad = { false, true }; const std::vector with_modulated_scalar = { false, true }; const auto deformableConv2DParams_ExplicitPadding = ::testing::Combine( - ::testing::ValuesIn(deformable_values_0), - ::testing::ValuesIn(kernel_shapes_0), ::testing::ValuesIn(stride_values), - ::testing::ValuesIn(pad_begin_values), ::testing::ValuesIn(pad_end_values), - ::testing::ValuesIn(dilation_values), ::testing::ValuesIn(num_groups), - ::testing::ValuesIn(num_deformable_groups_0), ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)); + ::testing::ValuesIn(stride_values), + ::testing::ValuesIn(pad_begin_values), + ::testing::ValuesIn(pad_end_values), + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(num_groups), + ::testing::ValuesIn(num_deformable_groups_0), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad)); const auto deformableConv2DParams_AutoPadValid = ::testing::Combine( - ::testing::ValuesIn(deformable_values_0), - ::testing::ValuesIn(kernel_shapes_0), ::testing::ValuesIn(stride_values), + ::testing::ValuesIn(stride_values), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), ::testing::ValuesIn(num_groups), - ::testing::ValuesIn(num_deformable_groups_0), ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::VALID), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)); + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(num_groups), + ::testing::ValuesIn(num_deformable_groups_0), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::VALID), + ::testing::ValuesIn(with_bilinear_interpolation_pad)); const auto 
deformableConv2DParams_DeformableGroups_AutoPadExplicit = ::testing::Combine( - ::testing::ValuesIn(deformable_values_1), - ::testing::ValuesIn(kernel_shapes_1), ::testing::ValuesIn(stride_values), + ::testing::ValuesIn(stride_values), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), ::testing::ValuesIn(num_groups), - ::testing::ValuesIn(multiple_defor_groups), ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)); + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(num_groups), + ::testing::ValuesIn(multiple_defor_groups), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad)); INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_ExplicitPadding, DeformableConvolutionLayerTest, ::testing::Combine( - deformableConv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 2, 3, 3})), + deformableConv2DParams_ExplicitPadding, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 2, 3, 3}, {1, 16, 2, 2}, {2, 2, 2, 2}, {1, 8, 2, 2}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), DeformableConvolutionLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_AutoPadValid, DeformableConvolutionLayerTest, ::testing::Combine( - deformableConv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 2, 3, 3})), + deformableConv2DParams_AutoPadValid, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 2, 3, 3}, {1, 16, 2, 2}, {2, 2, 2, 2}, {1, 8, 2, 2}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), DeformableConvolutionLayerTest::getTestCaseName); @@ -88,12 +87,12 @@ INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_DeformableGroups_ExplicitPadding, DeformableConvolutionLayerTest, ::testing::Combine( deformableConv2DParams_DeformableGroups_AutoPadExplicit, + ::testing::ValuesIn(with_modulated_scalar), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 16, 66, 66})), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 16, 66, 66}, + {1, 72, 64, 64}, + {16, 16, 3, 3}, + {1, 36, 64, 64}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), DeformableConvolutionLayerTest::getTestCaseName); @@ -103,8 +102,6 @@ const std::vector> kernel_shapes_2 = {{1, 3, 3, 3}}; const std::vector num_deformable_groups_1 = {3}; const auto deformableConv2DParams_SingleTestCase = ::testing::Combine( - ::testing::ValuesIn(deformable_values_2), - ::testing::ValuesIn(kernel_shapes_2), ::testing::ValuesIn(stride_values), ::testing::ValuesIn(pad_begin_values), ::testing::ValuesIn(pad_end_values), @@ -112,310 +109,188 @@ const auto deformableConv2DParams_SingleTestCase = ::testing::Combine( ::testing::ValuesIn(num_groups), ::testing::ValuesIn(num_deformable_groups_1), 
::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar) + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad) ); INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_SingleTestCase, DeformableConvolutionLayerTest, ::testing::Combine( deformableConv2DParams_SingleTestCase, + ::testing::ValuesIn(with_modulated_scalar), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 3, 30, 30}, + {1, 54, 28, 28}, + {1, 3, 3, 3}, + {1, 27, 28, 28}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), DeformableConvolutionLayerTest::getTestCaseName); /* ============= Multiple groups case ============= */ +const auto DeformableConvolution2D_MultipleGroups22 = ::testing::Combine( + ::testing::ValuesIn(stride_values), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(std::vector({2})), + ::testing::ValuesIn(std::vector({2})), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad) +); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{1, 16, 2, 2}}), // offsets - ::testing::ValuesIn(std::vector> {{2, 2, 2, 2}}), // ker. 
- ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {2}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. - ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 3, 3})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups22, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{1, 4, 3, 3}, {1, 16, 2, 2}, {2, 2, 2, 2}, {1, 8, 2, 2}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_Batch2, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{2, 16, 2, 2}}), // offsets - ::testing::ValuesIn(std::vector> {{2, 2, 2, 2}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {2}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({2, 4, 3, 3})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups22, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{2, 4, 3, 3}, {2, 16, 2, 2}, {2, 2, 2, 2}, {2, 8, 2, 2}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_Batch3, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{3, 16, 2, 2}}), // offsets - ::testing::ValuesIn(std::vector> {{2, 2, 2, 2}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {2}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({3, 4, 3, 3})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups22, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{3, 4, 3, 3}, {3, 16, 2, 2}, {2, 2, 2, 2}, {3, 8, 2, 2}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_Batch4, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{4, 16, 2, 2}}), // offsets - ::testing::ValuesIn(std::vector> {{2, 2, 2, 2}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {2}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({4, 4, 3, 3})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups22, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{4, 4, 3, 3}, {4, 16, 2, 2}, {2, 2, 2, 2}, {4, 8, 2, 2}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + +const auto DeformableConvolution2D_MultipleGroups_41 = ::testing::Combine( + ::testing::ValuesIn(stride_values), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(std::vector({4})), + ::testing::ValuesIn(std::vector({1})), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad) +); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_2, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{1, 18, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {1}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_41, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{1, 8, 68, 68}, {1, 18, 66, 66}, {4, 2, 3, 3}, {1, 9, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_2_Batch2, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{2, 18, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {1}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({2, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_41, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{2, 8, 68, 68}, {2, 18, 66, 66}, {4, 2, 3, 3}, {2, 9, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_2_Batch3, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{3, 18, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {1}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({3, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_41, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{3, 8, 68, 68}, {3, 18, 66, 66}, {4, 2, 3, 3}, {3, 9, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_2_Batch4, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{4, 18, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {1}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({4, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_41, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{4, 8, 68, 68}, {4, 18, 66, 66}, {4, 2, 3, 3}, {4, 9, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + +const auto DeformableConvolution2D_MultipleGroups_42 = ::testing::Combine( + ::testing::ValuesIn(stride_values), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilation_values), + ::testing::ValuesIn(std::vector({4})), + ::testing::ValuesIn(std::vector({2})), + ::testing::ValuesIn(num_out_channels), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad) +); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_3, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{1, 36, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. 
gr. - ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_42, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{1, 8, 68, 68}, {1, 36, 66, 66}, {4, 2, 3, 3}, {1, 18, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_3_Batch2, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{2, 36, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({2, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_42, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{2, 8, 68, 68}, {2, 36, 66, 66}, {4, 2, 3, 3}, {2, 18, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_3_Batch3, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{3, 36, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({3, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_42, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{3, 8, 68, 68}, {3, 36, 66, 66}, {4, 2, 3, 3}, {3, 18, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P( smoke_DeformableConvolution2D_MultipleGroups_3_Batch4, DeformableConvolutionLayerTest, ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(std::vector> {{4, 36, 66, 66}}), // offsets - ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. - ::testing::ValuesIn(stride_values), - ::testing::Values(std::vector({0, 0})), - ::testing::Values(std::vector({0, 0})), - ::testing::ValuesIn(dilation_values), - ::testing::ValuesIn(std::vector {4}), // gr. - ::testing::ValuesIn(std::vector {2}), // def. gr. 
- ::testing::ValuesIn(num_out_channels), - ::testing::Values(ngraph::op::PadType::EXPLICIT), - ::testing::ValuesIn(with_bilinear_interpolation_pad), - ::testing::ValuesIn(with_modulated_scalar)), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({4, 8, 68, 68})), - ::testing::Values(ov::test::utils::DEVICE_GPU)), - DeformableConvolutionLayerTest::getTestCaseName); + DeformableConvolution2D_MultipleGroups_42, + ::testing::ValuesIn(with_modulated_scalar), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{4, 8, 68, 68}, {4, 36, 66, 66}, {4, 2, 3, 3}, {4, 18, 66, 66}}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + DeformableConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_psroi_pooling.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_psroi_pooling.cpp index 2abb9f270ae605..8b2f061622116e 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_psroi_pooling.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/deformable_psroi_pooling.cpp @@ -2,21 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/deformable_psroi_pooling.hpp" +#include "single_op_tests/deformable_psroi_pooling.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::DeformablePSROIPoolingLayerTest; + +std::vector> shapes_static { + //dataShape, roisShape, offsetsShape + {{3, 8, 16, 16}, {10, 5}}, + {{1, 8, 67, 32}, {10, 5}}, + {{3, 8, 
16, 16}, {10, 5}, {10, 2, 2, 2}}, + {{1, 8, 67, 32}, {10, 5}, {10, 2, 2, 2}}, +}; const auto params = testing::Combine( - testing::ValuesIn(std::vector>{{3, 8, 16, 16}, {1, 8, 67, 32}}), // data input shape - testing::Values(std::vector{10, 5}), // rois input shape - // Empty offsets shape means test without optional third input - testing::ValuesIn(std::vector>{{}, {10, 2, 2, 2}}), // offsets input shape testing::Values(2), // output_dim testing::Values(2), // group_size testing::ValuesIn(std::vector{1.0f, 0.5f, 0.0625f}), // spatial_scale @@ -27,16 +30,17 @@ const auto params = testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_DeformablePSROIPooling, DeformablePSROIPoolingLayerTest, testing::Combine(params, - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16), - testing::Values(ov::test::utils::DEVICE_GPU)), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_static)), + testing::Values(ov::element::f32, + ov::element::f16), + testing::Values(ov::test::utils::DEVICE_GPU)), DeformablePSROIPoolingLayerTest::getTestCaseName); - +std::vector> shapes_advanced_static { + //dataShape, roisShape, offsetsShape + {{2, 441, 63, 38}, {30, 5}, {30, 2, 3, 3}} +}; const auto params_advanced = - testing::Combine(testing::ValuesIn(std::vector>{{2, 441, 63, 38}}), // data input shape - testing::Values(std::vector{30, 5}), // rois input shape - testing::Values(std::vector{30, 2, 3, 3}), // offsets input shape - testing::Values(49), // output_dim + testing::Combine(testing::Values(49), // output_dim testing::Values(3), // group_size testing::ValuesIn(std::vector{0.0625f}), // spatial_scale testing::ValuesIn(std::vector>{{4, 4}}), // spatial_bins_x_y @@ -46,8 +50,9 @@ const auto params_advanced = INSTANTIATE_TEST_SUITE_P(smoke_DeformablePSROIPooling_advanced, DeformablePSROIPoolingLayerTest, testing::Combine(params_advanced, - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16), + 
testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_advanced_static)), + testing::Values(ov::element::f32, + ov::element::f16), testing::Values(ov::test::utils::DEVICE_GPU)), DeformablePSROIPoolingLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/depth_to_space.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/depth_to_space.cpp index 4af8f6977de042..ca7ac19ac93d11 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/depth_to_space.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/depth_to_space.cpp @@ -5,43 +5,43 @@ #include #include -#include "single_layer_tests/depth_to_space.hpp" +#include "single_op_tests/depth_to_space.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset3; - namespace { -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I16, +using ov::test::DepthToSpaceLayerTest; +using ov::op::v0::DepthToSpace; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::u8, + ov::element::i16, }; const std::vector modes = { DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST, DepthToSpace::DepthToSpaceMode::DEPTH_FIRST}; -const std::vector> inputShapesBS2 = { - {1, 4, 1, 1}, {1, 4, 2, 2}, {1, 4, 3, 3}, {2, 32, 3, 3}, {2, 16, 5, 4}, - {1, 8, 1, 1, 1}, {1, 8, 2, 2, 2}, {1, 8, 3, 3, 3}, {2, 32, 3, 3, 3}, {2, 16, 5, 4, 6}}; +const std::vector> inputShapesBS2 = { + {{1, 4, 1, 1}}, {{1, 4, 2, 2}}, {{1, 4, 3, 3}}, {{2, 32, 3, 3}}, {{2, 16, 5, 4}}, + {{1, 8, 1, 1, 1}}, {{1, 8, 2, 2, 2}}, {{1, 8, 3, 3, 3}}, {{2, 32, 3, 3, 3}}, {{2, 16, 5, 4, 6}}}; INSTANTIATE_TEST_SUITE_P(smoke_DepthToSpaceBS2, DepthToSpaceLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapesBS2), + 
::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesBS2)), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(modes), ::testing::Values(2), ::testing::Values(ov::test::utils::DEVICE_GPU)), DepthToSpaceLayerTest::getTestCaseName); -const std::vector> inputShapesBS3 = { - {1, 9, 1, 1}, {1, 9, 2, 2}, {1, 9, 3, 3}, {2, 36, 3, 3}, {2, 27, 5, 4}, - {1, 27, 1, 1, 1}, {1, 27, 2, 2, 2}, {1, 27, 3, 3, 3}, {2, 108, 3, 3, 3}, {2, 54, 5, 4, 6}}; +const std::vector> inputShapesBS3 = { + {{1, 9, 1, 1}}, {{1, 9, 2, 2}}, {{1, 9, 3, 3}}, {{2, 36, 3, 3}}, {{2, 27, 5, 4}}, + {{1, 27, 1, 1, 1}}, {{1, 27, 2, 2, 2}}, {{1, 27, 3, 3, 3}}, {{2, 108, 3, 3, 3}}, {{2, 54, 5, 4, 6}}}; INSTANTIATE_TEST_SUITE_P(smoke_DepthToSpaceBS3, DepthToSpaceLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapesBS3), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesBS3)), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(modes), ::testing::Values(3), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp index 8a78eb3611cace..968715cc939acd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/detection_output.cpp @@ -2,11 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/detection_output.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/detection_output.hpp" namespace { +using ov::test::DetectionOutputLayerTest; +using ov::test::ParamsWhichSizeDepends; const int numClasses = 11; const int backgroundLabelId = 0; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp index 009d29e94ed1f4..7085bd9e5e6117 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/eltwise.cpp @@ -3,12 +3,16 @@ // #include -#include "single_layer_tests/eltwise.hpp" +#include "single_op_tests/eltwise.hpp" #include "common_test_utils/test_constants.hpp" -using namespace ov::test::subgraph; namespace { +using ov::test::EltwiseLayerTest; +using ov::test::utils::InputLayerType; +using ov::test::utils::OpType; +using ov::test::utils::EltwiseTypes; + std::vector> inShapes = { {{2}}, {{}, {34100}}, @@ -36,9 +40,9 @@ std::vector netPrecisions = { ov::element::i64, }; -std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +std::vector secondaryInputTypes = { + InputLayerType::CONSTANT, + InputLayerType::PARAMETER, }; std::vector opTypes = { @@ -46,20 +50,20 @@ std::vector opTypes = { ov::test::utils::OpType::VECTOR, }; -std::vector smoke_eltwiseOpTypes = { - ngraph::helpers::EltwiseTypes::ADD, - ngraph::helpers::EltwiseTypes::MULTIPLY, +std::vector smoke_eltwiseOpTypes = { + EltwiseTypes::ADD, + EltwiseTypes::MULTIPLY, }; -std::vector eltwiseOpTypes = { - ngraph::helpers::EltwiseTypes::ADD, - ngraph::helpers::EltwiseTypes::MULTIPLY, - ngraph::helpers::EltwiseTypes::SUBTRACT, - ngraph::helpers::EltwiseTypes::DIVIDE, - ngraph::helpers::EltwiseTypes::FLOOR_MOD, - ngraph::helpers::EltwiseTypes::SQUARED_DIFF, - ngraph::helpers::EltwiseTypes::POWER, - ngraph::helpers::EltwiseTypes::MOD +std::vector eltwiseOpTypes = { + EltwiseTypes::ADD, + EltwiseTypes::MULTIPLY, + EltwiseTypes::SUBTRACT, + EltwiseTypes::DIVIDE, + EltwiseTypes::FLOOR_MOD, + EltwiseTypes::SQUARED_DIFF, + EltwiseTypes::POWER, + EltwiseTypes::MOD }; ov::AnyMap additional_config = {}; diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_offsets_sum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_offsets_sum.cpp index 1614f8e41da72c..268a65df8b350d 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_offsets_sum.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_offsets_sum.cpp @@ -2,30 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/embedding_bag_offsets_sum.hpp" +#include "single_op_tests/embedding_bag_offsets_sum.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::EmbeddingBagOffsetsSumLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; -const std::vector indPrecisions = { - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32 +const std::vector indPrecisions = { + ov::element::i64, + ov::element::i32 }; -const std::vector> emb_table_shape = { - {5, 6}, - {10, 35}, - {5, 4, 16} +const std::vector> emb_table_shape = { + {{5, 6}}, + {{10, 35}}, + {{5, 4, 16}} }; const std::vector> indices = { @@ -45,13 +44,16 @@ const std::vector with_weights = {false, true}; const std::vector with_default_index = {false, true}; const auto embBagOffsetSumArgSet = ::testing::Combine( - ::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices), - ::testing::ValuesIn(offsets), ::testing::ValuesIn(default_index), - ::testing::ValuesIn(with_weights), ::testing::ValuesIn(with_default_index)); + ::testing::ValuesIn(indices), + ::testing::ValuesIn(offsets), + ::testing::ValuesIn(default_index), + ::testing::ValuesIn(with_weights), + ::testing::ValuesIn(with_default_index)); 
INSTANTIATE_TEST_SUITE_P( smoke, EmbeddingBagOffsetsSumLayerTest, ::testing::Combine(embBagOffsetSumArgSet, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(emb_table_shape)), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(indPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_packed_sum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_packed_sum.cpp index 2da08a1c4ef6b2..a92a10f1c33ca8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_packed_sum.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_bag_packed_sum.cpp @@ -2,30 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/embedding_bag_packed_sum.hpp" +#include "single_op_tests/embedding_bag_packed_sum.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::EmbeddingBagPackedSumLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, }; -const std::vector indPrecisions = { - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32 +const std::vector indPrecisions = { + ov::element::i64, + ov::element::i32 }; -const std::vector> emb_table_shape = { - {5, 6}, - {10, 35}, - {5, 4, 16} +const std::vector> emb_table_shape = { + {{5, 6}}, + {{10, 35}}, + {{5, 4, 16}} }; const std::vector>> indices = { @@ -36,12 +35,13 @@ const std::vector>> indices = { const std::vector with_weights = {false, true}; const auto embBagPackedSumArgSet = ::testing::Combine( - ::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices), + ::testing::ValuesIn(indices), 
::testing::ValuesIn(with_weights)); INSTANTIATE_TEST_SUITE_P( smoke, EmbeddingBagPackedSumLayerTest, ::testing::Combine(embBagPackedSumArgSet, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(emb_table_shape)), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(indPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_segments_sum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_segments_sum.cpp index 09976af24d14a9..cd3a67b6d6a17f 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_segments_sum.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/embedding_segments_sum.cpp @@ -4,27 +4,27 @@ #include -#include "single_layer_tests/embedding_segments_sum.hpp" +#include "single_op_tests/embedding_segments_sum.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; namespace { +using ov::test::EmbeddingSegmentsSumLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; -const std::vector indPrecisions = { - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32 +const std::vector indPrecisions = { + ov::element::i64, + ov::element::i32 }; -const std::vector> emb_table_shape = { - {5, 6}, - {10, 35}, - {5, 4, 16} +const std::vector> emb_table_shape = { + {{5, 6}}, + {{10, 35}}, + {{5, 4, 16}} }; const std::vector> indices = { {0, 1, 2, 2, 3}, @@ -40,14 +40,18 @@ const std::vector with_weights = {false, true}; const std::vector with_default_index = {false, true}; const auto embSegmentsSumArgSet = ::testing::Combine( - ::testing::ValuesIn(emb_table_shape), ::testing::ValuesIn(indices), - 
::testing::ValuesIn(segment_ids), ::testing::ValuesIn(num_segments), - ::testing::ValuesIn(default_index), ::testing::ValuesIn(with_weights), + ::testing::ValuesIn(indices), + ::testing::ValuesIn(segment_ids), + ::testing::ValuesIn(num_segments), + ::testing::ValuesIn(default_index), + ::testing::ValuesIn(with_weights), ::testing::ValuesIn(with_default_index)); INSTANTIATE_TEST_SUITE_P( smoke, EmbeddingSegmentsSumLayerTest, - ::testing::Combine(embSegmentsSumArgSet, ::testing::ValuesIn(netPrecisions), + ::testing::Combine(embSegmentsSumArgSet, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(emb_table_shape)), + ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(indPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), EmbeddingSegmentsSumLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp index d7ddde0f50a754..8492067daeb1f7 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp @@ -4,16 +4,16 @@ #include -#include "single_layer_tests/extract_image_patches.hpp" +#include "single_op_tests/extract_image_patches.hpp" -using namespace LayerTestsDefinitions; -using ngraph::op::PadType; namespace { +using ov::test::ExtractImagePatchesTest; +using ov::op::PadType; -const std::vector> inDataShape = { - {1, 1, 10, 10}, - {1, 3, 10, 10} +const std::vector> inDataShape = { + {{1, 1, 10, 10}}, + {{1, 3, 10, 10}} }; const std::vector> kernels = { {2, 2}, @@ -40,12 +40,12 @@ const std::vector autoPads = { PadType::SAME_UPPER, PadType::SAME_LOWER }; -const std::vector netPrecisions = { - //InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8, - 
InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::FP32 +const std::vector netPrecisions = { + //ov::element::i8, + ov::element::u8, + ov::element::i16, + ov::element::i32, + ov::element::f32 }; const auto extractImagePatchesParamsSet = ::testing::Combine( @@ -58,15 +58,12 @@ const auto extractImagePatchesParamsSet = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_layers_GPU, ExtractImagePatchesTest, ::testing::Combine( - ::testing::ValuesIn(inDataShape), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inDataShape)), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), ::testing::ValuesIn(rates), ::testing::ValuesIn(autoPads), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), ExtractImagePatchesTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/fake_quantize.cpp index abb061be7e1c6d..44e0fc6d5c2666 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/fake_quantize.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/fake_quantize.cpp @@ -4,46 +4,36 @@ #include -#include "single_layer_tests/fake_quantize.hpp" +#include "single_op_tests/fake_quantize.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::FakeQuantizeLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; -const std::vector> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6}, 
{1, 2, 3, 4, 2, 3, 2, 2}}; +const std::vector> inputShapes = {{{1, 1, 1, 1}}, {{3, 10, 5, 6}}, {{1, 2, 3, 4, 2, 3, 2, 2}}}; const std::vector> constShapes = {{1}}; const std::vector levels = {16, 255, 256}; -const std::pair> config = {}; const std::vector fqArgs = {}; -const std::vector inputParams = {}; - const auto fqParams = ::testing::Combine( ::testing::ValuesIn(levels), ::testing::ValuesIn(constShapes), ::testing::Values(fqArgs), - ::testing::Values(inputParams), - ::testing::Values(ngraph::op::AutoBroadcastType::NUMPY) + ::testing::Values(ov::op::AutoBroadcastType::NUMPY) ); INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest, ::testing::Combine( fqParams, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values(config)), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes)), + ::testing::Values(ov::test::utils::DEVICE_GPU)), FakeQuantizeLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp index 04b501af6d6a51..0573d427d8a419 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather.cpp @@ -4,40 +4,44 @@ #include -#include "single_layer_tests/gather.hpp" +#include "single_op_tests/gather.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::GatherLayerTest; +using ov::test::Gather7LayerTest; +using 
ov::test::Gather8LayerTest; +using ov::test::gather7ParamsTuple; +using ov::test::Gather8IndiceScalarLayerTest; +using ov::test::Gather8withIndicesDataLayerTest; -const std::vector netPrecisionsFP32 = { - InferenceEngine::Precision::FP32, +const std::vector netPrecisionsFP32 = { + ov::element::f32, }; -const std::vector netPrecisionsI32 = { - InferenceEngine::Precision::I32, +const std::vector netPrecisionsI32 = { + ov::element::i32, }; -const std::vector netPrecisionsFP16 = { - InferenceEngine::Precision::FP16, +const std::vector netPrecisionsFP16 = { + ov::element::f16, }; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32, }; -const std::vector> indicesShapes2 = { - std::vector{2, 2}, - std::vector{2, 2, 2}, - std::vector{2, 4}, +const std::vector indicesShapes2 = { + {2, 2}, + {2, 2, 2}, + {2, 4}, }; -const std::vector> indicesShapes23 = { - std::vector{2, 3, 2}, - std::vector{2, 3, 4}, +const std::vector indicesShapes23 = { + {2, 3, 2}, + {2, 3, 4}, }; const std::vector> axis_batch41 = { @@ -50,79 +54,63 @@ const std::vector> axis_batch42 = { std::tuple(4, 2), }; -const std::vector> inputShapesAxes4b1 = { - std::vector{2, 6, 7, 8, 9}, - std::vector{2, 1, 7, 8, 9}, - std::vector{2, 1, 1, 8, 9}, - std::vector{2, 6, 1, 4, 9}, - std::vector{2, 6, 7, 4, 1}, - std::vector{2, 6, 1, 8, 9}, - std::vector{2, 1, 7, 1, 9}, - std::vector{2, 6, 1, 8, 4}, - std::vector{2, 6, 7, 4, 9}, - std::vector{2, 1, 7, 8, 4}, - std::vector{2, 6, 7, 8, 4}, -}; - -const std::vector> inputShapesAxes4b2 = { - std::vector{2, 3, 7, 8, 9}, - std::vector{2, 3, 7, 6, 9}, - std::vector{2, 3, 9, 8, 9}, - std::vector{2, 3, 9, 4, 9}, - std::vector{2, 3, 7, 4, 2}, - std::vector{2, 3, 5, 8, 9}, - std::vector{2, 3, 7, 2, 9}, - std::vector{2, 3, 9, 8, 4}, - std::vector{2, 3, 7, 4, 9}, - std::vector{2, 3, 7, 5, 4}, - 
std::vector{2, 3, 7, 8, 4}, +const std::vector> inputShapesAxes4b1 = { + {{2, 6, 7, 8, 9}}, + {{2, 1, 7, 8, 9}}, + {{2, 1, 1, 8, 9}}, + {{2, 6, 1, 4, 9}}, + {{2, 6, 7, 4, 1}}, + {{2, 6, 1, 8, 9}}, + {{2, 1, 7, 1, 9}}, + {{2, 6, 1, 8, 4}}, + {{2, 6, 7, 4, 9}}, + {{2, 1, 7, 8, 4}}, + {{2, 6, 7, 8, 4}}, +}; + +const std::vector> inputShapesAxes4b2 = { + {{2, 3, 7, 8, 9}}, + {{2, 3, 7, 6, 9}}, + {{2, 3, 9, 8, 9}}, + {{2, 3, 9, 4, 9}}, + {{2, 3, 7, 4, 2}}, + {{2, 3, 5, 8, 9}}, + {{2, 3, 7, 2, 9}}, + {{2, 3, 9, 8, 4}}, + {{2, 3, 7, 4, 9}}, + {{2, 3, 7, 5, 4}}, + {{2, 3, 7, 8, 4}}, }; const auto GatherIndiceScalar = []() { - return testing::Combine(testing::Values(std::vector{1, 3, 4, 5}), - testing::Values(std::vector{}), + return testing::Combine(testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 3, 4, 5}}))), + testing::Values(ov::Shape({})), testing::Values(std::tuple(2, 0)), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; const auto GatherAxes4i4b1 = []() { - return testing::Combine(testing::ValuesIn(inputShapesAxes4b1), + return testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes4b1)), testing::ValuesIn(indicesShapes2), testing::ValuesIn(axis_batch41), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), testing::Values(ov::test::utils::DEVICE_GPU)); }; const auto GatherAxes4i8b1 = []() { - return testing::Combine(testing::ValuesIn(inputShapesAxes4b1), + return 
testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes4b1)), testing::ValuesIn(indicesShapes2), testing::ValuesIn(axis_batch41), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), testing::Values(ov::test::utils::DEVICE_GPU)); }; const auto GatherAxes4i8b2 = []() { - return testing::Combine(testing::ValuesIn(inputShapesAxes4b2), + return testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes4b2)), testing::ValuesIn(indicesShapes23), testing::ValuesIn(axis_batch42), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -190,53 +178,37 @@ INSTANTIATE_TEST_SUITE_P( ); const std::vector> indices = { - std::vector{0, 3, 2, 1}, + {0, 3, 2, 1}, }; -const std::vector> indicesShapes12 = { - std::vector{4}, - std::vector{2, 2} +const std::vector indicesShapes12 = { + {4}, + {2, 2} }; -const std::vector> indicesShapes1 = { - std::vector{4}, +const std::vector indicesShapes1 = { + {4}, }; -const std::vector> inputShapes6DAxes5 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{1, 1, 7, 8, 9, 10}, - std::vector{5, 1, 1, 8, 9, 10}, - std::vector{5, 6, 1, 1, 9, 10}, - std::vector{5, 6, 7, 1, 1, 10}, - std::vector{1, 6, 1, 8, 9, 10}, - std::vector{5, 1, 7, 1, 9, 10}, - std::vector{5, 6, 1, 8, 1, 10}, - std::vector{1, 6, 7, 1, 9, 10}, - std::vector{5, 1, 7, 8, 1, 10}, - std::vector{1, 6, 7, 8, 1, 10}, +const std::vector> inputShapesAxes4 = { + {{5, 6, 7, 8, 9}}, + {{1, 6, 7, 8, 9}}, + {{5, 1, 7, 8, 9}}, + {{5, 6, 1, 8, 9}}, + {{5, 6, 7, 1, 9}}, }; 
-const std::vector axes5 = {5}; - -const std::vector> inputShapesAxes4 = { - std::vector{5, 6, 7, 8, 9}, - std::vector{1, 6, 7, 8, 9}, - std::vector{5, 1, 7, 8, 9}, - std::vector{5, 6, 1, 8, 9}, - std::vector{5, 6, 7, 1, 9}, -}; - -const std::vector> inputShapes6DAxes4 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{1, 1, 7, 8, 9, 10}, - std::vector{5, 1, 1, 8, 9, 10}, - std::vector{5, 6, 1, 1, 9, 10}, - std::vector{5, 6, 7, 1, 9, 1}, - std::vector{1, 6, 1, 8, 9, 10}, - std::vector{5, 1, 7, 1, 9, 10}, - std::vector{5, 6, 1, 8, 9, 1}, - std::vector{1, 6, 7, 1, 9, 10}, - std::vector{5, 1, 7, 8, 9, 1}, - std::vector{1, 6, 7, 8, 9, 1}, +const std::vector> inputShapes6DAxes4 = { + {{5, 6, 7, 8, 9, 10}}, + {{1, 1, 7, 8, 9, 10}}, + {{5, 1, 1, 8, 9, 10}}, + {{5, 6, 1, 1, 9, 10}}, + {{5, 6, 7, 1, 9, 1}}, + {{1, 6, 1, 8, 9, 10}}, + {{5, 1, 7, 1, 9, 10}}, + {{5, 6, 1, 8, 9, 1}}, + {{1, 6, 7, 1, 9, 10}}, + {{5, 1, 7, 8, 9, 1}}, + {{1, 6, 7, 8, 9, 1}}, }; const std::vector axes4 = {4}; @@ -245,12 +217,8 @@ const auto GatherAxes4 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes12), testing::ValuesIn(axes4), - testing::ValuesIn(inputShapesAxes4), - testing::ValuesIn(netPrecisionsFP16), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes4)), + testing::Values(ov::element::f16), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -265,12 +233,8 @@ const auto Gather6dAxes4 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes1), testing::ValuesIn(axes4), - testing::ValuesIn(inputShapes6DAxes4), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), 
- testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6DAxes4)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -281,30 +245,30 @@ INSTANTIATE_TEST_SUITE_P( GatherLayerTest::getTestCaseName ); -const std::vector> inputShapesAxes3 = { - std::vector{5, 6, 7, 8}, - std::vector{1, 6, 7, 8}, - std::vector{5, 1, 7, 8}, - std::vector{5, 6, 1, 8}, - std::vector{5, 6, 7, 8, 9}, - std::vector{1, 6, 7, 8, 9}, - std::vector{5, 1, 7, 8, 9}, - std::vector{5, 6, 1, 8, 9}, - std::vector{5, 6, 7, 8, 1}, -}; - -const std::vector> inputShapes6DAxes3 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{1, 1, 7, 8, 9, 10}, - std::vector{5, 1, 1, 8, 9, 10}, - std::vector{5, 6, 1, 8, 1, 10}, - std::vector{5, 6, 7, 8, 1, 1}, - std::vector{1, 6, 1, 8, 9, 10}, - std::vector{5, 1, 7, 8, 1, 10}, - std::vector{5, 6, 1, 8, 9, 1}, - std::vector{1, 6, 7, 8, 1, 10}, - std::vector{5, 1, 7, 8, 9, 1}, - std::vector{1, 6, 7, 8, 9, 1}, +const std::vector> inputShapesAxes3 = { + {{5, 6, 7, 8}}, + {{1, 6, 7, 8}}, + {{5, 1, 7, 8}}, + {{5, 6, 1, 8}}, + {{5, 6, 7, 8, 9}}, + {{1, 6, 7, 8, 9}}, + {{5, 1, 7, 8, 9}}, + {{5, 6, 1, 8, 9}}, + {{5, 6, 7, 8, 1}}, +}; + +const std::vector> inputShapes6DAxes3 = { + {{5, 6, 7, 8, 9, 10}}, + {{1, 1, 7, 8, 9, 10}}, + {{5, 1, 1, 8, 9, 10}}, + {{5, 6, 1, 8, 1, 10}}, + {{5, 6, 7, 8, 1, 1}}, + {{1, 6, 1, 8, 9, 10}}, + {{5, 1, 7, 8, 1, 10}}, + {{5, 6, 1, 8, 9, 1}}, + {{1, 6, 7, 8, 1, 10}}, + {{5, 1, 7, 8, 9, 1}}, + {{1, 6, 7, 8, 9, 1}}, }; const std::vector axes3 = {3}; @@ -313,12 +277,8 @@ const auto GatherAxes3 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes12), testing::ValuesIn(axes3), - testing::ValuesIn(inputShapesAxes3), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes3)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -333,12 +293,8 @@ const auto Gather6dAxes3 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes1), testing::ValuesIn(axes3), - testing::ValuesIn(inputShapes6DAxes3), - testing::ValuesIn(netPrecisionsI32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6DAxes3)), + testing::Values(ov::element::i32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -349,31 +305,31 @@ INSTANTIATE_TEST_SUITE_P( GatherLayerTest::getTestCaseName ); -const std::vector> inputShapesAxes2 = { - std::vector{5, 6, 7}, - std::vector{5, 6, 7, 8}, - std::vector{1, 6, 7, 8}, - std::vector{5, 1, 7, 8}, - std::vector{5, 6, 7, 1}, - std::vector{5, 6, 7, 8, 9}, - std::vector{1, 6, 7, 8, 9}, - std::vector{5, 1, 7, 8, 9}, - std::vector{5, 6, 7, 1, 9}, - std::vector{5, 6, 7, 8, 1}, -}; - -const std::vector> inputShapes6DAxes2 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{1, 1, 7, 8, 9, 10}, - std::vector{5, 1, 7, 1, 9, 10}, - std::vector{5, 6, 7, 1, 1, 10}, - std::vector{5, 6, 7, 8, 1, 1}, - std::vector{1, 6, 7, 1, 9, 10}, - std::vector{5, 1, 7, 8, 1, 10}, - std::vector{5, 6, 7, 1, 9, 1}, - std::vector{1, 6, 7, 8, 1, 10}, - std::vector{5, 1, 7, 8, 9, 1}, - std::vector{1, 6, 7, 8, 9, 1}, +const std::vector> inputShapesAxes2 = { + {{5, 6, 7}}, + {{5, 6, 7, 8}}, + {{1, 6, 7, 8}}, + {{5, 1, 7, 8}}, + {{5, 6, 7, 1}}, + {{5, 6, 7, 8, 9}}, + {{1, 6, 7, 8, 9}}, + {{5, 1, 7, 8, 9}}, + {{5, 6, 
7, 1, 9}}, + {{5, 6, 7, 8, 1}}, +}; + +const std::vector> inputShapes6DAxes2 = { + {{5, 6, 7, 8, 9, 10}}, + {{1, 1, 7, 8, 9, 10}}, + {{5, 1, 7, 1, 9, 10}}, + {{5, 6, 7, 1, 1, 10}}, + {{5, 6, 7, 8, 1, 1}}, + {{1, 6, 7, 1, 9, 10}}, + {{5, 1, 7, 8, 1, 10}}, + {{5, 6, 7, 1, 9, 1}}, + {{1, 6, 7, 8, 1, 10}}, + {{5, 1, 7, 8, 9, 1}}, + {{1, 6, 7, 8, 9, 1}}, }; const std::vector axes2 = {2}; @@ -382,12 +338,8 @@ const auto GatherAxes2 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes12), testing::ValuesIn(axes2), - testing::ValuesIn(inputShapesAxes2), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes2)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -402,12 +354,8 @@ const auto Gather6dAxes2 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes1), testing::ValuesIn(axes2), - testing::ValuesIn(inputShapes6DAxes2), - testing::ValuesIn(netPrecisionsFP16), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6DAxes2)), + testing::Values(ov::element::f16), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -418,32 +366,32 @@ INSTANTIATE_TEST_SUITE_P( GatherLayerTest::getTestCaseName ); -const std::vector> inputShapesAxes1 = { - std::vector{5, 6}, - std::vector{5, 6, 7}, - std::vector{5, 6, 7, 8}, - std::vector{1, 6, 7, 8}, - std::vector{5, 6, 1, 8}, - std::vector{5, 6, 7, 1}, - std::vector{5, 6, 7, 8, 9}, - std::vector{1, 6, 7, 
8, 9}, - std::vector{5, 6, 1, 8, 9}, - std::vector{5, 6, 7, 1, 9}, - std::vector{5, 6, 7, 8, 1}, -}; - -const std::vector> inputShapes6DAxes1 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{1, 6, 1, 8, 9, 10}, - std::vector{5, 6, 1, 1, 9, 10}, - std::vector{5, 6, 7, 1, 1, 10}, - std::vector{5, 6, 7, 8, 1, 1}, - std::vector{1, 6, 7, 1, 9, 10}, - std::vector{5, 6, 1, 8, 1, 10}, - std::vector{5, 6, 1, 8, 9, 1}, - std::vector{1, 6, 7, 8, 1, 10}, - std::vector{1, 6, 7, 8, 9, 1}, - std::vector{5, 6, 7, 1, 9, 1}, +const std::vector> inputShapesAxes1 = { + {{5, 6}}, + {{5, 6, 7}}, + {{5, 6, 7, 8}}, + {{1, 6, 7, 8}}, + {{5, 6, 1, 8}}, + {{5, 6, 7, 1}}, + {{5, 6, 7, 8, 9}}, + {{1, 6, 7, 8, 9}}, + {{5, 6, 1, 8, 9}}, + {{5, 6, 7, 1, 9}}, + {{5, 6, 7, 8, 1}}, +}; + +const std::vector> inputShapes6DAxes1 = { + {{5, 6, 7, 8, 9, 10}}, + {{1, 6, 1, 8, 9, 10}}, + {{5, 6, 1, 1, 9, 10}}, + {{5, 6, 7, 1, 1, 10}}, + {{5, 6, 7, 8, 1, 1}}, + {{1, 6, 7, 1, 9, 10}}, + {{5, 6, 1, 8, 1, 10}}, + {{5, 6, 1, 8, 9, 1}}, + {{1, 6, 7, 8, 1, 10}}, + {{1, 6, 7, 8, 9, 1}}, + {{5, 6, 7, 1, 9, 1}}, }; const std::vector axes1 = {1}; @@ -452,12 +400,8 @@ const auto GatherAxes1 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes12), testing::ValuesIn(axes1), - testing::ValuesIn(inputShapesAxes1), - testing::ValuesIn(netPrecisionsI32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes1)), + testing::Values(ov::element::i32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -472,12 +416,8 @@ const auto Gather6dAxes1 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes1), testing::ValuesIn(axes1), - testing::ValuesIn(inputShapes6DAxes1), - 
testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6DAxes1)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -488,33 +428,33 @@ INSTANTIATE_TEST_SUITE_P( GatherLayerTest::getTestCaseName ); -const std::vector> inputShapesAxes0 = { - std::vector{5}, - std::vector{5, 6}, - std::vector{5, 6, 7}, - std::vector{5, 6, 7, 8}, - std::vector{5, 1, 7, 8}, - std::vector{5, 6, 1, 8}, - std::vector{5, 6, 7, 1}, - std::vector{5, 6, 7, 8, 9}, - std::vector{5, 1, 7, 8, 9}, - std::vector{5, 6, 1, 8, 9}, - std::vector{5, 6, 7, 1, 9}, - std::vector{5, 6, 7, 8, 1}, -}; - -const std::vector> inputShapes6DAxes0 = { - std::vector{5, 6, 7, 8, 9, 10}, - std::vector{5, 1, 1, 8, 9, 10}, - std::vector{5, 6, 1, 1, 9, 10}, - std::vector{5, 6, 7, 1, 1, 10}, - std::vector{5, 6, 7, 8, 1, 1}, - std::vector{5, 1, 7, 1, 9, 10}, - std::vector{5, 6, 1, 8, 1, 10}, - std::vector{5, 6, 1, 8, 9, 1}, - std::vector{5, 1, 7, 8, 1, 10}, - std::vector{5, 1, 7, 8, 9, 1}, - std::vector{5, 6, 7, 1, 9, 1}, +const std::vector> inputShapesAxes0 = { + {{5}}, + {{5, 6}}, + {{5, 6, 7}}, + {{5, 6, 7, 8}}, + {{5, 1, 7, 8}}, + {{5, 6, 1, 8}}, + {{5, 6, 7, 1}}, + {{5, 6, 7, 8, 9}}, + {{5, 1, 7, 8, 9}}, + {{5, 6, 1, 8, 9}}, + {{5, 6, 7, 1, 9}}, + {{5, 6, 7, 8, 1}}, +}; + +const std::vector> inputShapes6DAxes0 = { + {{5, 6, 7, 8, 9, 10}}, + {{5, 1, 1, 8, 9, 10}}, + {{5, 6, 1, 1, 9, 10}}, + {{5, 6, 7, 1, 1, 10}}, + {{5, 6, 7, 8, 1, 1}}, + {{5, 1, 7, 1, 9, 10}}, + {{5, 6, 1, 8, 1, 10}}, + {{5, 6, 1, 8, 9, 1}}, + {{5, 1, 7, 8, 1, 10}}, + {{5, 1, 7, 8, 9, 1}}, + {{5, 6, 7, 1, 9, 1}}, }; const std::vector axes0 = {0}; @@ -523,12 +463,8 @@ const auto GatherAxes0 = []() { return 
testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes12), testing::ValuesIn(axes0), - testing::ValuesIn(inputShapesAxes0), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesAxes0)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -543,12 +479,8 @@ const auto Gather6dAxes0 = []() { return testing::Combine(testing::ValuesIn(indices), testing::ValuesIn(indicesShapes1), testing::ValuesIn(axes0), - testing::ValuesIn(inputShapes6DAxes0), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6DAxes0)), + testing::Values(ov::element::f32), testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -560,14 +492,10 @@ INSTANTIATE_TEST_SUITE_P( ); const auto GatherAxes0Optimized = []() { - return testing::Combine(testing::ValuesIn({std::vector{4, 8, 2, 2}}), - testing::ValuesIn({std::vector{}}), - testing::ValuesIn({std::tuple{0, 0}}), - testing::ValuesIn(netPrecisionsFP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), + return testing::Combine(testing::ValuesIn(ov::test::static_shapes_to_test_representation(std::vector>({{{4, 8, 2, 2}}}))), + testing::Values(ov::Shape({})), + testing::Values(std::tuple(0, 0)), + testing::Values(ov::element::f32), 
testing::Values(ov::test::utils::DEVICE_GPU)); }; @@ -579,18 +507,14 @@ INSTANTIATE_TEST_SUITE_P( ); gather7ParamsTuple dummyParams = { - std::vector{2, 3}, - std::vector{2, 2}, + ov::test::static_shapes_to_test_representation(std::vector({{2, 3}})), + ov::Shape({2, 2}), std::tuple{1, 1}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::element::f32, ov::test::utils::DEVICE_GPU, }; -std::vector> indicesData = { +std::vector> indicesData = { {0, 1, 2, 0}, // positive in bound {-1, -2, -3, -1}, // negative in bound {-1, 0, 1, 2}, // positive and negative in bound @@ -604,6 +528,10 @@ const auto gatherWithIndicesParams = testing::Combine( testing::ValuesIn(indicesData) ); -INSTANTIATE_TEST_CASE_P(smoke, Gather8withIndicesDataLayerTest, gatherWithIndicesParams, Gather8withIndicesDataLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke, + Gather8withIndicesDataLayerTest, + gatherWithIndicesParams, + Gather8withIndicesDataLayerTest::getTestCaseName +); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_elements.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_elements.cpp index dfff95ef67bc9b..ceb5b433de6a54 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_elements.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_elements.cpp @@ -5,222 +5,220 @@ #include #include -#include "single_layer_tests/gather_elements.hpp" +#include "single_op_tests/gather_elements.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset6; - namespace { +using ov::test::GatherElementsLayerTest; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - 
InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, +const std::vector model_types = { + ov::element::f32, + ov::element::f16, + ov::element::i32, }; -const std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector indices_types = { + ov::element::i32, + ov::element::i64, }; INSTANTIATE_TEST_SUITE_P(smoke_set1, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector({2, 2})), - ::testing::Values(std::vector({2, 2})), - ::testing::ValuesIn(std::vector({-1, 0, 1})), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 2}})), + ::testing::Values(ov::Shape{2, 2}), + ::testing::ValuesIn(std::vector{-1, 0, 1}), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_set2, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector({2, 2, 1})), - ::testing::Values(std::vector({4, 2, 1})), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 2, 1}})), + ::testing::Values(ov::Shape{4, 2, 1}), ::testing::ValuesIn(std::vector({0, -3})), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_set3, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector({2, 2, 3, 5})), - ::testing::Values(std::vector({2, 2, 3, 7})), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 2, 3, 5}})), + ::testing::Values(ov::Shape{2, 2, 3, 7}), ::testing::Values(3, -1), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + 
::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_set4, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector({3, 2, 3, 8})), - ::testing::Values(std::vector({2, 2, 3, 8})), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{3, 2, 3, 8}})), + ::testing::Values(ov::Shape{2, 2, 3, 8}), ::testing::Values(0, -4), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_set5, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector({3, 2, 3, 4, 8})), - ::testing::Values(std::vector({3, 2, 3, 5, 8})), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{3, 2, 3, 4, 8}})), + ::testing::Values(ov::Shape{3, 2, 3, 5, 8}), ::testing::Values(3, -2), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank4axis0, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{7, 7, 8, 4}), - ::testing::Values(std::vector{2, 7, 8, 4}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{7, 7, 8, 4}})), + ::testing::Values(ov::Shape{2, 7, 8, 4}), ::testing::Values(0), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); 
INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank4axis1, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{6, 1, 8, 4}), - ::testing::Values(std::vector{6, 8, 8, 4}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{6, 1, 8, 4}})), + ::testing::Values(ov::Shape{6, 8, 8, 4}), ::testing::Values(1, -3), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank4axis2, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{6, 7, 4, 4}), - ::testing::Values(std::vector{6, 7, 2, 4}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{6, 7, 4, 4}})), + ::testing::Values(ov::Shape{6, 7, 2, 4}), ::testing::Values(2, -2), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank4axis3, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{6, 5, 8, 7}), - ::testing::Values(std::vector{6, 5, 8, 7}), - ::testing::Values(3, -1), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{6, 5, 8, 7}})), + ::testing::Values(ov::Shape{6, 5, 8, 7}), + ::testing::Values(1, -3), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank5axis0, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{2, 3, 9, 4, 9}), - 
::testing::Values(std::vector{1, 3, 9, 4, 9}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 3, 9, 4, 9}})), + ::testing::Values(ov::Shape{1, 3, 9, 4, 9}), ::testing::Values(0), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank5axis1, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{2, 3, 5, 4, 7}), - ::testing::Values(std::vector{2, 9, 5, 4, 7}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 3, 5, 4, 7}})), + ::testing::Values(ov::Shape{2, 9, 5, 4, 7}), ::testing::Values(1, -4), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank5axis2, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{1, 2, 6, 8, 9}), - ::testing::Values(std::vector{1, 2, 6, 8, 9}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{1, 2, 6, 8, 9}})), + ::testing::Values(ov::Shape{1, 2, 6, 8, 9}), ::testing::Values(2, -3), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank5axis3, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{2, 2, 4, 7, 7}), - ::testing::Values(std::vector{2, 2, 4, 3, 7}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 2, 4, 7, 7}})), + 
::testing::Values(ov::Shape{2, 2, 4, 3, 7}), ::testing::Values(3, -2), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank5axis4, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{1, 3, 9, 3, 2}), - ::testing::Values(std::vector{1, 3, 9, 3, 9}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{1, 3, 9, 3, 2}})), + ::testing::Values(ov::Shape{1, 3, 9, 3, 9}), ::testing::Values(4, -1), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis0, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{3, 3, 2, 4, 4, 3}), - ::testing::Values(std::vector{7, 3, 2, 4, 4, 3}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{3, 3, 2, 4, 4, 3}})), + ::testing::Values(ov::Shape{7, 3, 2, 4, 4, 3}), ::testing::Values(0), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis1, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{1, 6, 2, 3, 5, 9}), - ::testing::Values(std::vector{1, 6, 2, 3, 5, 9}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{1, 6, 2, 3, 5, 9}})), + ::testing::Values(ov::Shape{1, 6, 2, 3, 5, 9}), ::testing::Values(1, -5), - ::testing::ValuesIn(inputPrecisions), - 
::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis2, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{2, 3, 9, 7, 2, 1}), - ::testing::Values(std::vector{2, 3, 5, 7, 2, 1}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 3, 9, 7, 2, 1}})), + ::testing::Values(ov::Shape{2, 3, 5, 7, 2, 1}), ::testing::Values(2, -4), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis3, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{1, 3, 4, 5, 1, 3}), - ::testing::Values(std::vector{1, 3, 4, 4, 1, 3}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{1, 3, 4, 5, 1, 3}})), + ::testing::Values(ov::Shape{1, 3, 4, 4, 1, 3}), ::testing::Values(3, -3), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis4, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{1, 3, 2, 4, 3, 3}), - ::testing::Values(std::vector{1, 3, 2, 4, 6, 3}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{1, 3, 2, 4, 3, 3}})), + ::testing::Values(ov::Shape{1, 3, 2, 4, 6, 3}), ::testing::Values(4, -2), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), 
::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherElements_rank6axis5, GatherElementsLayerTest, ::testing::Combine( - ::testing::Values(std::vector{2, 1, 7, 8, 1, 6}), - ::testing::Values(std::vector{2, 1, 7, 8, 1, 5}), + ::testing::Values(ov::test::static_shapes_to_test_representation({ov::Shape{2, 1, 7, 8, 1, 6}})), + ::testing::Values(ov::Shape{2, 1, 7, 8, 1, 5}), ::testing::Values(5, -1), - ::testing::ValuesIn(inputPrecisions), - ::testing::ValuesIn(idxPrecisions), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(indices_types), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherElementsLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_nd.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_nd.cpp index f2c7896b7bcc51..78ba9bf3e2609f 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_nd.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_nd.cpp @@ -4,107 +4,103 @@ #include -#include "single_layer_tests/gather_nd.hpp" +#include "single_op_tests/gather_nd.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset5; - namespace { +using ov::test::GatherNDLayerTest; +using ov::test::GatherND8LayerTest; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32, }; -const std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector idxPrecisions = { + ov::element::i32, + ov::element::i64, }; // set1 -const auto gatherNDArgsSubset1 = ::testing::Combine( - 
::testing::ValuesIn(std::vector>( - { {2, 2}, {2, 3, 4} })), // Data shape - ::testing::ValuesIn(std::vector>( - { {2, 1}, {2, 1, 1} })), // Indices shape - ::testing::ValuesIn(std::vector({ 0, 1 })) // Batch dims -); +std::vector> shapes_subset1_static = std::vector>({ + {{2, 2}}, {{2, 3, 4}} }); +std::vector indices_subset1_static = std::vector({ + {2, 1}, {2, 1, 1}}); // set2 -const auto gatherNDArgsSubset2 = ::testing::Combine( - ::testing::ValuesIn(std::vector>( - { {15, 12, 20, 15, 2}, {15, 12, 18, 7, 17} })), // Data shape - ::testing::ValuesIn(std::vector>( - { {15, 12, 2}, {15, 12, 5, 9, 1, 3} })), // Indices shape - ::testing::ValuesIn(std::vector({ 1, 2 })) // Batch dims -); +std::vector> shapes_subset2_static = std::vector>({ + {{15, 12, 20, 15, 2}}, {{15, 12, 18, 7, 17}}}); +std::vector indices_subset2_static = std::vector({ + {15, 12, 2}, {15, 12, 5, 9, 1, 3}}); // set3 -const auto gatherNDArgsSubset3 = ::testing::Combine( - ::testing::ValuesIn(std::vector>( - { {4, 3, 2, 5, 5, 2}, {4, 3, 2, 5, 7, 2} })), // Data shape - ::testing::ValuesIn(std::vector>( - { {4, 3, 2, 5, 1}, {4, 3, 2, 5, 6, 2} })), // Indices shape - ::testing::ValuesIn(std::vector({ 3, 4 })) // Batch dims -); +std::vector> shapes_subset3_static = std::vector>({ + {{4, 3, 2, 5, 5, 2}}, {{4, 3, 2, 5, 7, 2}} }); +std::vector indices_subset3_static = std::vector({ + {4, 3, 2, 5, 1}, {4, 3, 2, 5, 6, 2}}); // -------------------------------- V5 -------------------------------- INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set1, GatherNDLayerTest, ::testing::Combine( - gatherNDArgsSubset1, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset1_static)), + ::testing::ValuesIn(std::vector(indices_subset1_static)), + ::testing::ValuesIn(std::vector({ 0, 1 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), 
GatherNDLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set2, GatherNDLayerTest, ::testing::Combine( - gatherNDArgsSubset2, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset2_static)), + ::testing::ValuesIn(std::vector(indices_subset2_static)), + ::testing::ValuesIn(std::vector({ 1, 2 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherNDLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherND5_set3, GatherNDLayerTest, ::testing::Combine( - gatherNDArgsSubset3, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset3_static)), + ::testing::ValuesIn(std::vector(indices_subset3_static)), + ::testing::ValuesIn(std::vector({ 3, 4 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherNDLayerTest::getTestCaseName); // -------------------------------- V8 -------------------------------- INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set1, GatherND8LayerTest, ::testing::Combine( - gatherNDArgsSubset1, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset1_static)), + ::testing::ValuesIn(std::vector(indices_subset1_static)), + ::testing::ValuesIn(std::vector({ 0, 1 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherND8LayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set2, GatherND8LayerTest, ::testing::Combine( - gatherNDArgsSubset2, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset2_static)), + 
::testing::ValuesIn(std::vector(indices_subset2_static)), + ::testing::ValuesIn(std::vector({ 1, 2 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherND8LayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GatherND8_set3, GatherND8LayerTest, ::testing::Combine( - gatherNDArgsSubset3, + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(shapes_subset3_static)), + ::testing::ValuesIn(std::vector(indices_subset3_static)), + ::testing::ValuesIn(std::vector({ 3, 4 })), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values({})), + ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherND8LayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_tree.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_tree.cpp index 1cccdf322512d6..e72acb1ce9e7df 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_tree.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/gather_tree.cpp @@ -4,24 +4,24 @@ #include -#include "single_layer_tests/gather_tree.hpp" +#include "single_op_tests/gather_tree.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::GatherTreeLayerTest; +using ov::test::utils::InputLayerType; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32 }; -const std::vector> inputShapes = { {5, 1, 10}, {1, 1, 10}, {20, 1, 10}, {20, 20, 10} }; 
+const std::vector inputShapes = { {5, 1, 10}, {1, 1, 10}, {20, 1, 10}, {20, 20, 10} }; -const std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +const std::vector secondaryInputTypes = { + InputLayerType::CONSTANT, + InputLayerType::PARAMETER }; INSTANTIATE_TEST_SUITE_P(Basic_smoke, GatherTreeLayerTest, @@ -29,10 +29,6 @@ INSTANTIATE_TEST_SUITE_P(Basic_smoke, GatherTreeLayerTest, ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(secondaryInputTypes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), GatherTreeLayerTest::getTestCaseName); From 2e76be18e22138e3ec876e6b66db201ffbe3ab62 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Thu, 26 Oct 2023 18:07:13 +0100 Subject: [PATCH 083/275] [CPU] Bitwise operations (reference implementation) (#20474) --- src/plugins/intel_cpu/src/cpu_types.cpp | 8 + src/plugins/intel_cpu/src/cpu_types.h | 4 + src/plugins/intel_cpu/src/nodes/eltwise.cpp | 364 +++++++++++++----- .../single_layer_tests/classes/eltwise.cpp | 95 +++-- .../instances/common/eltwise.cpp | 89 +++++ .../functional/test_utils/cpu_test_utils.cpp | 22 +- .../src/base/utils/compare_results.cpp | 1 + .../ov_helpers/ov_models/src/eltwise.cpp | 9 + .../ov_helpers/ov_models/src/input_layer.cpp | 3 +- .../include/common_test_utils/test_enums.hpp | 6 +- .../common_test_utils/src/test_enums.cpp | 12 + .../skip_configs/CPU/expected_failures_OP.csv | 4 - 12 files changed, 487 insertions(+), 130 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 6f5a84701b184d..139685f5882103 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp 
@@ -70,6 +70,10 @@ static const TypeToNameMap& get_type_to_name_tbl() { { "SoftSign", Type::Eltwise }, { "Select", Type::Eltwise}, { "Log", Type::Eltwise }, + { "BitwiseAnd", Type::Eltwise }, + { "BitwiseNot", Type::Eltwise }, + { "BitwiseOr", Type::Eltwise }, + { "BitwiseXor", Type::Eltwise }, { "Reshape", Type::Reshape }, { "Squeeze", Type::Reshape }, { "Unsqueeze", Type::Reshape }, @@ -386,6 +390,10 @@ std::string algToString(const Algorithm alg) { CASE(EltwiseErf); CASE(EltwiseSoftSign); CASE(EltwiseLog); + CASE(EltwiseBitwiseAnd); + CASE(EltwiseBitwiseNot); + CASE(EltwiseBitwiseOr); + CASE(EltwiseBitwiseXor); CASE(FQCommon); CASE(FQQuantization); CASE(FQBinarization); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 9afbe2d7485ddd..8b38ed8e043c23 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -182,6 +182,10 @@ enum class Algorithm { EltwiseErf, EltwiseSoftSign, EltwiseLog, + EltwiseBitwiseAnd, + EltwiseBitwiseNot, + EltwiseBitwiseOr, + EltwiseBitwiseXor, // FakeQuantize algorithms FQCommon, diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 852acc7487d318..aae4f68bd14234 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -17,6 +17,7 @@ #include "ie_ngraph_utils.hpp" #include #include +#include #include #include @@ -36,6 +37,10 @@ #include "ngraph/ngraph.hpp" #include +#include +#include +#include +#include #include "transformations/cpu_opset/common/op/power_static.hpp" #include "transformations/cpu_opset/common/op/leaky_relu.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" @@ -717,7 +722,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vpmovzxbd(vmm_src, op); break; default: - assert(!"unknown src_prc"); + OPENVINO_THROW("unknown src_prc"); } switch (dst_prc) { @@ -730,7 +735,7 @@ struct 
jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vcvtps2dq(vmm_src, vmm_src); break; default: - assert(!"unknown dst_prc"); + OPENVINO_THROW("unknown dst_prc"); } } } @@ -765,7 +770,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vmovq(xmm_src, reg_tmp_64); break; default: - assert(!"unknown src_prc"); + OPENVINO_THROW("unknown src_prc"); } switch (dst_prc) { @@ -778,7 +783,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vcvtps2dq(xmm_src, xmm_src); break; default: - assert(!"unknown dst_prc"); + OPENVINO_THROW("unknown dst_prc"); } } @@ -796,7 +801,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vcvtdq2ps(vmm_dst, vmm_dst); break; default: - assert(!"unknown src_prc"); + OPENVINO_THROW("unknown src_prc"); } switch (dst_prc) { @@ -868,7 +873,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener } break; default: - assert(!"unknown dst_prc"); + OPENVINO_THROW("unknown dst_prc"); } } @@ -883,7 +888,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vcvtdq2ps(xmm_dst, xmm_dst); break; default: - assert(!"unknown src_prc"); + OPENVINO_THROW("unknown src_prc"); } switch (dst_prc) { @@ -923,7 +928,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener mov(op, reg_tmp_8); break; default: - assert(!"unknown dst_prc"); + OPENVINO_THROW("unknown dst_prc"); } } }; @@ -1160,6 +1165,18 @@ const std::map& Eltwise::g {ngraph::op::v0::Log::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { node.algorithm = Algorithm::EltwiseLog; }}, + {op::v13::BitwiseAnd::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseBitwiseAnd; + }}, + {op::v13::BitwiseNot::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = 
Algorithm::EltwiseBitwiseNot; + }}, + {op::v13::BitwiseOr::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseBitwiseOr; + }}, + {op::v13::BitwiseXor::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseBitwiseXor; + }}, }; return initializers; } @@ -1544,16 +1561,12 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { /* enabled only for float at float16_t at the moment * can be extended in the future */ -template::value || - std::is_same::value> - ::type* = nullptr> -class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { +template +class EltwiseRefBaseExecutor : public Eltwise::IEltwiseExecutor { public: - EltwiseRefExecutor(Eltwise::EltwiseData opData, - const VectorDims& outBlkDims, - std::vector inpDims) + EltwiseRefBaseExecutor(Eltwise::EltwiseData opData, + const VectorDims& outBlkDims, + std::vector inpDims) : _opData(std::move(opData)), _inpDims(inpDims) { if (inpDims.empty()) { IE_THROW() << "Can not make Eltwise executor from empty input dims array"; @@ -1595,47 +1608,114 @@ class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { } } + const VectorDims& getOutDims() const override { + return _dims; + } + + size_t getBatchDimIdx() const override { + return _batchDimIdx; + } + +protected: + void init_ptr(const jit_eltwise_call_args_ptrs& args_ptrs, + const VectorDims& dims_out, + std::vector& counters, + const size_t iwork, + std::vector& src_f, + T*& dst_ptr_f) { + size_t tmp = iwork; + for (ptrdiff_t j = dims_out.size() - 1; j >= 0; j--) { + counters[j] = tmp % dims_out[j]; + tmp /= dims_out[j]; + } + + size_t index_in[MAX_ELTWISE_INPUTS] = { 0 }; + for (size_t i = 0; i < _inputNum; i++) { + index_in[i] = 0; + for (size_t j = 0; j < counters.size(); j++) { + index_in[i] += counters[j] * _src_offsets[i][j]; + } + index_in[i] /= sizeof(T); + } + + size_t index_out = 0; + for (size_t j = 0; j < counters.size(); j++) { + 
index_out += counters[j] * _dst_offsets[j]; + } + index_out /= sizeof(T); + + //std::vector src_f(_inputNum); + for (size_t i = 0; i < _inputNum; i++) { + src_f[i] = (reinterpret_cast(args_ptrs.src_ptr[i]) + index_in[i])[0]; + } + dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr) + index_out; + } + + const Eltwise::EltwiseData _opData; + VectorDims _dims; + VectorDims _src_offsets[MAX_ELTWISE_INPUTS]; + VectorDims _dst_offsets; + size_t _fullWorkAmount = 0; + size_t _inputNum = 0; + size_t _batchDimIdx = 0; + std::vector _inpDims; +}; + +/* enabled only for float at float16_t at the moment + * can be extended in the future */ +template::value || + std::is_same::value> + ::type * = nullptr> +class EltwiseRefExecutor : public EltwiseRefBaseExecutor { +public: + EltwiseRefExecutor(Eltwise::EltwiseData opData, + const VectorDims& outBlkDims, + std::vector inpDims) : EltwiseRefBaseExecutor(opData, outBlkDims, inpDims) { + } + void exec(const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override { - if (_opData.algo == Algorithm::EltwiseLog) { + if (this->_opData.algo == Algorithm::EltwiseLog) { const T* src_ptr_f = reinterpret_cast(args_ptrs.src_ptr[0]); T* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr); - parallel_for(_fullWorkAmount, [&](size_t i) { + parallel_for(this->_fullWorkAmount, [&](size_t i) { dst_ptr_f[i] = logf(src_ptr_f[i]); }); return; } - if (_opData.algo == Algorithm::EltwisePowerStatic) { + if (this->_opData.algo == Algorithm::EltwisePowerStatic) { const T* src_ptr_f = reinterpret_cast(args_ptrs.src_ptr[0]); T* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr); - if (_opData.alpha == 2) { - parallel_for(_fullWorkAmount, [&](size_t i) { - dst_ptr_f[i] = (_opData.beta * src_ptr_f[i] + _opData.gamma) * - (_opData.beta * src_ptr_f[i] + _opData.gamma); + if (this->_opData.alpha == 2) { + parallel_for(this->_fullWorkAmount, [&](size_t i) { + dst_ptr_f[i] = (this->_opData.beta * src_ptr_f[i] + this->_opData.gamma) * + (this->_opData.beta * 
src_ptr_f[i] + this->_opData.gamma); }); } else { - parallel_for(_fullWorkAmount, [&](size_t i) { - dst_ptr_f[i] = powf(_opData.beta * src_ptr_f[i] + _opData.gamma, _opData.alpha); + parallel_for(this->_fullWorkAmount, [&](size_t i) { + dst_ptr_f[i] = powf(this->_opData.beta * src_ptr_f[i] + this->_opData.gamma, this->_opData.alpha); }); } return; } - if (_opData.algo == Algorithm::EltwisePowerDynamic) { + if (this->_opData.algo == Algorithm::EltwisePowerDynamic) { const T* src_ptr_f = reinterpret_cast(args_ptrs.src_ptr[0]); const T* src_ptr_f_pow = reinterpret_cast(args_ptrs.src_ptr[1]); T* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr); uint32_t count_of_power_values = 1; - for (unsigned long i : _inpDims[1]) { + for (unsigned long i : this->_inpDims[1]) { count_of_power_values *= i; } if (count_of_power_values == 1) { if (src_ptr_f_pow[0] != 2) { - parallel_for(_fullWorkAmount, [&](size_t i) { + parallel_for(this->_fullWorkAmount, [&](size_t i) { dst_ptr_f[i] = powf(src_ptr_f[i], src_ptr_f_pow[0]); }); } else { - parallel_for(_fullWorkAmount, [&](size_t i) { + parallel_for(this->_fullWorkAmount, [&](size_t i) { dst_ptr_f[i] = src_ptr_f[i] * src_ptr_f[i]; }); } @@ -1644,46 +1724,23 @@ class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { } std::shared_ptr ref_eltwise_injector = nullptr; - if (_opData.onednnAlgorithm != dnnl::algorithm::undef) { + if (this->_opData.onednnAlgorithm != dnnl::algorithm::undef) { ref_eltwise_injector = std::make_shared( - static_cast(_opData.onednnAlgorithm), _opData.alpha, _opData.beta, 1.f); + static_cast(this->_opData.onednnAlgorithm), this->_opData.alpha, this->_opData.beta, 1.f); } parallel_nt(0, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; - splitter(_fullWorkAmount, nthr, ithr, start, end); + splitter(this->_fullWorkAmount, nthr, ithr, start, end); std::vector counters(dims_out.size(), 0); for (size_t iwork = start; iwork < end; ++iwork) { - size_t tmp = iwork; - for (ptrdiff_t j = dims_out.size() - 
1; j >= 0; j--) { - counters[j] = tmp % dims_out[j]; - tmp /= dims_out[j]; - } - - size_t index_in[MAX_ELTWISE_INPUTS] = {0}; - for (size_t i = 0; i < _inputNum; i++) { - index_in[i] = 0; - for (size_t j = 0; j < counters.size(); j++) { - index_in[i] += counters[j] * _src_offsets[i][j]; - } - index_in[i] /= sizeof(T); - } - - size_t index_out = 0; - for (size_t j = 0; j < counters.size(); j++) { - index_out += counters[j] * _dst_offsets[j]; - } - index_out /= sizeof(T); - - std::vector src_f(_inputNum); - for (size_t i = 0; i < _inputNum; i++) { - src_f[i] = (reinterpret_cast(args_ptrs.src_ptr[i]) + index_in[i])[0]; - } - T* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr) + index_out; + std::vector src_f(this->_inputNum); + T* dst_ptr_f; + this->init_ptr(args_ptrs, dims_out, counters, iwork, src_f, dst_ptr_f); - switch (_opData.algo) { + switch (this->_opData.algo) { case Algorithm::EltwiseRelu: case Algorithm::EltwiseGeluErf: case Algorithm::EltwiseGeluTanh: @@ -1730,8 +1787,8 @@ class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { // @todo implement proper isinfinite for non-float precisions case Algorithm::EltwiseIsFinite: *dst_ptr_f = std::isfinite(static_cast(src_f[0])); break; case Algorithm::EltwiseIsInf: - *dst_ptr_f = (_opData.alpha && (src_f[0] == -std::numeric_limits::infinity())) || - (_opData.beta && (src_f[0] == std::numeric_limits::infinity())); + *dst_ptr_f = (this->_opData.alpha && (src_f[0] == -std::numeric_limits::infinity())) || + (this->_opData.beta && (src_f[0] == std::numeric_limits::infinity())); break; case Algorithm::EltwiseIsNaN: *dst_ptr_f = std::isnan(src_f[0]); break; case Algorithm::EltwiseSelect: *dst_ptr_f = src_f[0] ? 
src_f[1] : src_f[2]; break; @@ -1740,24 +1797,63 @@ class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { } }); } +}; - const VectorDims& getOutDims() const override { - return _dims; +template::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value> + ::type * = nullptr> +class BitwiseRefExecutor : public EltwiseRefBaseExecutor { +public: + BitwiseRefExecutor(Eltwise::EltwiseData opData, + const VectorDims& outBlkDims, + std::vector inpDims) : EltwiseRefBaseExecutor(opData, outBlkDims, inpDims) { } - size_t getBatchDimIdx() const override { - return _batchDimIdx; - } + void exec(const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override { + std::shared_ptr ref_eltwise_injector = nullptr; + if (this->_opData.onednnAlgorithm != dnnl::algorithm::undef) { + ref_eltwise_injector = std::make_shared( + static_cast(this->_opData.onednnAlgorithm), this->_opData.alpha, this->_opData.beta, 1.f); + } -private: - const Eltwise::EltwiseData _opData; - VectorDims _dims; - VectorDims _src_offsets[MAX_ELTWISE_INPUTS]; - VectorDims _dst_offsets; - size_t _fullWorkAmount = 0; - size_t _inputNum = 0; - size_t _batchDimIdx = 0; - std::vector _inpDims; + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t start = 0, end = 0; + splitter(this->_fullWorkAmount, nthr, ithr, start, end); + + std::vector counters(dims_out.size(), 0); + + for (size_t iwork = start; iwork < end; ++iwork) { + std::vector src_f(this->_inputNum); + T* dst_ptr_f; + this->init_ptr(args_ptrs, dims_out, counters, iwork, src_f, dst_ptr_f); + + switch (this->_opData.algo) { + case Algorithm::EltwiseBitwiseAnd: { + *dst_ptr_f = src_f[0] & src_f[1]; + break; + } + case Algorithm::EltwiseBitwiseNot: { + *dst_ptr_f = ~src_f[0]; + break; + } + case Algorithm::EltwiseBitwiseOr: { + *dst_ptr_f = src_f[0] | src_f[1]; + break; + } + case Algorithm::EltwiseBitwiseXor: { + *dst_ptr_f = src_f[0] ^ src_f[1]; + break; + } + default: 
IE_THROW() << "Unsupported operation type for Eltwise executor"; + } + } + }); + } }; } // namespace @@ -1771,15 +1867,47 @@ bool Eltwise::EltwiseData::operator==(const EltwiseData &rhs) const noexcept { } static Eltwise::executorPtr buildRefExecutor(const EltwiseKey& key) { - if (key.outPrc == Precision::FP16) { - return std::make_shared>(key.eltwise_data.front(), - key.outBlkDims, - key.inpDims); - } - // use float reference executor for any other precision for now - return std::make_shared>(key.eltwise_data.front(), - key.outBlkDims, - key.inpDims); + switch (key.outPrc) { + case Precision::FP16: + return std::make_shared>(key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + case Precision::I8: + return std::make_shared::value_type>>( + key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + + case Precision::U8: + return std::make_shared::value_type>>( + key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + + case Precision::I16: + return std::make_shared::value_type>>( + key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + + case Precision::U16: + return std::make_shared::value_type>>( + key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); +# + case Precision::I32: + return std::make_shared::value_type>>( + key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + + default: + // use float reference executor for any other precision for now + return std::make_shared>(key.eltwise_data.front(), + key.outBlkDims, + key.inpDims); + } } static Eltwise::executorPtr buildExecutor(const EltwiseKey& key) { @@ -1880,6 +2008,12 @@ size_t Eltwise::getOpInputsNum() const { case Algorithm::EltwiseLogicalAnd: case Algorithm::EltwiseLogicalOr: case Algorithm::EltwiseLogicalXor: + case Algorithm::EltwiseBitwiseAnd: + case Algorithm::EltwiseBitwiseOr: + case Algorithm::EltwiseBitwiseXor: + return 2; + case Algorithm::EltwiseBitwiseNot: + return 1; case Algorithm::EltwisePrelu: return 2; case Algorithm::EltwiseMulAdd: @@ -1909,7 +2043,23 
@@ void Eltwise::getSupportedDescriptors() { } void Eltwise::initSupportedPrimitiveDescriptors() { - std::vector supportedPrecisions = { + const auto isBitwise = [](const Algorithm& algorithm) { + return one_of( + algorithm, + Algorithm::EltwiseBitwiseAnd, + Algorithm::EltwiseBitwiseNot, + Algorithm::EltwiseBitwiseOr, + Algorithm::EltwiseBitwiseXor); + }; + + std::vector supportedPrecisions = isBitwise(algorithm) ? + std::vector { + Precision::U8, + Precision::I8, + Precision::U16, + Precision::I16, + Precision::I32 + } : std::vector { Precision::FP32, Precision::U8, Precision::I8, @@ -1918,7 +2068,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() { Precision::BF16, Precision::FP16, Precision::I32 - }; + }; if (!supportedPrimitiveDescriptors.empty()) return; @@ -1926,7 +2076,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() { // if dim rank is greater than the maximum possible, we should use the reference execution bool canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK; // TODO: Add EltwiseLog algorithm support for JIT implementation - canUseOptimizedImpl &= !one_of(getAlgorithm(), Algorithm::EltwiseLog); + canUseOptimizedImpl &= !(one_of(getAlgorithm(), Algorithm::EltwiseLog) || isBitwise(getAlgorithm())); + bool canUseOptimizedShapeAgnosticImpl = isDynamicNode() && canUseOptimizedImpl; if (!canUseOptimizedImpl && !fusedWith.empty()) { @@ -1986,6 +2137,16 @@ void Eltwise::initSupportedPrimitiveDescriptors() { } #if defined(OV_CPU_WITH_ACL) + auto filterPrecision = [&](const Precision& prc, const Precision& forcedPrec) { + if (isBitwise(algorithm)) { + if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) { + IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support " << prc << " precision."; + } + return prc; + } + return forcedPrec; + }; + // Use original output precision as a reference point since some eltwise algorithms have 
non-float inputs (i.e. EltwiseSelect) Precision forcedPrec = getOriginalOutputPrecisionAtPort(0) == Precision::FP16 ? Precision::FP16 : Precision::FP32; // ACL implementation supports only identical precisions on inputs/outputs so they are aligned it to highest one @@ -2003,12 +2164,18 @@ void Eltwise::initSupportedPrimitiveDescriptors() { } for (size_t i = 0; i < inputPrecisions.size(); i++) { - inputPrecisions[i] = forcedPrec; + inputPrecisions[i] = filterPrecision(inputPrecisions[i], forcedPrec); } - outputPrecision = forcedPrec; + outputPrecision = filterPrecision(outputPrecision, forcedPrec); #else - auto filterPrecision = [&](Precision& prc) { + auto filterPrecision = [&](const Precision& prc) { if (implType == EltwiseImplType::reference) { + if (isBitwise(algorithm)) { + if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) { + IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support " << prc << " precision."; + } + return prc; + } return Precision(Precision::FP32); } else if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) { if (prc == Precision::U32 || prc == Precision::I64 || prc == Precision::U64) { @@ -2678,8 +2845,19 @@ bool Eltwise::canFuse(const NodePtr& node) const { if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) return false; - // TODO: EltwiseLog is supported only via reference executor - if (getAlgorithm() == Algorithm::EltwiseLog || node->getAlgorithm() == Algorithm::EltwiseLog) + // TODO: supported only via reference executor + if (one_of(getAlgorithm(), + Algorithm::EltwiseLog, + Algorithm::EltwiseBitwiseAnd, + Algorithm::EltwiseBitwiseNot, + Algorithm::EltwiseBitwiseOr, + Algorithm::EltwiseBitwiseXor) || + one_of(node->getAlgorithm(), + Algorithm::EltwiseLog, + Algorithm::EltwiseBitwiseAnd, + Algorithm::EltwiseBitwiseNot, + Algorithm::EltwiseBitwiseOr, + Algorithm::EltwiseBitwiseXor)) 
return false; bool isIntegerNode = isIntegerComputeSupported(this); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp index 3595284e84784c..92e7ff007d5275 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp @@ -63,7 +63,26 @@ ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type& break; } } else { - params = gen_params(INT32_MAX, INT32_MIN); + switch (type) { + case ov::element::i8: + params = gen_params(INT8_MAX, INT8_MIN); + break; + case ov::element::u8: + params = gen_params(UINT8_MAX, 0); + break; + case ov::element::i16: + params = gen_params(INT16_MAX, INT16_MIN); + break; + case ov::element::u16: + params = gen_params(UINT16_MAX, 0); + break; + case ov::element::u32: + params = gen_params(UINT32_MAX, 0); + break; + default: + params = gen_params(INT32_MAX, INT32_MIN); + break; + } } return ov::test::utils::create_and_fill_tensor(type, shape, params.range, params.start_from, params.resolution); } @@ -139,34 +158,56 @@ void EltwiseLayerCPUTest::SetUp() { } ov::ParameterVector parameters{std::make_shared(netType, inputDynamicShapes.front())}; std::shared_ptr secondaryInput; - if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) { - auto param = std::make_shared(netType, inputDynamicShapes.back()); - secondaryInput = param; - parameters.push_back(param); - } else { - auto pShape = inputDynamicShapes.back(); - ngraph::Shape shape; - if (pShape.is_static()) { - shape = pShape.get_shape(); - } else { - ASSERT_TRUE(pShape.rank().is_static()); - shape = std::vector(pShape.rank().get_length(), 1); - for (size_t i = 0; i < pShape.size(); ++i) { - if (pShape[i].is_static()) { - shape[i] = pShape[i].get_length(); + if (eltwiseType != ngraph::helpers::EltwiseTypes::BITWISE_NOT) { + switch 
(secondaryInputType) { + case ngraph::helpers::InputLayerType::PARAMETER: { + auto param = std::make_shared(netType, inputDynamicShapes.back()); + secondaryInput = param; + parameters.push_back(param); + break; + } + case ngraph::helpers::InputLayerType::CONSTANT: { + auto pShape = inputDynamicShapes.back(); + ngraph::Shape shape; + if (pShape.is_static()) { + shape = pShape.get_shape(); + } else { + ASSERT_TRUE(pShape.rank().is_static()); + shape = std::vector(pShape.rank().get_length(), 1); + for (size_t i = 0; i < pShape.size(); ++i) { + if (pShape[i].is_static()) { + shape[i] = pShape[i].get_length(); + } + } } + + auto data_tensor = generate_eltwise_input(netType, shape); + if ((netType == ElementType::i8) || (netType == ElementType::u8)) { + auto data_ptr = reinterpret_cast(data_tensor.data()); + std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); + secondaryInput = ngraph::builder::makeConstant(netType, shape, data); + } else if ((netType == ElementType::i16) || (netType == ElementType::u16)) { + auto data_ptr = reinterpret_cast(data_tensor.data()); + std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); + secondaryInput = ngraph::builder::makeConstant(netType, shape, data); + } else if ((netType == ElementType::i32) || (netType == ElementType::u32)) { + auto data_ptr = reinterpret_cast(data_tensor.data()); + std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); + secondaryInput = ngraph::builder::makeConstant(netType, shape, data); + } else if (netType == ElementType::f16) { + auto data_ptr = reinterpret_cast(data_tensor.data()); + std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); + secondaryInput = ngraph::builder::makeConstant(netType, shape, data); + } else { + auto data_ptr = reinterpret_cast(data_tensor.data()); + std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); + secondaryInput = ngraph::builder::makeConstant(netType, shape, data); + } + break; + } + default: { + FAIL() << 
"Unsupported InputLayerType"; } - } - if (netType == ElementType::i32) { - auto data_tensor = generate_eltwise_input(ElementType::i32, shape); - auto data_ptr = reinterpret_cast(data_tensor.data()); - std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); - secondaryInput = ngraph::builder::makeConstant(netType, shape, data); - } else { - auto data_tensor = generate_eltwise_input(ElementType::f32, shape); - auto data_ptr = reinterpret_cast(data_tensor.data()); - std::vector data(data_ptr, data_ptr + ngraph::shape_size(shape)); - secondaryInput = ngraph::builder::makeConstant(netType, shape, data); } } auto eltwise = ngraph::builder::makeEltwise(parameters[0], secondaryInput, eltwiseType); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/eltwise.cpp index bf61d07e289d25..7bc76746861444 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/eltwise.cpp @@ -219,5 +219,94 @@ const auto params_5D_dyn_param = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_MemOrder_dyn_param, EltwiseLayerCPUTest, params_5D_dyn_param, EltwiseLayerCPUTest::getTestCaseName); +static const std::vector bitwise_in_shapes_4D = { + { + {1, -1, -1, -1}, + { + {1, 3, 4, 4}, + {1, 3, 1, 1}, + {1, 1, 1, 1} + } + }, + {{1, 3, 4, 4}, {{1, 3, 4, 4}}} +}; + +const auto params_4D_bitwise = ::testing::Combine( + ::testing::Combine( + ::testing::Values(bitwise_in_shapes_4D), + ::testing::ValuesIn({ + ngraph::helpers::EltwiseTypes::BITWISE_AND, + ngraph::helpers::EltwiseTypes::BITWISE_OR, + ngraph::helpers::EltwiseTypes::BITWISE_XOR + }), + ::testing::ValuesIn(secondaryInputTypes()), + ::testing::ValuesIn({ ov::test::utils::OpType::VECTOR }), + ::testing::ValuesIn({ ov::element::Type_t::i8, ov::element::Type_t::u8, 
ov::element::Type_t::i32 }), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(ov::AnyMap())), + ::testing::Values(CPUSpecificParams({ nhwc, nhwc }, { nhwc }, {}, "ref")), + ::testing::Values(emptyFusingSpec), + ::testing::Values(false)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Bitwise, EltwiseLayerCPUTest, params_4D_bitwise, EltwiseLayerCPUTest::getTestCaseName); + +const auto params_4D_bitwise_i16 = ::testing::Combine( + ::testing::Combine( + ::testing::Values(bitwise_in_shapes_4D), + ::testing::ValuesIn({ + ngraph::helpers::EltwiseTypes::BITWISE_AND, + ngraph::helpers::EltwiseTypes::BITWISE_OR, + ngraph::helpers::EltwiseTypes::BITWISE_XOR + }), + ::testing::ValuesIn(secondaryInputTypes()), + ::testing::ValuesIn({ ov::test::utils::OpType::VECTOR }), + ::testing::ValuesIn({ ov::element::Type_t::i16, ov::element::Type_t::u16 }), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(ov::AnyMap())), + ::testing::Values(CPUSpecificParams({ nhwc, nhwc }, { nhwc }, {}, "ref_I32$/")), + ::testing::Values(emptyFusingSpec), + ::testing::Values(false)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Bitwise_i16, EltwiseLayerCPUTest, params_4D_bitwise_i16, EltwiseLayerCPUTest::getTestCaseName); + + +const auto params_4D_bitwise_NOT = ::testing::Combine( + ::testing::Combine( + ::testing::Values(bitwise_in_shapes_4D), + ::testing::ValuesIn({ ngraph::helpers::EltwiseTypes::BITWISE_NOT }), + ::testing::ValuesIn({ ngraph::helpers::InputLayerType::CONSTANT }), + ::testing::ValuesIn({ ov::test::utils::OpType::VECTOR }), + ::testing::ValuesIn({ ov::element::Type_t::i8, ov::element::Type_t::u8, ov::element::Type_t::i32 }), + ::testing::Values(ov::element::Type_t::undefined), + 
::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(ov::AnyMap())), + ::testing::Values(CPUSpecificParams({ nhwc }, { nhwc }, {}, "ref")), + ::testing::Values(emptyFusingSpec), + ::testing::Values(false)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Bitwise_NOT, EltwiseLayerCPUTest, params_4D_bitwise_NOT, EltwiseLayerCPUTest::getTestCaseName); + +const auto params_4D_bitwise_NOT_i16 = ::testing::Combine( + ::testing::Combine( + ::testing::Values(bitwise_in_shapes_4D), + ::testing::ValuesIn({ ngraph::helpers::EltwiseTypes::BITWISE_NOT }), + ::testing::ValuesIn({ ngraph::helpers::InputLayerType::CONSTANT }), + ::testing::ValuesIn({ ov::test::utils::OpType::VECTOR }), + ::testing::ValuesIn({ ov::element::Type_t::i16, ov::element::Type_t::u16 }), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::element::Type_t::undefined), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(ov::AnyMap())), + ::testing::Values(CPUSpecificParams({ nhwc }, { nhwc }, {}, "ref_I32$/")), + ::testing::Values(emptyFusingSpec), + ::testing::Values(false)); + +INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_Bitwise_NOT_i16, EltwiseLayerCPUTest, params_4D_bitwise_NOT, EltwiseLayerCPUTest::getTestCaseName); + } // namespace Eltwise } // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp index e355ee79b75bcc..ced8d4a2d3cdd9 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp @@ -355,10 +355,27 @@ std::string CPUTestsBase::makeSelectedTypeStr(std::string implString, ngraph::el } void CPUTestsBase::updateSelectedType(const std::string& primitiveType, const ov::element::Type netType, const ov::AnyMap& config) { + if (selectedType.empty()) { 
+ selectedType = primitiveType; + } + + if (selectedType.find("$/") != std::string::npos) { + // like as regex + selectedType = selectedType.substr(0, selectedType.find("$/")); + return; + } + + selectedType.push_back('_'); + auto getExecType = [&](){ // inference_precision affects only floating point type networks - if (!netType.is_real()) + if (!netType.is_real()) { + if (netType == ov::element::u8) { + // Node::getPrimitiveDescriptorType() returns i8 for u8 + return ov::element::i8; + } return netType; + } const auto it = config.find(ov::hint::inference_precision.name()); if (it == config.end()) @@ -374,9 +391,6 @@ void CPUTestsBase::updateSelectedType(const std::string& primitiveType, const ov }; const auto execType = getExecType(); - - selectedType = primitiveType; - selectedType.push_back('_'); selectedType += InferenceEngine::details::convertPrecision(execType).name(); } diff --git a/src/tests/functional/shared_test_classes/src/base/utils/compare_results.cpp b/src/tests/functional/shared_test_classes/src/base/utils/compare_results.cpp index 5d9f13d9ba0017..e9945fa6e2d54f 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/compare_results.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/compare_results.cpp @@ -180,6 +180,7 @@ CompareMap getCompareMap() { #include "openvino/opsets/opset10_tbl.hpp" #include "openvino/opsets/opset11_tbl.hpp" #include "openvino/opsets/opset12_tbl.hpp" +#include "openvino/opsets/opset13_tbl.hpp" #include "ov_ops/opset_private_tbl.hpp" #undef _OPENVINO_OP_REG diff --git a/src/tests/ov_helpers/ov_models/src/eltwise.cpp b/src/tests/ov_helpers/ov_models/src/eltwise.cpp index 4932332e0773fb..e51afcd8ceba3d 100644 --- a/src/tests/ov_helpers/ov_models/src/eltwise.cpp +++ b/src/tests/ov_helpers/ov_models/src/eltwise.cpp @@ -3,6 +3,7 @@ // #include +#include #include "common_test_utils/test_enums.hpp" #include "ov_models/utils/ov_helpers.hpp" @@ -32,6 +33,14 @@ std::shared_ptr makeEltwise(const ov::Output& 
in0, return std::make_shared(in0, in1); case ov::test::utils::EltwiseTypes::ERF: return std::make_shared(in0); + case ngraph::helpers::EltwiseTypes::BITWISE_AND: + return std::make_shared(in0, in1); + case ngraph::helpers::EltwiseTypes::BITWISE_NOT: + return std::make_shared(in0); + case ngraph::helpers::EltwiseTypes::BITWISE_OR: + return std::make_shared(in0, in1); + case ngraph::helpers::EltwiseTypes::BITWISE_XOR: + return std::make_shared(in0, in1); default: { throw std::runtime_error("Incorrect type of Eltwise operation"); } diff --git a/src/tests/ov_helpers/ov_models/src/input_layer.cpp b/src/tests/ov_helpers/ov_models/src/input_layer.cpp index 5d8b8d417f3812..8b87774e5630dc 100644 --- a/src/tests/ov_helpers/ov_models/src/input_layer.cpp +++ b/src/tests/ov_helpers/ov_models/src/input_layer.cpp @@ -20,9 +20,10 @@ std::shared_ptr makeInputLayer(const element::Type& type, input = ngraph::builder::makeConstant(type, shape, {}, true); break; } - case ov::test::utils::InputLayerType::PARAMETER: + case ov::test::utils::InputLayerType::PARAMETER: { input = std::make_shared(type, ov::Shape(shape)); break; + } default: throw std::runtime_error("Unsupported inputType"); } diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp index 6e73dd07a5adac..5c93d211cac30b 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp @@ -56,7 +56,11 @@ enum EltwiseTypes { POWER, FLOOR_MOD, MOD, - ERF + ERF, + BITWISE_AND, + BITWISE_NOT, + BITWISE_OR, + BITWISE_XOR }; enum SqueezeOpType { diff --git a/src/tests/test_utils/common_test_utils/src/test_enums.cpp b/src/tests/test_utils/common_test_utils/src/test_enums.cpp index 8bb1cff3ce77dc..e67122d9b8af4f 100644 --- a/src/tests/test_utils/common_test_utils/src/test_enums.cpp +++ 
b/src/tests/test_utils/common_test_utils/src/test_enums.cpp @@ -70,6 +70,18 @@ std::ostream& operator<<(std::ostream& os, const ov::test::utils::EltwiseTypes t case ov::test::utils::EltwiseTypes::ERF: os << "Erf"; break; + case ov::test::utils::EltwiseTypes::BITWISE_AND: + os << "BitwiseAnd"; + break; + case ov::test::utils::EltwiseTypes::BITWISE_NOT: + os << "BitwiseNot"; + break; + case ov::test::utils::EltwiseTypes::BITWISE_OR: + os << "BitwiseOr"; + break; + case ov::test::utils::EltwiseTypes::BITWISE_XOR: + os << "BitwiseXor"; + break; default: throw std::runtime_error("NOT_SUPPORTED_OP_TYPE"); } diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv index 07f091dd7a222b..fa91f28719a834 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv @@ -1130,10 +1130,6 @@ conformance_PRelu/ReadIRTest.ImportExport/Op=PRelu.1_Type=f32_IR=20e7e74f55eb5fb conformance_RegionYolo/ReadIRTest.ImportExport/Op=RegionYolo.1_Type=f32_IR=RegionYolo-1_750_Device=CPU_Shape=static_Config=(),5.06332e-06 conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07 conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281 -conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseAnd_opset13_Device=CPU_Config=(),1 -conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseOr_opset13_Device=CPU_Config=(),1 
-conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseNot_opset13_Device=CPU_Config=(),1 conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1 conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=(),1 conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1 -conformance/OpImplCheckTest.checkPluginImplementation/Function=BitwiseXor_opset13_Device=CPU_Config=(),1 From 52d3588358bba6def31ea07b562842826b3ea631 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 26 Oct 2023 23:24:24 +0400 Subject: [PATCH 084/275] Move create model sample to the last opset (#20333) * Move create model sample to the last opset * Downgrade to opset8 * Migrate sample to the last opset * Fixed code style --- samples/cpp/model_creation_sample/main.cpp | 104 +++++++++++---------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/samples/cpp/model_creation_sample/main.cpp b/samples/cpp/model_creation_sample/main.cpp index 078bf6b7d0eb1a..8f2df4817bfe47 100644 --- a/samples/cpp/model_creation_sample/main.cpp +++ b/samples/cpp/model_creation_sample/main.cpp @@ -12,8 +12,7 @@ // clang-format off #include "openvino/openvino.hpp" -#include "openvino/opsets/opset1.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/opsets/opset13.hpp" #include "samples/args_helper.hpp" #include "samples/common.hpp" @@ -84,127 +83,130 @@ std::shared_ptr create_model(const std::string& path_to_weights) { std::vector padBegin{0, 0}; std::vector padEnd{0, 0}; - auto paramNode = std::make_shared(ov::element::Type_t::f32, ov::Shape({64, 1, 28, 28})); + auto paramNode = std::make_shared(ov::element::Type_t::f32, ov::Shape({64, 1, 28, 28})); // -------convolution 1---- auto convFirstShape = Shape{20, 1, 5, 5}; - auto convolutionFirstConstantNode = std::make_shared(element::Type_t::f32, convFirstShape, data); + auto 
convolutionFirstConstantNode = std::make_shared(element::Type_t::f32, convFirstShape, data); - auto convolutionNodeFirst = std::make_shared(paramNode->output(0), - convolutionFirstConstantNode->output(0), - Strides({1, 1}), - CoordinateDiff(padBegin), - CoordinateDiff(padEnd), - Strides({1, 1})); + auto convolutionNodeFirst = std::make_shared(paramNode->output(0), + convolutionFirstConstantNode->output(0), + Strides({1, 1}), + CoordinateDiff(padBegin), + CoordinateDiff(padEnd), + Strides({1, 1})); // -------Add-------------- auto addFirstShape = Shape{1, 20, 1, 1}; auto offset = shape_size(convFirstShape) * sizeof(float); - auto addFirstConstantNode = std::make_shared(element::Type_t::f32, addFirstShape, data + offset); + auto addFirstConstantNode = std::make_shared(element::Type_t::f32, addFirstShape, data + offset); - auto addNodeFirst = std::make_shared(convolutionNodeFirst->output(0), addFirstConstantNode->output(0)); + auto addNodeFirst = + std::make_shared(convolutionNodeFirst->output(0), addFirstConstantNode->output(0)); // -------MAXPOOL---------- Shape padBeginShape{0, 0}; Shape padEndShape{0, 0}; - auto maxPoolingNodeFirst = std::make_shared(addNodeFirst->output(0), - Strides{2, 2}, - padBeginShape, - padEndShape, - Shape{2, 2}, - op::RoundingType::CEIL); + auto maxPoolingNodeFirst = std::make_shared(addNodeFirst->output(0), + Strides{2, 2}, + Strides{1, 1}, + padBeginShape, + padEndShape, + Shape{2, 2}, + op::RoundingType::CEIL); // -------convolution 2---- auto convSecondShape = Shape{50, 20, 5, 5}; offset += shape_size(addFirstShape) * sizeof(float); auto convolutionSecondConstantNode = - std::make_shared(element::Type_t::f32, convSecondShape, data + offset); + std::make_shared(element::Type_t::f32, convSecondShape, data + offset); - auto convolutionNodeSecond = std::make_shared(maxPoolingNodeFirst->output(0), - convolutionSecondConstantNode->output(0), - Strides({1, 1}), - CoordinateDiff(padBegin), - CoordinateDiff(padEnd), - Strides({1, 1})); + auto 
convolutionNodeSecond = std::make_shared(maxPoolingNodeFirst->output(0), + convolutionSecondConstantNode->output(0), + Strides({1, 1}), + CoordinateDiff(padBegin), + CoordinateDiff(padEnd), + Strides({1, 1})); // -------Add 2------------ auto addSecondShape = Shape{1, 50, 1, 1}; offset += shape_size(convSecondShape) * sizeof(float); auto addSecondConstantNode = - std::make_shared(element::Type_t::f32, addSecondShape, data + offset); + std::make_shared(element::Type_t::f32, addSecondShape, data + offset); auto addNodeSecond = - std::make_shared(convolutionNodeSecond->output(0), addSecondConstantNode->output(0)); + std::make_shared(convolutionNodeSecond->output(0), addSecondConstantNode->output(0)); // -------MAXPOOL 2-------- - auto maxPoolingNodeSecond = std::make_shared(addNodeSecond->output(0), - Strides{2, 2}, - padBeginShape, - padEndShape, - Shape{2, 2}, - op::RoundingType::CEIL); + auto maxPoolingNodeSecond = std::make_shared(addNodeSecond->output(0), + Strides{2, 2}, + Strides{1, 1}, + padBeginShape, + padEndShape, + Shape{2, 2}, + op::RoundingType::CEIL); // -------Reshape---------- auto reshapeFirstShape = Shape{2}; auto reshapeOffset = shape_size(addSecondShape) * sizeof(float) + offset; auto reshapeFirstConstantNode = - std::make_shared(element::Type_t::i64, reshapeFirstShape, data + reshapeOffset); + std::make_shared(element::Type_t::i64, reshapeFirstShape, data + reshapeOffset); auto reshapeFirstNode = - std::make_shared(maxPoolingNodeSecond->output(0), reshapeFirstConstantNode->output(0), true); + std::make_shared(maxPoolingNodeSecond->output(0), reshapeFirstConstantNode->output(0), true); // -------MatMul 1--------- auto matMulFirstShape = Shape{500, 800}; offset = shape_size(reshapeFirstShape) * sizeof(int64_t) + reshapeOffset; auto matMulFirstConstantNode = - std::make_shared(element::Type_t::f32, matMulFirstShape, data + offset); + std::make_shared(element::Type_t::f32, matMulFirstShape, data + offset); auto matMulFirstNode = - 
std::make_shared(reshapeFirstNode->output(0), matMulFirstConstantNode->output(0), false, true); + std::make_shared(reshapeFirstNode->output(0), matMulFirstConstantNode->output(0), false, true); // -------Add 3------------ auto addThirdShape = Shape{1, 500}; offset += shape_size(matMulFirstShape) * sizeof(float); - auto addThirdConstantNode = std::make_shared(element::Type_t::f32, addThirdShape, data + offset); + auto addThirdConstantNode = std::make_shared(element::Type_t::f32, addThirdShape, data + offset); - auto addThirdNode = std::make_shared(matMulFirstNode->output(0), addThirdConstantNode->output(0)); + auto addThirdNode = std::make_shared(matMulFirstNode->output(0), addThirdConstantNode->output(0)); // -------Relu------------- - auto reluNode = std::make_shared(addThirdNode->output(0)); + auto reluNode = std::make_shared(addThirdNode->output(0)); // -------Reshape 2-------- auto reshapeSecondShape = Shape{2}; auto reshapeSecondConstantNode = - std::make_shared(element::Type_t::i64, reshapeSecondShape, data + reshapeOffset); + std::make_shared(element::Type_t::i64, reshapeSecondShape, data + reshapeOffset); auto reshapeSecondNode = - std::make_shared(reluNode->output(0), reshapeSecondConstantNode->output(0), true); + std::make_shared(reluNode->output(0), reshapeSecondConstantNode->output(0), true); // -------MatMul 2--------- auto matMulSecondShape = Shape{10, 500}; offset += shape_size(addThirdShape) * sizeof(float); auto matMulSecondConstantNode = - std::make_shared(element::Type_t::f32, matMulSecondShape, data + offset); + std::make_shared(element::Type_t::f32, matMulSecondShape, data + offset); - auto matMulSecondNode = std::make_shared(reshapeSecondNode->output(0), - matMulSecondConstantNode->output(0), - false, - true); + auto matMulSecondNode = std::make_shared(reshapeSecondNode->output(0), + matMulSecondConstantNode->output(0), + false, + true); // -------Add 4------------ auto add4Shape = Shape{1, 10}; offset += shape_size(matMulSecondShape) * 
sizeof(float); - auto add4ConstantNode = std::make_shared(element::Type_t::f32, add4Shape, data + offset); + auto add4ConstantNode = std::make_shared(element::Type_t::f32, add4Shape, data + offset); - auto add4Node = std::make_shared(matMulSecondNode->output(0), add4ConstantNode->output(0)); + auto add4Node = std::make_shared(matMulSecondNode->output(0), add4ConstantNode->output(0)); // -------softMax---------- - auto softMaxNode = std::make_shared(add4Node->output(0), 1); + auto softMaxNode = std::make_shared(add4Node->output(0), 1); softMaxNode->get_output_tensor(0).set_names({"output_tensor"}); // ------- OpenVINO function-- - auto result_full = std::make_shared(softMaxNode->output(0)); + auto result_full = std::make_shared(softMaxNode->output(0)); std::shared_ptr fnPtr = std::make_shared(result_full, ov::ParameterVector{paramNode}, "lenet"); From 5b8433ffbed3fbb08ed02641612f5d0cd63786e9 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Thu, 26 Oct 2023 22:25:28 +0200 Subject: [PATCH 085/275] [PT FE] Fix issue with adding Result to mutated tensor (#20690) * [PT FE] Fix issue with adding Result to mutated tensor * Add test --- .../pytorch/src/translate_session.cpp | 4 +- tests/layer_tests/pytorch_tests/test_loop.py | 45 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/layer_tests/pytorch_tests/test_loop.py diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 5d2372cbe40182..9d5c719a3afa78 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -271,8 +271,10 @@ std::shared_ptr TranslateSession::convert_pytorch_model( auto mutated_tensor = tensor_map->at(tensor_id); // empty external_tensor_map means this is main body of the model and we don't want to create // additional outputs in that case. 
- if (mutated_tensor.get_target_inputs().empty() && !external_tensor_map.empty()) + if (!external_tensor_map.empty()) { + OPENVINO_DEBUG << "Creating Result for mutated tensor " << tensor_id; results.push_back(std::make_shared(tensor_map->at(tensor_id))); + } } else { OPENVINO_DEBUG << "Mutated tensor with id " << tensor_id << " doesn't exist in inputs, skipping."; } diff --git a/tests/layer_tests/pytorch_tests/test_loop.py b/tests/layer_tests/pytorch_tests/test_loop.py new file mode 100644 index 00000000000000..8c91833f437b18 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_loop.py @@ -0,0 +1,45 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import os + +import pytest +import numpy as np + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestLoopWithAlias(PytorchLayerTest): + def _prepare_input(self): + return (np.random.randn(*self.shape).astype(np.float32),) + + def create_model(self, n): + import torch + + class loop_alias_model(torch.nn.Module): + def __init__(self, n): + super(loop_alias_model, self).__init__() + self.n = n + + def forward(self, x): + N = x.shape[1] + res = torch.zeros(1, self.n, dtype=torch.long) + d = torch.ones(1, N) * 1e10 + f = torch.zeros(1, dtype=torch.long) + + for i in range(self.n): + res[:, i] = f + _d = torch.sum((x - x[0, f, :]) ** 2, -1) + m = _d < d + d[m] = _d[m] + f = torch.max(d, -1)[1] + return res + + return loop_alias_model(n), None, ["prim::Loop", "aten::copy_"] + + @pytest.mark.parametrize("s,n", [([1, 1024, 3], 512), ([1, 512, 3], 128)]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_loop_alias(self, s, n, ie_device, precision, ir_version): + self.shape = s + self._test(*self.create_model(n), ie_device, precision, + ir_version, use_convert_model=True) From a8a14eba9d2b4cf5594a3c7a9028abc251cd6715 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 27 Oct 2023 05:27:11 +0200 Subject: [PATCH 086/275] [core]Migrate Equal operator to new API 
(#20688) * Migrate Equal to new API * Remove `visit_attributes` is same as base * Remove i4, u4 from evaluate in Equal reference implementation not handle binary precisions correctly * Sync precisions in `has_evaluate` with `evaluate` * Fix all equal check for lower bound - make broadcast test assertion more strict - remove deprecated functions from broadcast test --- src/core/include/openvino/op/equal.hpp | 5 +- .../include/openvino/reference/equal.hpp | 68 ++++-- src/core/src/op/equal.cpp | 219 ++++++++---------- src/core/tests/type_prop/broadcast.cpp | 14 +- 4 files changed, 151 insertions(+), 155 deletions(-) diff --git a/src/core/include/openvino/op/equal.hpp b/src/core/include/openvino/op/equal.hpp index 8148f62d2ba44b..a66e00d4be96d9 100644 --- a/src/core/include/openvino/op/equal.hpp +++ b/src/core/include/openvino/op/equal.hpp @@ -41,12 +41,9 @@ class OPENVINO_API Equal : public util::BinaryElementwiseComparison { const Output& arg1, const AutoBroadcastSpec& auto_broadcast = AutoBroadcastSpec(AutoBroadcastType::NUMPY)); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool evaluate_lower(TensorVector& outputs) const override; bool has_evaluate() const override; diff --git a/src/core/reference/include/openvino/reference/equal.hpp b/src/core/reference/include/openvino/reference/equal.hpp index c81d47c23d18ff..5e75b110eb996c 100644 --- a/src/core/reference/include/openvino/reference/equal.hpp +++ b/src/core/reference/include/openvino/reference/equal.hpp @@ -4,44 +4,68 @@ #pragma once -#if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic 
ignored "-Wfloat-equal" -#endif +#include +#include -#include - -#include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/autobroadcast_binop.hpp" +#include "openvino/reference/utils/type_util.hpp" namespace ov { namespace reference { +namespace func { +template +bool equal(const T lhs, const T rhs) { + return lhs == rhs; +} +} // namespace func + template +void equal(const T* arg0, const T* arg1, char* out, size_t count) { + std::transform(arg0, std::next(arg0, count), arg1, out, std::equal_to()); +} + +/** + * @brief Reference implementation of binary elementwise Equal operator. + * + * Used for integral types with custom `equal` function (reduce binary size). + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg_shape0 Input 0 shape. + * @param arg_shape1 Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ +template ::value>::type* = nullptr> void equal(const T* arg0, const T* arg1, - char* out, - size_t count) // TODO: using char for bool, is this right? -{ - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] == arg1[i]; - } + U* out, + const Shape& arg0_shape, + const Shape& arg1_shape, + const op::AutoBroadcastSpec& broadcast_spec) { + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::equal); } -template +/** + * @brief Reference implementation of binary elementwise Equal operator. + * + * Used for floating-point types to (avoid warning compare floating point with `==`). + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg_shape0 Input 0 shape. + * @param arg_shape1 Input 1 shape. + * @param broadcast_spec Broadcast specification mode. 
+ */ +template ()>::type* = nullptr> void equal(const T* arg0, const T* arg1, U* out, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x == y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, std::equal_to()); } } // namespace reference } // namespace ov - -#if defined(__GNUC__) -# pragma GCC diagnostic pop -#endif diff --git a/src/core/src/op/equal.cpp b/src/core/src/op/equal.cpp index e4adf5d0e4ce53..7f23b8970e204a 100644 --- a/src/core/src/op/equal.cpp +++ b/src/core/src/op/equal.cpp @@ -2,183 +2,160 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/equal.hpp" +#include "openvino/op/equal.hpp" #include "bound_evaluate.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/less_eq.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "openvino/op/ops.hpp" +#include "openvino/op/less_eq.hpp" +#include "openvino/op/logical_and.hpp" +#include "openvino/op/logical_or.hpp" #include "openvino/reference/equal.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; - -OPENVINO_SUPPRESS_DEPRECATED_START +namespace ov { +namespace op { namespace equal { namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::equal(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} - -bool evaluate_equal(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - 
OPENVINO_TYPE_CASE(evaluate_equal, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, i4, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, i8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, i16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, u4, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, u16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, bf16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, f32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_equal, f64, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; - } - return rc; -} -ov::Tensor equal_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { - auto equal = op::v1::Equal(std::make_shared(lhs.get_element_type(), lhs.get_shape()), - std::make_shared(rhs.get_element_type(), rhs.get_shape()), - op::AutoBroadcastType::NUMPY); - auto outs = ov::TensorVector{{equal.get_output_element_type(0), equal.get_output_shape(0)}}; - equal.evaluate(outs, ov::TensorVector{lhs, rhs}); +Tensor less_equal_tensor(const Tensor& lhs, const Tensor& rhs) { + const auto less_eq = v1::LessEqual(); + auto outs = TensorVector{{element::boolean, Shape{}}}; + less_eq.evaluate(outs, {lhs, rhs}); return outs.front(); } -ov::Tensor less_equal_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { - auto equal = op::v1::LessEqual(std::make_shared(lhs.get_element_type(), lhs.get_shape()), - 
std::make_shared(rhs.get_element_type(), rhs.get_shape()), - op::AutoBroadcastType::NUMPY); - auto outs = ov::TensorVector{{equal.get_output_element_type(0), equal.get_output_shape(0)}}; - equal.evaluate(outs, ov::TensorVector{lhs, rhs}); +Tensor and_tensor(const Tensor& lhs, const Tensor& rhs) { + const auto logical_and = v1::LogicalAnd(); + auto outs = TensorVector{{element::boolean, Shape{}}}; + logical_and.evaluate(outs, {lhs, rhs}); return outs.front(); } -ov::Tensor and_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { - auto logical_and = - ov::op::v1::LogicalAnd(std::make_shared(lhs.get_element_type(), lhs.get_shape()), - std::make_shared(rhs.get_element_type(), rhs.get_shape()), - op::AutoBroadcastType::NUMPY); - auto outs = ov::TensorVector{{logical_and.get_output_element_type(0), logical_and.get_output_shape(0)}}; - logical_and.evaluate(outs, ov::TensorVector{lhs, rhs}); +Tensor or_tensor(const Tensor& lhs, const Tensor& rhs) { + const auto logical_or = v1::LogicalOr(); + auto outs = TensorVector{{element::boolean, Shape{}}}; + logical_or.evaluate(outs, {lhs, rhs}); return outs.front(); } -ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { - auto logical_or = - ov::op::v1::LogicalOr(std::make_shared(lhs.get_element_type(), lhs.get_shape()), - std::make_shared(rhs.get_element_type(), rhs.get_shape()), - op::AutoBroadcastType::NUMPY); - auto outs = ov::TensorVector{{logical_or.get_output_element_type(0), logical_or.get_output_shape(0)}}; - logical_or.evaluate(outs, ov::TensorVector{lhs, rhs}); - return outs.front(); -} +void all_equal(const TensorVector& tensors, TensorVector& outputs) { + auto& output = outputs[0]; + auto eq_result = TensorVector{{output.get_element_type(), output.get_shape()}}; -void all_equal(const ov::TensorVector tensors, ov::Tensor& output_value) { - OPENVINO_ASSERT(tensors.size() >= 2, "Unexpected number of tensors in all_equal helper"); - auto& tensor = tensors[0]; - output_value = equal_tensor(tensor, 
tensors[1]); - for (size_t i = 2; i < tensors.size(); ++i) { - output_value = and_tensor(output_value, equal_tensor(tensor, tensors[i])); + auto t_iter = tensors.begin() + 2; + auto eq_inputs = TensorVector(tensors.begin(), t_iter); + + const auto eq = v1::Equal(); + eq.evaluate(outputs, eq_inputs); + for (; t_iter != tensors.end(); ++t_iter) { + eq_inputs[1] = *t_iter; + eq.evaluate(eq_result, eq_inputs); + output = and_tensor(output, eq_result[0]); } } -ov::Tensor within_interval(const ov::Tensor& lower, const ov::Tensor& upper, const ov::Tensor& subject_to_check) { - auto lower_check = less_equal_tensor(lower, subject_to_check); - auto upper_check = less_equal_tensor(subject_to_check, upper); +Tensor within_interval(const Tensor& lower, const Tensor& upper, const Tensor& subject_to_check) { + const auto lower_check = less_equal_tensor(lower, subject_to_check); + const auto upper_check = less_equal_tensor(subject_to_check, upper); return and_tensor(lower_check, upper_check); } - } // namespace + +struct Evaluate : public element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const op::AutoBroadcastSpec& broadcast_spec) { + reference::equal(arg0.data(), + arg1.data(), + out.data>(), + shape0, + shape1, + broadcast_spec); + return true; + } +}; } // namespace equal //------------------------------- v1 ------------------------------------------- -op::v1::Equal::Equal(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Equal::Equal(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Equal::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Equal::clone_with_new_inputs(const OutputVector& new_args) const { 
OV_OP_SCOPE(v1_Equal_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Equal::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Equal::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Equal_evaluate); - return equal::evaluate_equal(inputs[0], inputs[1], outputs[0], get_autob()); + + outputs[0].set_shape(ov::op::infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Equal::evaluate_lower(ov::TensorVector& output_values) const { +bool Equal::evaluate_lower(TensorVector& output_values) const { if (get_input_tensor(0).has_and_set_bound() && get_input_tensor(1).has_and_set_bound()) return default_upper_bound_evaluator(this, output_values); // ll == lu == rl == ru -> {true} // else -> {false} const auto &lhs = get_input_tensor(0), &rhs = get_input_tensor(1); - auto lhs_lower = lhs.get_lower_value(), lhs_upper = lhs.get_upper_value(); - auto rhs_lower = rhs.get_lower_value(), rhs_upper = rhs.get_upper_value(); - equal::all_equal({lhs_lower, lhs_upper, rhs_lower, rhs_upper}, output_values[0]); + const auto &lhs_lower = lhs.get_lower_value(), &lhs_upper = lhs.get_upper_value(); + const auto &rhs_lower = rhs.get_lower_value(), &rhs_upper = rhs.get_upper_value(); + equal::all_equal({lhs_lower, lhs_upper, rhs_lower, rhs_upper}, output_values); return true; } -bool op::v1::Equal::evaluate_upper(ov::TensorVector& output_values) const { +bool Equal::evaluate_upper(TensorVector& output_values) const { const auto &lhs = get_input_tensor(0), &rhs = get_input_tensor(1); - auto lhs_lower = lhs.get_lower_value(), lhs_upper = lhs.get_upper_value(); - auto 
rhs_lower = rhs.get_lower_value(), rhs_upper = rhs.get_upper_value(); + const auto &lhs_lower = lhs.get_lower_value(), &lhs_upper = lhs.get_upper_value(); + const auto &rhs_lower = rhs.get_lower_value(), &rhs_upper = rhs.get_upper_value(); // check for intersection: // ll <= rl <= lu or ll <= ru <= lu - auto rl_check = equal::within_interval(lhs_lower, lhs_upper, rhs_lower); - auto ru_check = equal::within_interval(lhs_lower, lhs_upper, rhs_upper); + const auto rl_check = equal::within_interval(lhs_lower, lhs_upper, rhs_lower); + const auto ru_check = equal::within_interval(lhs_lower, lhs_upper, rhs_upper); output_values[0] = equal::or_tensor(rl_check, ru_check); return true; } -bool op::v1::Equal::has_evaluate() const { +bool Equal::has_evaluate() const { OV_OP_SCOPE(v1_Equal_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i8: - case ngraph::element::u8: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::bf16: + case element::f16: + case element::f32: + case element::f64: + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; -} - -bool op::v1::Equal::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_Equal_visit_attributes); - BinaryElementwiseComparison::visit_attributes(visitor); - return true; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/tests/type_prop/broadcast.cpp b/src/core/tests/type_prop/broadcast.cpp index 023f8fa9505f0a..04f93d08b798e6 100644 --- a/src/core/tests/type_prop/broadcast.cpp +++ b/src/core/tests/type_prop/broadcast.cpp @@ -9,7 +9,6 @@ #include "common_test_utils/test_assertions.hpp" #include 
"common_test_utils/type_prop.hpp" #include "openvino/core/dimension_tracker.hpp" -#include "openvino/core/validation_util.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/equal.hpp" @@ -18,6 +17,7 @@ #include "openvino/op/shape_of.hpp" #include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/attr_types.hpp" +#include "validation_util.hpp" using namespace std; using namespace testing; @@ -1303,24 +1303,22 @@ TEST(type_prop, broadcast_v3_bidirectional_tricky_partial_value_case_and_equal_p auto broadcast_a = make_shared(a, select, "BIDIRECTIONAL"); const auto out_shape = broadcast_a->get_output_partial_shape(0); - OPENVINO_SUPPRESS_DEPRECATED_START EXPECT_EQ(out_shape, expected_shape); { - auto constant = ov::get_constant_from_source(equal->output(0)); - EXPECT_TRUE(constant != nullptr); + auto constant = ov::util::get_constant_from_source(equal->output(0)); + ASSERT_TRUE(constant != nullptr); std::vector expected{false, false, false}, calculated = constant->get_vector(); EXPECT_EQ(calculated, expected); } { equal = make_shared(concat, ov::op::v0::Constant::create(ov::element::i64, {3}, {5, 1, 4})); - EXPECT_TRUE(ov::get_constant_from_source(equal->output(0)) == nullptr); + EXPECT_TRUE(ov::util::get_constant_from_source(equal->output(0)) == nullptr); } { equal = make_shared(concat, ov::op::v0::Constant::create(ov::element::i64, {3}, {11, 1, 4})); - auto constant = ov::get_constant_from_source(equal->output(0)); - EXPECT_TRUE(constant != nullptr); + auto constant = ov::util::get_constant_from_source(equal->output(0)); + ASSERT_TRUE(constant != nullptr); std::vector expected{false, true, true}, calculated = constant->get_vector(); EXPECT_EQ(calculated, expected); } - OPENVINO_SUPPRESS_DEPRECATED_END } From b06a0010ea1ddddefe544b58e4c746c5e84e2830 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 27 Oct 2023 05:54:30 +0200 Subject: [PATCH 087/275] [PT FE] Disable failing pytorch layer test (#20719) * [PT FE] 
Disable test * Update tests/layer_tests/pytorch_tests/test_convnd.py --- tests/layer_tests/pytorch_tests/test_convnd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/layer_tests/pytorch_tests/test_convnd.py b/tests/layer_tests/pytorch_tests/test_convnd.py index 8b46b2992d2c07..8ea83a5aae5dc1 100644 --- a/tests/layer_tests/pytorch_tests/test_convnd.py +++ b/tests/layer_tests/pytorch_tests/test_convnd.py @@ -216,6 +216,7 @@ def forward(self, x, y): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(reason="ticket 123727") def test_conv2d(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, freeze_model=True, dynamic_shapes=False) From 14d51de93c92d53c69087d01fe5f1fffc899426b Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Fri, 27 Oct 2023 07:24:07 +0200 Subject: [PATCH 088/275] [ONNX] Add support for standard ONNX GroupNormalization (#20694) * Add support for ONNX GroupNormalization * Enable onnx compliance tests * Add f to the test values to avoid cast warning * Use tolerance as fp instead of bits --- .../test_onnx/test_backend.py | 2 - .../frontend/src/op/group_normalization.cpp | 46 +++++++++ .../frontend/src/op/group_normalization.hpp | 23 +++++ .../onnx/frontend/src/ops_bridge.cpp | 2 + .../models/group_normalization_2grp.prototxt | 91 ++++++++++++++++++ .../models/group_normalization_3grp.prototxt | 91 ++++++++++++++++++ .../group_normalization_custom_eps.prototxt | 96 +++++++++++++++++++ src/frontends/onnx/tests/onnx_import.in.cpp | 78 +++++++++++++++ .../onnx/tests/tests_python/test_backend.py | 2 - 9 files changed, 427 insertions(+), 4 deletions(-) create mode 100644 src/frontends/onnx/frontend/src/op/group_normalization.cpp create mode 100644 src/frontends/onnx/frontend/src/op/group_normalization.hpp create mode 100644 src/frontends/onnx/tests/models/group_normalization_2grp.prototxt create mode 100644 src/frontends/onnx/tests/models/group_normalization_3grp.prototxt create 
mode 100644 src/frontends/onnx/tests/models/group_normalization_custom_eps.prototxt diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py index 87f53223c2d672..c1ad04a6fe44a5 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py @@ -565,9 +565,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_99955, - "OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu", "OnnxBackendNodeModelTest.test_group_normalization_epsilon_expanded_cpu", - "OnnxBackendNodeModelTest.test_group_normalization_example_cpu", "OnnxBackendNodeModelTest.test_group_normalization_example_expanded_cpu", ), ( diff --git a/src/frontends/onnx/frontend/src/op/group_normalization.cpp b/src/frontends/onnx/frontend/src/op/group_normalization.cpp new file mode 100644 index 00000000000000..d0f32a89ae4515 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/group_normalization.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op/group_normalization.hpp" + +#include "default_opset.hpp" + +OPENVINO_SUPPRESS_DEPRECATED_START +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector group_normalization(const Node& node) { + const auto data = node.get_ng_inputs().at(0); // Shape [N, C, ...] 
+ auto scale = node.get_ng_inputs().at(1); // Shape [num_groups] + auto bias = node.get_ng_inputs().at(2); // Shape [num_groups] + + auto eps = node.get_attribute_value("epsilon", 1e-05f); + auto num_groups = node.get_attribute_value("num_groups"); + + auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0}); + auto one = default_opset::Constant::create(element::i64, Shape{1}, {1}); + auto c_dim = std::make_shared(std::make_shared(data), one, zero); + auto g_dim = default_opset::Constant::create(element::i64, Shape{1}, {num_groups}); + + auto c_g_div = std::make_shared(c_dim, g_dim); + + // Adjust scale and bias shape, [G] -> [G, C/G] -> [C] + scale = std::make_shared(scale, one); + auto broadcast_scale = + std::make_shared(scale, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); + auto c_scale = std::make_shared(broadcast_scale, c_dim, false); + + bias = std::make_shared(bias, one); + auto broadcast_bias = + std::make_shared(bias, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); + auto c_bias = std::make_shared(broadcast_bias, c_dim, false); + + return {std::make_shared(data, c_scale, c_bias, num_groups, eps)}; +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/frontends/onnx/frontend/src/op/group_normalization.hpp b/src/frontends/onnx/frontend/src/op/group_normalization.hpp new file mode 100644 index 00000000000000..fbd38d3667d4dd --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/group_normalization.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/deprecated.hpp" +OPENVINO_SUPPRESS_DEPRECATED_START + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector group_normalization(const Node& node); + +} // namespace set_1 +} // namespace op +} // 
namespace onnx_import +} // namespace ngraph +OPENVINO_SUPPRESS_DEPRECATED_END diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index c4d9a50c4ca637..31ca0b20836de5 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -75,6 +75,7 @@ #include "op/global_max_pool.hpp" #include "op/greater.hpp" #include "op/grid_sample.hpp" +#include "op/group_normalization.hpp" #include "op/gru.hpp" #include "op/hammingwindow.hpp" #include "op/hannwindow.hpp" @@ -395,6 +396,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("GlobalMaxPool", 1, global_max_pool); REGISTER_OPERATOR("Greater", 1, greater); REGISTER_OPERATOR("GridSample", 1, grid_sample); + REGISTER_OPERATOR("GroupNormalization", 1, group_normalization); REGISTER_OPERATOR("GRU", 1, gru); REGISTER_OPERATOR("HannWindow", 1, hannwindow); REGISTER_OPERATOR("HammingWindow", 1, hammingwindow); diff --git a/src/frontends/onnx/tests/models/group_normalization_2grp.prototxt b/src/frontends/onnx/tests/models/group_normalization_2grp.prototxt new file mode 100644 index 00000000000000..978ab918a2c521 --- /dev/null +++ b/src/frontends/onnx/tests/models/group_normalization_2grp.prototxt @@ -0,0 +1,91 @@ +ir_version: 8 +producer_name: "onnx-frontend-test" +graph { + node { + input: "x" + input: "scale" + input: "bias" + output: "Y" + op_type: "GroupNormalization" + attribute { + name: "num_groups" + i: 2 + type: INT + } + } + name: "test-model-group-normalization" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: 
"Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 18 +} diff --git a/src/frontends/onnx/tests/models/group_normalization_3grp.prototxt b/src/frontends/onnx/tests/models/group_normalization_3grp.prototxt new file mode 100644 index 00000000000000..1711e41bd5d48f --- /dev/null +++ b/src/frontends/onnx/tests/models/group_normalization_3grp.prototxt @@ -0,0 +1,91 @@ +ir_version: 9 +opset_import { + domain: "" + version: 18 +} +producer_name: "onnx-frontend-test" +graph { + node { + input: "x" + input: "scale" + input: "bias" + output: "Y" + op_type: "GroupNormalization" + attribute { + name: "num_groups" + type: INT + i: 3 + } + } + name: "test-model-group-normalization" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 6 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 6 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} diff --git a/src/frontends/onnx/tests/models/group_normalization_custom_eps.prototxt b/src/frontends/onnx/tests/models/group_normalization_custom_eps.prototxt new file mode 100644 index 00000000000000..083b5d8ecf5d0e --- /dev/null +++ b/src/frontends/onnx/tests/models/group_normalization_custom_eps.prototxt @@ -0,0 +1,96 @@ +ir_version: 9 +opset_import { + domain: "" + version: 18 +} +producer_name: "onnx-frontend-test" +graph { + node { + input: "x" + input: "scale" + input: "bias" + output: "Y" + op_type: 
"GroupNormalization" + attribute { + name: "epsilon" + type: FLOAT + f: 0.0001 + } + attribute { + name: "num_groups" + type: INT + i: 3 + } + } + name: "test-model-group-normalization" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 6 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 6 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 361805e45cf0d4..2ac1dc6d464567 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6884,3 +6884,81 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_hannwindow_symmetric) { test_case.run_with_tolerance_as_fp(0.01f); } } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_group_normalization_3grp_default_eps) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/group_normalization_3grp.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + test_case.add_input( + {-0.2261407f, -1.8793484f, -0.37692875f, 0.8860143f, 0.05993791f, -0.7634332f, 0.61080337f, 0.09776749f, + 0.5835062f, -0.32338685f, -0.23485906f, -0.04752525f, 2.4905143f, -0.11199934f, -0.20539412f, -2.4455426f, + -0.5437323f, 0.51794696f, -0.44127423f, 0.09666952f, -0.09539367f, -1.962784f, 0.25065672f, 1.5909688f, + 0.927671f, -0.46812922f, 0.2925484f, -1.1766007f, 0.7675745f, -0.94145614f, 1.1552521f, 1.6375796f, + 0.0198675f, -0.45938072f, 
0.43037328f, 0.37999842f, -0.45021877f, -0.84925014f, 1.6790043f, -1.0172538f, + 0.0493111f, -0.53391f, -0.08101435f, 0.14738432f, -0.58910686f, 0.51673824f, -1.7001126f, -1.888597f}); + test_case.add_input({2.4556813f, 0.12371606f, 1.5681714f}); + test_case.add_input({0.79260737f, -0.74518913f, 1.370796f}); + + test_case.add_expected_output( + Shape{2, 6, 2, 2}, + {0.70938545f, -4.3442307f, 0.24844825f, 4.109082f, 1.5838864f, -0.93303996f, 3.267802f, 1.6995258f, + -0.6843487f, -0.7732928f, -0.76461035f, -0.7462375f, -0.49731785f, -0.75256085f, -0.7617206f, -0.9814244f, + 0.5922366f, 2.3495553f, 0.76182777f, 1.652246f, 1.3343381f, -1.7566144f, 1.9071295f, 4.1256485f, + 2.4563973f, -1.0979934f, 0.8390641f, -2.9021082f, 2.0487132f, -2.3033152f, 3.03593f, 4.2641716f, + -0.73710674f, -0.80988204f, -0.6747702f, -0.6824198f, -0.8084908f, -0.86908495f, -0.48516175f, -0.8945968f, + 2.4475086f, 1.3245938f, 2.1965842f, 2.6363354f, 1.2183195f, 3.3474774f, -0.92077446f, -1.2836761f}); + + test_case.run_with_tolerance_as_fp(0.000001f); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_group_normalization_3grp_custom_eps) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/group_normalization_custom_eps.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + test_case.add_input( + {1.8079232f, -0.2892469f, 2.0915377f, -1.8837914f, 0.25869793f, 0.80542284f, 2.9528935f, 0.16081251f, + 0.10507602f, -1.7271832f, -1.0217364f, -1.1528395f, -0.69146425f, -2.4292548f, -0.4232518f, 0.33357796f, + -1.4946569f, -0.08947915f, -0.7962127f, 1.3765403f, -0.1947846f, 0.30173305f, 0.08576944f, 0.8134404f, + 0.62960416f, -1.0745901f, -0.27037576f, -0.3607608f, 0.14347585f, 1.4590056f, -1.1309915f, 0.88850766f, + 0.5367185f, -0.7771955f, 0.81048864f, 0.45839247f, 1.0398412f, -0.21019235f, -1.037122f, -0.36852306f, + 2.7608335f, 0.3126114f, 0.336343f, 0.76919895f, 0.58595645f, 0.71894723f, -1.2922621f, 
-0.542859f}); + test_case.add_input({-0.05215209f, -0.5643389f, -0.6959881f}); + test_case.add_input({1.4327786f, 0.01641126f, -1.471873f}); + + test_case.add_expected_output( + Shape{2, 6, 2, 2}, + {1.3937842f, 1.4702199f, 1.3834473f, 1.5283363f, 1.4502488f, 1.4303224f, 1.3520534f, 1.4538165f, + -0.628196f, 0.5758153f, 0.11225323f, 0.19840352f, -0.10477467f, 1.0371594f, -0.281022f, -0.77834874f, + -0.22489226f, -1.3969909f, -0.8074844f, -2.6198394f, -1.3091526f, -1.7233121f, -1.5431708f, -2.1501417f, + 1.3968898f, 1.4998344f, 1.4512546f, 1.4567144f, 1.4262552f, 1.3467885f, 1.5032414f, 1.3812504f, + -0.36344206f, 0.6759755f, -0.58001745f, -0.30147952f, -0.7614548f, 0.22742787f, 0.8815994f, 0.35268092f, + -2.9372354f, -1.3806448f, -1.3957335f, -1.6709452f, -1.5544388f, -1.6389949f, -0.36025894f, -0.83673286f}); + + test_case.run_with_tolerance_as_fp(0.000001f); +} + +OPENVINO_TEST(${BACKEND_NAME}, onnx_group_normalization_2grp_custom_eps) { + auto function = onnx_import::import_onnx_model(file_util::path_join(ov::test::utils::getExecutableDirectory(), + SERIALIZED_ZOO, + "onnx/group_normalization_2grp.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + test_case.add_input({-0.424049f, 1.7215315f, 1.429421f, 0.52730036f, 2.0628972f, -0.15856522f, + 2.274094f, -1.9989003f, -1.7827071f, -0.87104136f, -1.2995626f, 0.16800839f, + 0.5934625f, 1.553442f, -0.5482905f, 0.6079124f, 0.3598974f, -0.15221423f, + 1.1135519f, -1.2533926f, -1.019778f, -1.9142767f, -1.2984604f, 0.5587884f}); + test_case.add_input({-1.4678609f, -1.8223071f}); + test_case.add_input({1.1155374f, -0.6101201f}); + + test_case.add_expected_output( + Shape{1, 4, 2, 3}, + {1.694167f, -0.51719165f, -0.21612573f, 0.71365166f, -0.86902285f, 1.4205441f, -1.0866947f, 3.3172996f, + 3.0944781f, 2.154863f, 2.5965219f, 1.0839586f, -1.8562672f, -3.540983f, 0.14745194f, -1.8816261f, + -1.4463723f, -0.547642f, -2.768998f, 1.3848708f, 0.97488886f, 2.5446892f, 1.4639623f, -1.7954159f}); + + 
test_case.run_with_tolerance_as_fp(0.000001f); +} diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index 779444658d1e28..a027f703ba29ce 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -437,9 +437,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None ), ( xfail_issue_99955, - "OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu", "OnnxBackendNodeModelTest.test_group_normalization_epsilon_expanded_cpu", - "OnnxBackendNodeModelTest.test_group_normalization_example_cpu", "OnnxBackendNodeModelTest.test_group_normalization_example_expanded_cpu", ), ( From be25d9038e976056deae307427737be24ab9a7b4 Mon Sep 17 00:00:00 2001 From: River Li Date: Fri, 27 Oct 2023 13:27:53 +0800 Subject: [PATCH 089/275] Fix stride issue for ZeroDims (#20686) * Fix stride issue for ZeroDims * Add test case * Fix ITensor::is_continuous() issue * Fix the same issue in gpu plugin and template plugin --- src/core/src/runtime/itensor.cpp | 3 ++- src/core/tests/tensor.cpp | 10 ++++++++++ src/inference/src/dev/make_tensor.cpp | 2 +- src/plugins/intel_gpu/src/plugin/remote_tensor.cpp | 2 +- src/plugins/template/src/remote_context.cpp | 2 +- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/core/src/runtime/itensor.cpp b/src/core/src/runtime/itensor.cpp index 6d966566c65610..2b3a6d49b84947 100644 --- a/src/core/src/runtime/itensor.cpp +++ b/src/core/src/runtime/itensor.cpp @@ -25,9 +25,10 @@ size_t ITensor::get_byte_size() const { } bool ITensor::is_continuous() const { - if (get_element_type().bitwidth() < 8) + if ((get_element_type().bitwidth() < 8) || get_size() == 0) { // OpenVINO doesn't support strides for lp types return true; + } const auto& shape = get_shape(); const auto& type = get_element_type(); std::vector strides(shape.size()); diff --git a/src/core/tests/tensor.cpp b/src/core/tests/tensor.cpp 
index 3fb0c259fc0c0d..361e45e8a570ce 100644 --- a/src/core/tests/tensor.cpp +++ b/src/core/tests/tensor.cpp @@ -52,3 +52,13 @@ TEST(tensor, wrap_tensor_with_unspecified_type_from_host_tensor) { // !tensor means that the tensor is not initialized EXPECT_EQ(!tensor, true); } + +TEST(tensor, create_tensor_with_zero_dims_check_stride) { + ov::Shape shape = {0, 0, 0, 0}; + auto tensor = ov::Tensor(element::f32, shape); + EXPECT_EQ(!!tensor, true); + auto stride = tensor.get_strides(); + EXPECT_EQ(stride.size(), shape.size()); + EXPECT_EQ(stride.back(), 0); + EXPECT_EQ(tensor.is_continuous(), true); +} diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index 1d23c62f86d957..2c0f33b352bcf6 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -77,7 +77,7 @@ class ViewTensor : public ITensor { auto& shape = get_shape(); if (m_strides.empty() && !shape.empty()) { m_strides.resize(shape.size()); - m_strides.back() = m_element_type.size(); + m_strides.back() = shape.back() == 0 ? 0 : m_element_type.size(); std::transform(shape.crbegin(), shape.crend() - 1, m_strides.rbegin(), diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index cd1011ea153bfe..9bf19f8df50535 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -63,7 +63,7 @@ void RemoteTensorImpl::update_strides() { m_strides.clear(); if (!shape.empty()) { m_strides.resize(shape.size()); - m_strides.back() = m_element_type.size(); + m_strides.back() = shape.back() == 0 ? 
0 : m_element_type.size(); std::copy(shape.rbegin(), shape.rend() - 1, m_strides.rbegin() + 1); std::partial_sum(m_strides.rbegin(), m_strides.rend(), m_strides.rbegin(), std::multiplies()); } diff --git a/src/plugins/template/src/remote_context.cpp b/src/plugins/template/src/remote_context.cpp index 2003cdf48ed93f..51d4f7a0211d77 100644 --- a/src/plugins/template/src/remote_context.cpp +++ b/src/plugins/template/src/remote_context.cpp @@ -26,7 +26,7 @@ class VectorTensorImpl : public ov::IRemoteTensor { m_strides.clear(); if (!shape.empty()) { m_strides.resize(shape.size()); - m_strides.back() = m_element_type.size(); + m_strides.back() = shape.back() == 0 ? 0 : m_element_type.size(); std::copy(shape.rbegin(), shape.rend() - 1, m_strides.rbegin() + 1); std::partial_sum(m_strides.rbegin(), m_strides.rend(), m_strides.rbegin(), std::multiplies()); } From cc10b14bab35f756f95b0013203cc952e2e79c5e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 27 Oct 2023 09:58:49 +0400 Subject: [PATCH 090/275] [GPU] Share compilation context with sub-networks (#20706) --- .../include/intel_gpu/graph/program.hpp | 14 +++++++-- .../intel_gpu/plugin/program_builder.hpp | 7 ++++- .../runtime}/compilation_context.hpp | 4 +-- .../src/graph/compilation_context.cpp | 6 ++-- src/plugins/intel_gpu/src/graph/network.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 10 ++++-- src/plugins/intel_gpu/src/graph/program.cpp | 31 +++++++++++++++---- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 2 +- .../intel_gpu/src/plugin/program_builder.cpp | 19 ++++++++++-- .../kernel_impl_params_relevance_test.cpp | 2 +- .../test_cases/fully_connected_gpu_test.cpp | 2 +- .../tests/unit/test_cases/gemm_gpu_test.cpp | 2 +- .../group_normalization_gpu_test.cpp | 2 +- 14 files changed, 80 insertions(+), 25 deletions(-) rename src/plugins/intel_gpu/{src/graph/include => include/intel_gpu/runtime}/compilation_context.hpp (83%) diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 1a196ea49e8e95..a67bbf1570ff13 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -131,6 +131,7 @@ struct program { topology const& topology, const ExecutionConfig& config, std::shared_ptr task_executor, + std::shared_ptr compilation_context, bool is_internal = false, bool no_optimizations = false, bool is_body_program = false); @@ -251,6 +252,14 @@ struct program { bool is_internal = false, bool no_optimizations = false, bool is_body_program = false); + static ptr build_program(engine& engine, + const topology& topology, + const ExecutionConfig& config, + std::shared_ptr task_executor, + std::shared_ptr compilation_context, + bool is_internal = false, + bool no_optimizations = false, + bool is_body_program = false); static ptr build_program(engine& engine, const std::set>& nodes, const ExecutionConfig& config, @@ -266,9 +275,11 @@ struct program { ImplementationsCache& get_implementations_cache() const { return *_impls_cache; } ICompilationContext& get_compilation_context() const { return *_compilation_context; } + std::shared_ptr get_compilation_context_ptr() const { return _compilation_context; } void cancel_compilation_context(); static std::shared_ptr make_task_executor(const ExecutionConfig& config); + static std::shared_ptr make_compilation_context(const ExecutionConfig& config); private: uint32_t prog_id = 0; @@ -286,8 +297,7 @@ struct program { bool is_body_program; std::unique_ptr _impls_cache; const size_t _impls_cache_capacity = 10000; - const int _num_async_build_threads = 1; - std::unique_ptr _compilation_context; + std::shared_ptr _compilation_context; std::map> nodes_map; std::list optimized_out; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 22864106fb39f5..422451d096729b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -10,6 +10,7 @@ #include "intel_gpu/plugin/custom_layer.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/execution_config.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "intel_gpu/graph/topology.hpp" #include "intel_gpu/graph/program.hpp" @@ -75,7 +76,9 @@ class ProgramBuilder final { public: ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool createTopologyOnly = false, bool partialBuild = false, - std::shared_ptr task_executor = nullptr, bool innerProgram = false); + std::shared_ptr task_executor = nullptr, + std::shared_ptr compilation_context = nullptr, + bool innerProgram = false); ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config); static const cldnn::primitive_id m_preProcessTag; @@ -136,6 +139,7 @@ class ProgramBuilder final { bool requires_new_shape_infer(const ov::Node& op) const; std::shared_ptr get_task_executor() const { return m_task_executor; } + std::shared_ptr get_compilation_context() const { return m_compilation_context; } private: static factories_map_t factories_map; @@ -153,6 +157,7 @@ class ProgramBuilder final { bool queryMode; std::shared_ptr m_task_executor; + std::shared_ptr m_compilation_context; void EnableQueryMode() { queryMode = true; } void DisableQueryMode() { queryMode = false; } diff --git a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/compilation_context.hpp similarity index 83% rename from src/plugins/intel_gpu/src/graph/include/compilation_context.hpp rename to src/plugins/intel_gpu/include/intel_gpu/runtime/compilation_context.hpp index be8d65c6aa5ecc..f664e728680b62 100644 --- 
a/src/plugins/intel_gpu/src/graph/include/compilation_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/compilation_context.hpp @@ -4,10 +4,10 @@ #pragma once -#include "openvino/runtime/threading/cpu_streams_executor.hpp" #include #include #include "intel_gpu/graph/kernel_impl_params.hpp" +#include "openvino/runtime/threading/istreams_executor.hpp" namespace cldnn { @@ -21,7 +21,7 @@ class ICompilationContext { virtual void cancel() = 0; virtual void wait_all() = 0; - static std::unique_ptr create(ov::threading::IStreamsExecutor::Config task_executor_config); + static std::shared_ptr create(ov::threading::IStreamsExecutor::Config task_executor_config); }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/compilation_context.cpp b/src/plugins/intel_gpu/src/graph/compilation_context.cpp index c1f483200c9a38..df2fad3412286b 100644 --- a/src/plugins/intel_gpu/src/graph/compilation_context.cpp +++ b/src/plugins/intel_gpu/src/graph/compilation_context.cpp @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "compilation_context.hpp" #include #include #include #include #include "intel_gpu/runtime/utils.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" + +#include "openvino/runtime/threading/cpu_streams_executor.hpp" namespace cldnn { class CompilationContext : public ICompilationContext { @@ -83,7 +85,7 @@ class CompilationContext : public ICompilationContext { std::vector> futures; }; -std::unique_ptr ICompilationContext::create(ov::threading::IStreamsExecutor::Config task_executor_config) { +std::shared_ptr ICompilationContext::create(ov::threading::IStreamsExecutor::Config task_executor_config) { return cldnn::make_unique(task_executor_config); } diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 240db96d5b4988..c8a081dadbc45f 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -13,6 +13,7 @@ 
#include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/event.hpp" #include "intel_gpu/runtime/stream.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" @@ -34,7 +35,6 @@ #include "program_helpers.h" #include "to_string_utils.h" #include "kernels_cache.hpp" -#include "compilation_context.hpp" // TODO: Remove once we have an abstraction for kernels_cache #include "kernel_base.h" diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index a81d0bd10ad58d..58ecac8e776b39 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -25,7 +25,6 @@ #include "read_value_inst.h" #include "condition_inst.h" #include "experimental_detectron_roi_feature_extractor_inst.hpp" -#include "compilation_context.hpp" #include "implementation_map.hpp" #include "graph_optimizer/prepare_buffer_fusing.h" @@ -36,6 +35,7 @@ #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/error_handler.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "json_object.h" #include @@ -1502,7 +1502,13 @@ cldnn::network::ptr primitive_inst::get_unfused_subgraph() { ov::intel_gpu::allow_static_input_reorder(true), ov::intel_gpu::allow_new_shape_infer(true) }; - auto prog = program::build_program(get_network().get_engine(), t, subgraph_config, get_network().get_program()->get_task_executor(), true, false); + auto prog = program::build_program(get_network().get_engine(), + t, + subgraph_config, + get_network().get_program()->get_task_executor(), + get_network().get_program()->get_compilation_context_ptr(), + true, + false); _unfused_subgraph = network::allocate_network(get_network().get_stream_ptr(), prog, true, get_network().is_primary_stream()); } diff --git 
a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 59af7125f9e4dc..dde29dc1e32504 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -8,6 +8,7 @@ #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "intel_gpu/graph/program.hpp" #include "auto_tuner.h" @@ -17,7 +18,6 @@ #include "program_dump_graph.h" #include "sliding_window_utils.hpp" #include "program_helpers.h" -#include "compilation_context.hpp" #include "matrix_nms_inst.h" #include "roi_pooling_inst.h" @@ -145,10 +145,17 @@ std::shared_ptr program::make_task_executor(con return std::make_shared(task_executor_config); } +std::shared_ptr program::make_compilation_context(const ExecutionConfig& config) { + const int _num_async_build_threads = 1; + return ICompilationContext::create(make_task_executor_config(config, + "Task executor config for CompilationContext in GPU plugin", _num_async_build_threads)); +} + program::program(engine& engine_ref, topology const& topology, const ExecutionConfig& config, std::shared_ptr task_executor, + std::shared_ptr compilation_context, bool is_internal, bool no_optimizations, bool is_body_program) @@ -158,7 +165,8 @@ program::program(engine& engine_ref, _task_executor(std::move(task_executor)), processing_order(), is_internal(is_internal), - is_body_program(is_body_program) { + is_body_program(is_body_program), + _compilation_context(compilation_context) { _config.apply_user_properties(_engine.get_device_info()); init_primitives(); GPU_DEBUG_INFO << "Program config\n" << config.to_string(); @@ -214,8 +222,8 @@ void program::init_program() { _kernels_cache = std::unique_ptr(new kernels_cache(_engine, _config, prog_id, _task_executor, kernel_selector::KernelBase::get_db().get_batch_header_str())); - _compilation_context = 
ICompilationContext::create(make_task_executor_config(_config, - "Task executor config for CompilationContext in GPU plugin", _num_async_build_threads)); + if (!_compilation_context) + _compilation_context = program::make_compilation_context(_config); _impls_cache = cldnn::make_unique(_impls_cache_capacity); // Remove items of compilation context's internal queue when some impl is popped in kernels_cache @@ -253,7 +261,18 @@ program::ptr program::build_program(engine& engine, bool is_internal, bool no_optimizations, bool is_body_program) { - return std::make_shared(engine, topology, config, task_executor, is_internal, no_optimizations, is_body_program); + return std::make_shared(engine, topology, config, task_executor, nullptr, is_internal, no_optimizations, is_body_program); +} + +program::ptr program::build_program(engine& engine, + const topology& topology, + const ExecutionConfig& config, + std::shared_ptr task_executor, + std::shared_ptr compilation_context, + bool is_internal, + bool no_optimizations, + bool is_body_program) { + return std::make_shared(engine, topology, config, task_executor, compilation_context, is_internal, no_optimizations, is_body_program); } program::ptr program::build_program(engine& engine, @@ -262,7 +281,7 @@ program::ptr program::build_program(engine& engine, bool is_internal, bool no_optimizations, bool is_body_program) { - return std::make_shared(engine, topology, config, nullptr, is_internal, no_optimizations, is_body_program); + return std::make_shared(engine, topology, config, nullptr, nullptr, is_internal, no_optimizations, is_body_program); } program::ptr program::build_program(engine& engine, diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index c25726f673a2f8..7d47d1127fe57d 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -31,7 +31,7 @@ static cldnn::condition::branch 
gen_branch(ProgramBuilder& p, const std::shared_ config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic())); - ProgramBuilder prog(internal_body, p.get_engine(), config, false, false, p.get_task_executor(), true); + ProgramBuilder prog(internal_body, p.get_engine(), config, false, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 628b0d7c37d9aa..af93885a5d949c 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -280,7 +280,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool create_topology_only, bool partial_build, - std::shared_ptr task_executor, bool is_inner_program) + std::shared_ptr task_executor, + std::shared_ptr compilation_context, + bool is_inner_program) : m_config(config) , m_engine(engine) , queryMode(false) - , m_task_executor(task_executor) { + , m_task_executor(task_executor) + , m_compilation_context(compilation_context) { if (m_task_executor == nullptr) m_task_executor = cldnn::program::make_task_executor(m_config); + if (m_compilation_context == nullptr) { + m_compilation_context = cldnn::program::make_compilation_context(m_config); + } // locate global custom kernel config // and auto-load kernels from it #ifdef _WIN32 @@ -158,7 +164,14 @@ std::shared_ptr ProgramBuilder::build(const std::vector #include #include +#include "intel_gpu/runtime/compilation_context.hpp" #include "fully_connected_inst.h" -#include "compilation_context.hpp" #include "program_wrapper.h" diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index dc23440c48af67..2f684a40f7f5ec 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -14,7 +14,7 @@ #include #include -#include "compilation_context.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "fully_connected_inst.h" #include diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index a90edc00a2db98..247453944e3a4a 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -10,7 +10,7 @@ #include #include "openvino/reference/matmul.hpp" -#include "compilation_context.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" #include "gemm_inst.h" #include diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp index a13c1d1550882f..ed52f276fa5960 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/group_normalization_gpu_test.cpp @@ -7,7 +7,7 @@ #include #include #include "openvino/reference/group_normalization.hpp" -#include "compilation_context.hpp" +#include "intel_gpu/runtime/compilation_context.hpp" using namespace cldnn; From fd88a6b4551da5fdb17064037b400dc130379f6f Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 27 Oct 2023 10:06:07 +0400 Subject: [PATCH 091/275] [Snippets] Added Transpose support to SplitDimensionM (#20013) --- .../snippets/lowered/port_descriptor.hpp | 10 + .../snippets/include/snippets/op/subgraph.hpp | 2 +- .../snippets/pass/common_optimizations.hpp | 20 +- .../snippets/pass/extract_constants.hpp | 29 ++ 
.../pass/extract_unsupported_transposes.hpp | 29 ++ .../snippets/pass/fuse_transpose_brgemm.hpp | 9 +- .../{pass_manager.hpp => pass/manager.hpp} | 3 + .../snippets/pass/mha_tokenization.hpp | 3 + .../snippets/pass/split_dimension_m.hpp | 44 +++ .../snippets/pass/subgraph_manager.hpp | 49 +++ .../include/snippets/pass/subgraph_pass.hpp | 45 +++ .../include/snippets/pass/tokenization.hpp | 13 +- .../snippets/pass/transpose_decomposition.hpp | 4 +- .../snippets/include/snippets/utils.hpp | 86 +++- src/common/snippets/src/lowered/linear_ir.cpp | 6 +- .../snippets/src/lowered/loop_manager.cpp | 5 +- .../snippets/src/lowered/pass/init_loops.cpp | 23 +- .../src/lowered/pass/insert_buffers.cpp | 2 +- .../src/lowered/pass/optimize_domain.cpp | 2 +- src/common/snippets/src/op/brgemm.cpp | 2 +- src/common/snippets/src/op/load.cpp | 1 - src/common/snippets/src/op/subgraph.cpp | 4 +- .../snippets/src/pass/collapse_subgraph.cpp | 4 +- .../src/pass/common_optimizations.cpp | 369 +----------------- .../snippets/src/pass/extract_constants.cpp | 42 ++ .../pass/extract_unsupported_transposes.cpp | 57 +++ .../src/pass/fuse_transpose_brgemm.cpp | 31 +- .../{pass_manager.cpp => pass/manager.cpp} | 7 +- .../snippets/src/pass/mha_tokenization.cpp | 61 ++- .../snippets/src/pass/split_dimension_m.cpp | 275 +++++++++++++ .../snippets/src/pass/subgraph_manager.cpp | 21 + .../src/pass/transpose_decomposition.cpp | 19 +- src/common/snippets/src/utils.cpp | 113 +++--- .../snippets/tests/include/lowering_utils.hpp | 2 +- .../tests/src/pass/mha_tokenization.cpp | 61 ++- .../emitters/x64/jit_snippets_emitters.cpp | 112 +++--- .../emitters/x64/jit_snippets_emitters.hpp | 3 + src/plugins/intel_cpu/src/plugin.cpp | 12 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 4 +- .../pass/set_brgemm_cpu_blocking_params.cpp | 4 +- .../transformation_pipeline.cpp | 18 +- .../shared_tests_instances/snippets/mha.cpp | 90 ++++- .../snippets/transpose.cpp | 23 +- .../mul_add_to_fma.cpp | 2 +- 
.../plugin/shared/include/snippets/mha.hpp | 5 + .../plugin/shared/src/snippets/mha.cpp | 13 +- .../include/subgraph_mha.hpp | 13 + .../ov_snippets_models/src/subgraph_mha.cpp | 197 +++++++--- 48 files changed, 1305 insertions(+), 644 deletions(-) create mode 100644 src/common/snippets/include/snippets/pass/extract_constants.hpp create mode 100644 src/common/snippets/include/snippets/pass/extract_unsupported_transposes.hpp rename src/common/snippets/include/snippets/{pass_manager.hpp => pass/manager.hpp} (99%) create mode 100644 src/common/snippets/include/snippets/pass/split_dimension_m.hpp create mode 100644 src/common/snippets/include/snippets/pass/subgraph_manager.hpp create mode 100644 src/common/snippets/include/snippets/pass/subgraph_pass.hpp create mode 100644 src/common/snippets/src/pass/extract_constants.cpp create mode 100644 src/common/snippets/src/pass/extract_unsupported_transposes.cpp rename src/common/snippets/src/{pass_manager.cpp => pass/manager.cpp} (97%) create mode 100644 src/common/snippets/src/pass/split_dimension_m.cpp create mode 100644 src/common/snippets/src/pass/subgraph_manager.cpp diff --git a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 551ef1907037ab..2c74867d8436d6 100644 --- a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -65,6 +65,16 @@ class PortDescriptor { VectorDims m_subtensor_shape{}; /// \brief The corresponding abstract/physical register size_t m_reg = 0; + + /// Notes: + /// - `m_tensor_shape` is dense shape which is controlled by expression outputs. + /// It means that the result of data writing of expression outputs should be read using this shape by the next expression inputs. + /// - `m_layout` is the order of data reading or writing by MemoryAccess ops. Note that only MemoryAccess ops may have `m_layout`. 
+ /// For other expressions this order parameter is simply ignored for now. + /// if it's input port of MemoryAccess expression: + /// - `m_layout` shows how the data should be read (by which strides) using m_tensor_shape. + /// If it's output port of MemoryAccess expression: + /// - `m_layout` shows how the data should be written (by which strides) to get m_tensor_shape. }; class PortDescriptorUtils { diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index b17031e2a67d1c..b642bbd7a23ccb 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -10,7 +10,7 @@ #include #include "openvino/op/op.hpp" #include "openvino/core/rt_info.hpp" -#include "snippets/pass_manager.hpp" +#include "snippets/pass/manager.hpp" #include "snippets/shape_inference/shape_inference.hpp" #include "snippets/lowered/pass/pass.hpp" diff --git a/src/common/snippets/include/snippets/pass/common_optimizations.hpp b/src/common/snippets/include/snippets/pass/common_optimizations.hpp index 30ec301eb92c43..aba1ef9fb919df 100644 --- a/src/common/snippets/include/snippets/pass/common_optimizations.hpp +++ b/src/common/snippets/include/snippets/pass/common_optimizations.hpp @@ -5,7 +5,6 @@ #pragma once #include "openvino/pass/graph_rewrite.hpp" -#include "snippets/op/subgraph.hpp" #include "snippets/pass/tokenization.hpp" namespace ov { @@ -13,22 +12,15 @@ namespace snippets { namespace pass { class CommonOptimizations : public ov::pass::MatcherPass { + class SubgraphPass; + class SubgraphManager; + friend class ExtractConstants; + friend class ExtractUnsupportedTransposes; + friend class SplitDimensionM; + public: OPENVINO_RTTI("CommonOptimizations", "0"); CommonOptimizations(const SnippetsTokenization::Config& config = {}); - - // Returns True if parallelism work amount can be increased using SplitDimensionM optimization - static bool CanOptimizeParallelWA(const 
std::shared_ptr& node, size_t concurrency); - -private: - // Move up Constants which aren't scalars from body to Subgraph and replace them with Parameters inside body - void ExtractConstants(const std::shared_ptr& subgraph); - // Move up unsupported Transposes on Parameter outputs from body - void ExtractUnsupportedTransposes(const std::shared_ptr& subgraph); - // Insert Reshape nodes after and before Parameters and Results in Subgraphs with MatMul inside - // to split dimension M for MatMuls to increase work amount for parallelism - // Note: works only with 3D MHA patterns - void SplitDimensionM(const std::shared_ptr& subgraph, size_t concurrency); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/pass/extract_constants.hpp b/src/common/snippets/include/snippets/pass/extract_constants.hpp new file mode 100644 index 00000000000000..17db3101c95138 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/extract_constants.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "subgraph_pass.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface ExtractConstants + * @brief Moves up Constants which aren't scalars outside of the Subgraph's body and replaces them with Parameters inside body + * @ingroup snippets + */ +class ExtractConstants: public CommonOptimizations::SubgraphPass { +public: + OPENVINO_RTTI("ExtractConstants", "0"); + ExtractConstants() = default; + + bool run_on_subgraph(const std::shared_ptr& subgraph) override; +}; + + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/extract_unsupported_transposes.hpp b/src/common/snippets/include/snippets/pass/extract_unsupported_transposes.hpp new file mode 100644 index 00000000000000..48b1c2fed88ad1 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/extract_unsupported_transposes.hpp @@ -0,0 
+1,29 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "subgraph_pass.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface ExtractUnsupportedTransposes + * @brief Moves up unsupported Transposes on Parameter outputs from body + * @ingroup snippets + */ +class ExtractUnsupportedTransposes: public CommonOptimizations::SubgraphPass { +public: + OPENVINO_RTTI("ExtractUnsupportedTransposes", "0"); + ExtractUnsupportedTransposes() = default; + + bool run_on_subgraph(const std::shared_ptr& subgraph) override; +}; + + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp index 69266fc90ffc62..faf320a8d8c7e7 100644 --- a/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp +++ b/src/common/snippets/include/snippets/pass/fuse_transpose_brgemm.hpp @@ -18,18 +18,17 @@ namespace pass { /** * @interface FuseTransposeBrgemm * @brief Fuses Transpose with Brgemm node, fusing on both Brgemm inputs and output is supported. Applicable to - * Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o), - * but only 0213 Transpose is currently supported. + * Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o). + * Supported any Transpose order where last index is equal to [rank - 1] - it means that last dimension isn't moved. 
* @ingroup snippets */ class FuseTransposeBrgemm: public ov::pass::MatcherPass { public: OPENVINO_RTTI("FuseTransposeBrgemm", "0"); FuseTransposeBrgemm(); - static const std::set> supported_cases; -private: - static bool is_supported_transpose(const Output& transpose_port); + static bool is_supported_transpose(const Output& transpose_out); + static bool is_supported_transpose_order(const std::vector& order); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/pass_manager.hpp b/src/common/snippets/include/snippets/pass/manager.hpp similarity index 99% rename from src/common/snippets/include/snippets/pass_manager.hpp rename to src/common/snippets/include/snippets/pass/manager.hpp index 04d6ad57c9a6e0..d83a102acec313 100644 --- a/src/common/snippets/include/snippets/pass_manager.hpp +++ b/src/common/snippets/include/snippets/pass/manager.hpp @@ -3,15 +3,18 @@ // #pragma once + #include "openvino/pass/manager.hpp" #include "openvino/pass/pass.hpp" #include "openvino/pass/validate.hpp" + #include namespace ov { namespace snippets { namespace pass { + /** * @brief Manager is like ov::pass::Manager, but allows to insert new passes at arbitrary places in the pipeline * @ingroup snippets diff --git a/src/common/snippets/include/snippets/pass/mha_tokenization.hpp b/src/common/snippets/include/snippets/pass/mha_tokenization.hpp index acd887b0f4a2a0..f5d637f1abb15a 100644 --- a/src/common/snippets/include/snippets/pass/mha_tokenization.hpp +++ b/src/common/snippets/include/snippets/pass/mha_tokenization.hpp @@ -43,6 +43,9 @@ class TokenizeMHASnippets: public ov::pass::MatcherPass { public: OPENVINO_RTTI("TokenizeMHASnippets", "0"); TokenizeMHASnippets(const SnippetsTokenization::Config& config = {}); + + static std::vector get_fusion_transpose_order(size_t rank); + static std::vector get_decomposed_transpose_order(size_t rank); static bool is_matmul0_supported(const std::shared_ptr& matmul); }; diff --git 
a/src/common/snippets/include/snippets/pass/split_dimension_m.hpp b/src/common/snippets/include/snippets/pass/split_dimension_m.hpp new file mode 100644 index 00000000000000..b57841a5e9cf0f --- /dev/null +++ b/src/common/snippets/include/snippets/pass/split_dimension_m.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "subgraph_pass.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface SplitDimensionM + * @brief Inserts Reshape nodes before inputs and after outputs of Subgraphs with MatMul inside + * to split dimension M for MatMuls. It allows to increase work amount for parallelism + * @ingroup snippets + */ +class SplitDimensionM: public CommonOptimizations::SubgraphPass { +public: + OPENVINO_RTTI("SplitDimensionM", "0"); + SplitDimensionM(size_t concurrency) : m_concurrency(concurrency) {} + + bool run_on_subgraph(const std::shared_ptr& subgraph) override; + + // Return True if the MatMul node is supported by this optimization + static bool is_supported_matmul(const std::shared_ptr& node); + // Returns True if parallelism work amount (concurrency) can be increased by this optimization + static bool can_be_optimized(const std::shared_ptr& node, size_t concurrency); + +private: + static std::shared_ptr get_matmul(const std::shared_ptr& subgraph); + static std::pair get_splited_dimensions(size_t batch_dim, size_t m_dim, size_t optimal_parallelism_work_amount); + static bool split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim); + + void reshape_subgraph(const std::shared_ptr& subgraph, const ov::Shape& shape, size_t batch_m_dim, size_t new_m_dim); + + size_t m_concurrency; +}; + + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/subgraph_manager.hpp b/src/common/snippets/include/snippets/pass/subgraph_manager.hpp new file 
mode 100644 index 00000000000000..2aeea775987352 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/subgraph_manager.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "snippets/pass/common_optimizations.hpp" + +#include "snippets/pass/subgraph_pass.hpp" +#include "snippets/op/subgraph.hpp" + +namespace ov { +namespace snippets { +namespace pass { +/** + * @brief Manager class allows to manage transformation passes (SubgraphPasses) on Subgraph ops. + * See SubgraphPasses description for more details. + * It's light version of ov::Manager implementation the purpose of which is to change only Subgraph as separate node in model. + * @ingroup snippets + */ +class CommonOptimizations::SubgraphManager { +public: + SubgraphManager() = default; + + /// @brief Register given transformation class type to execution list + /// @return shared_ptr to the transformation instance + template + std::shared_ptr register_pass(Args&&... args) { + static_assert(std::is_base_of::value, "pass not derived from SubgraphPass base"); + auto pass = std::make_shared(std::forward(args)...); + m_pass_list.push_back(std::static_pointer_cast(pass)); + return pass; + } + + /// @brief Runs registered transformations on a given model + /// @param subgraph Input model + /// @return Returns true if the model was changed by transformations, false otherwise. 
+ bool run_passes(std::shared_ptr subgraph); + +protected: + std::vector> m_pass_list; +}; +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/subgraph_pass.hpp b/src/common/snippets/include/snippets/pass/subgraph_pass.hpp new file mode 100644 index 00000000000000..c8d65f0bc536bc --- /dev/null +++ b/src/common/snippets/include/snippets/pass/subgraph_pass.hpp @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "snippets/pass/common_optimizations.hpp" + + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @brief Base class for Subgraph passes. + * The pass runs on `Subgraph` op that allows users to transform + * `Subgraph` as node and `body` of this `Subgraph` as model at the same time. + * These passes may change `Subgraph` as node, its `body` and other ops around `Subgraph` in model. + * To avoid unsafe changes of other ops in model, SubgraphPass is not derived from ov::Pass to avoid + * registration to ov::Model + * @ingroup snippets + */ +class CommonOptimizations::SubgraphPass { +public: + SubgraphPass() = default; + virtual ~SubgraphPass() = default; + + virtual bool run_on_subgraph(const std::shared_ptr& subgraph) = 0; + + void set_name(const std::string& name) { m_name = name; } + std::string get_name() const { return m_name; } + + using type_info_t = DiscreteTypeInfo; + virtual const type_info_t& get_type_info() const = 0; + +private: + std::string m_name; +}; + + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/tokenization.hpp b/src/common/snippets/include/snippets/pass/tokenization.hpp index 9b070fb13c3445..a222bd72ef4f54 100644 --- a/src/common/snippets/include/snippets/pass/tokenization.hpp +++ b/src/common/snippets/include/snippets/pass/tokenization.hpp @@ -51,6 +51,11 @@ class EnumerateNodes : public 
ov::pass::ModelPass { * 2. MHA tokenization * 3. Common tokenization * 4. Some common transformations for Subgraphs. For example, FakeQuantize decomposition + * Naming policy: + * - During tokenization, the new Subgraph op takes the name of the last tokenized op. + * This is needed to preserve the output names of the model in cases when the tokenized op was before a model Result. + * - If some transformation (for example, SplitDimensionM) inserts a new op after the Subgraph, + * that op should take the name of this Subgraph to preserve the output name. The Subgraph name is then updated with the suffix "_original". * @ingroup snippets */ class SnippetsTokenization : public ov::pass::ModelPass { @@ -61,9 +66,9 @@ class SnippetsTokenization : public ov::pass::ModelPass { * @ingroup snippets */ struct Config { - Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true) + Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true, std::set mha_transpose_ranks = {3, 4}) : concurrency(concurrency), split_m_dimension(split_m_dimension), - mha_token_enable_transpose_on_output(enable_transpose_on_output) {} + mha_token_enable_transpose_on_output(enable_transpose_on_output), mha_supported_transpose_ranks(std::move(mha_transpose_ranks)) {} size_t concurrency = 1; // True if "SplitDimensionM" optimization is enabled. Otherwise, it's disabled. @@ -72,6 +77,10 @@ class SnippetsTokenization : public ov::pass::ModelPass { // Otherwise, it may be fused into Subgraph if possible // TODO [111813]: Remove please when the ticket 111813 is implemented bool mha_token_enable_transpose_on_output = true; + // Set of supported Transpose shape ranks for tokenization in MHATokenization pass. + // Note that in general Snippets support Transpose of any ranks. + // But at the moment Transpose is used only in MHA pattern where 3D and 4D tensors are supported.
+ std::set mha_supported_transpose_ranks = { 3, 4 }; }; OPENVINO_RTTI("SnippetsTokenization", "0"); diff --git a/src/common/snippets/include/snippets/pass/transpose_decomposition.hpp b/src/common/snippets/include/snippets/pass/transpose_decomposition.hpp index 013a538172ac7e..e9bd1506b93c60 100644 --- a/src/common/snippets/include/snippets/pass/transpose_decomposition.hpp +++ b/src/common/snippets/include/snippets/pass/transpose_decomposition.hpp @@ -20,7 +20,9 @@ class TransposeDecomposition: public ov::pass::MatcherPass { public: OPENVINO_RTTI("TransposeDecomposition", "0"); TransposeDecomposition(); - static const std::set> supported_cases; + + static bool is_supported_transpose(const Output& transpose_out); + static bool is_supported_transpose_order(const std::vector& order); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index d10930125e0ed0..c77eecd8bb15b0 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -25,12 +25,6 @@ inline auto is_scalar_constant(const std::shared_ptr& source_output_no return ov::is_type(source_output_node) && ov::shape_size(source_output_node->get_shape()) == 1; } -ov::PartialShape get_planar_pshape(const Input& out); -ov::PartialShape get_planar_pshape(const Output& out); -ov::PartialShape get_planar_pshape(const ov::PartialShape& shape, const std::vector& layout); -VectorDims pshape_to_vdims(const PartialShape&); -ov::PartialShape vdims_to_pshape(const VectorDims&); - inline auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t { return allocation_rank < 0 ? 
allocation_rank + static_cast(shape_rank) + 1 : allocation_rank; } @@ -55,11 +49,87 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } -VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& layout); -VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc); +template +inline T div_up(const T a, const U b) { + return static_cast((a + b - 1) / b); +} + +/* ----- Shape `getters` ----- */ +/** + * @brief Returns a dense shape after applying the order. + * It means that the shape dimensions will be reordered in accordance with order indices to produce planar shape + * @param shape preordered (original) partial shape + * @param order order + * @return reordered partial shape: `planar_shape[i]` = `shape[order[i]]` + * Example, shape = [16, 2, 32, 64], order = [2, 0, 1, 3] + * planar_shape = [32, 16, 2, 64] + */ +ov::PartialShape get_planar_pshape(const ov::PartialShape& shape, const std::vector& order); +/** + * @brief Returns original shape before applying the order. + * It means that the shape dimensions have been already reordered in accordance with order indices to produce planar shape + * @param shape planar (ordered) partial shape + * @param order order + * @return preordered partial shape: `shape[order[i]]` = `planar_shape[i]` where `shape` is shape before applying the order. + * Example, planar_shape = [16, 2, 32, 64], order = [2, 0, 1, 3] + * shape = [2, 32, 16, 64] + */ +ov::PartialShape get_preordered_pshape(const ov::PartialShape& shape, const std::vector& order); +/** + * @brief Returns a dense shape of node input.
+ * It means that the node input shape dimensions will be reordered in accordance with order indices to produce planar shape + * @param in input of node + * @return new reordered partial shape: `planar_shape[i]` = `shape[order[i]]` + */ +ov::PartialShape get_planar_pshape(const Input& in); +/** + * @brief Returns original shape of node output before applying the order. + * It means that the preordered output shape dimensions have been already reordered in accordance with order indices to produce planar shape + * @param out output of node + * @return preordered partial shape: `shape[order[i]]` = `planar_shape[i]` where `shape` is shape before applying the order. + */ +ov::PartialShape get_preordered_pshape(const Output& out); +/** + * @brief Returns a dense shape after applying the order. + * It means that the shape dimensions will be reordered in accordance with order indices to produce planar shape + * @param shape preordered (original) shape + * @param order order + * @return reordered shape: `planar_shape[i]` = `shape[order[i]]` + * Example, shape = [16, 2, 32, 64], order = [2, 0, 1, 3] + * planar_shape = [32, 16, 2, 64] + */ +VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& order); +/** + * @brief Returns original shape before applying the order. + * It means that the preordered shape dimensions have been already reordered in accordance with order indices to produce planar shape + * @param shape planar (ordered) shape + * @param order order + * @return preordered shape: `shape[order[i]]` = `planar_shape[i]` where `shape` is shape before applying the order. + * Example, planar_shape = [16, 2, 32, 64], order = [2, 0, 1, 3] + * shape = [2, 32, 16, 64] + */ +VectorDims get_preordered_vdims(const VectorDims& shape, const std::vector& order); +/** + * @brief Returns a dense shape of expression input port.
+ * It means that the input shape dimensions will be reordered in accordance with order indices to produce planar shape + * @param expr_port input expression port + * @return new reordered shape: `planar_shape[i]` = `shape[order[i]]` + */ VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port); +/** + * @brief Returns original shape before applying the order of expression output port. + * It means that the preordered output shape dimensions have been already reordered in accordance with order indices to produce planar shape + * @param expr_port output expression port + * @return preordered shape: `shape[order[i]]` = `planar_shape[i]` where `shape` is shape before applying the order. + */ +VectorDims get_preordered_vdims(const snippets::lowered::ExpressionPort& expr_port); + bool is_dynamic_vdims(const VectorDims& shape); +VectorDims pshape_to_vdims(const PartialShape&); +ov::PartialShape vdims_to_pshape(const VectorDims&); +/* --------------------------- */ + } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index adf3894f71b8b7..4e1f730db6c428 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -365,10 +365,10 @@ VectorDims LinearIR::get_master_shape() const { } // Note: Snippets would benefit from a more generic master_shape calculation approach.
// It will be implemented in the scope of ROI propagation activity (ticket 120505) - const auto& result_parent = out_exprs[0]->get_input_port_connector(0)->get_source().get_expr(); + const auto& source = out_exprs[0]->get_input_port_connector(0)->get_source(); if (!m_config.m_enable_domain_optimization && out_exprs.size() == 1 && - ov::is_type(result_parent->get_node())) { - master_shape = utils::get_planar_vdims(out_exprs[0]->get_input_port_descriptor(0)); + ov::is_type(source.get_expr()->get_node())) { + master_shape = utils::get_preordered_vdims(source); } else { for (const auto& oe : out_exprs) { const auto& port_desc = oe->get_input_port_descriptor(0); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 2bef20bb54e9d5..da8da2c2376f1f 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -181,9 +181,8 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, std::vector loop_subtensor; std::vector loop_tensor(loop_depth, 1); for (const auto& exit_point : loop_exit_points) { - const auto& desc = exit_point.get_descriptor_ptr(); - const auto shape = utils::get_planar_vdims(desc); - auto subtensor = desc->get_subtensor(); + const auto shape = utils::get_preordered_vdims(exit_point); + auto subtensor = exit_point.get_descriptor_ptr()->get_subtensor(); if (subtensor.empty()) { subtensor.resize(loop_depth, 1); subtensor[subtensor.size() - 1] = vector_size; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 47a77df23401e2..8128ea0253d2a7 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -16,7 +16,7 @@ namespace pass { using LoopPort = LinearIR::LoopManager::LoopPort; namespace { -int64_t get_dim_stride(size_t dim, const std::vector& layout, const std::vector& shape) { 
+int64_t get_input_stride(size_t dim, const std::vector& layout, const VectorDims& shape) { int64_t stride = 1; for (int i = static_cast(layout.size()) - 1; i >= 0; i--) { if (layout[i] == dim) { @@ -26,6 +26,13 @@ int64_t get_dim_stride(size_t dim, const std::vector& layout, const std: } return stride; } +int64_t get_output_stride(size_t dim, const VectorDims& shape) { + int64_t stride = 1; + for (size_t i = dim + 1; i < shape.size(); ++i) { + stride *= static_cast(shape[i]); + } + return stride; +} } // namespace InitLoops::InitLoops() : Pass() {} @@ -42,7 +49,8 @@ void InitLoops::init_ptr_increments(std::vector& loop_inputs, std::vec const auto& dim = *(layout.rbegin() + dim_idx); // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(shape[dim] == 1 && work_amount != 1)) { - loop_input.ptr_increment = get_dim_stride(dim, source.get_descriptor_ptr()->get_layout(), shape); + // Input layout shows how we should read data by which order and strides + loop_input.ptr_increment = get_input_stride(dim, source.get_descriptor_ptr()->get_layout(), shape); } } } @@ -54,15 +62,12 @@ void InitLoops::init_ptr_increments(std::vector& loop_inputs, std::vec const auto loop_ids = port->get_expr()->get_loop_ids(); const auto& layout = port->get_descriptor_ptr()->get_layout(); const auto& shape = port->get_descriptor_ptr()->get_shape(); - const auto& dim = *(layout.rbegin() + dim_idx); - // Ticket: 113106 - // WA: the current logic doesn't support the case with transposed output shape for brgemm layer - // but for all existing cases planar layout can be used - std::vector planar(layout.size()); - std::iota(planar.begin(), planar.end(), 0); + const auto original_dim = layout.size() - 1 - dim_idx; + const auto& dim = std::distance(layout.cbegin(), std::find(layout.cbegin(), layout.cend(), original_dim)); // If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout if (!(shape[dim] == 1 && work_amount != 1)) 
{ - loop_output.ptr_increment = get_dim_stride(dim, planar, shape); + // Output layout shows how we already written data by which order and strides + loop_output.ptr_increment = get_output_stride(dim, shape); } } } diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index da5ffc11c3169d..aefaca42f4094e 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -37,7 +37,7 @@ ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager, const std::vector& parent_loop_ids, const ExpressionPort& expr_port, const int allocation_rank) { - const auto& planar_shape = utils::get_planar_vdims(expr_port); + const auto planar_shape = utils::get_preordered_vdims(expr_port); const size_t rank = allocation_rank >= 0 ? std::min(static_cast(allocation_rank), planar_shape.size()) : planar_shape.size(); ov::Shape allocation_shape(rank); diff --git a/src/common/snippets/src/lowered/pass/optimize_domain.cpp b/src/common/snippets/src/lowered/pass/optimize_domain.cpp index f2d2fd43baf96c..09dadc77efe6e5 100644 --- a/src/common/snippets/src/lowered/pass/optimize_domain.cpp +++ b/src/common/snippets/src/lowered/pass/optimize_domain.cpp @@ -98,7 +98,7 @@ bool OptimizeDomain::run(snippets::lowered::LinearIR& linear_ir) { const ExpressionPtr& shape_producing_expr = blocked_input_shapes ? 
first_consumer : io_expr; - const auto& shape = utils::get_planar_vdims(shape_producing_expr->get_output_port_descriptor(0)); + const auto& shape = utils::get_preordered_vdims(shape_producing_expr->get_output_port(0)); OPENVINO_ASSERT(std::none_of(shape.begin(), shape.end(), [](size_t d) {return d == snippets::IShapeInferSnippets::DYNAMIC_DIMENSION; }), "OptimizeDomain pass does not support dynamic shapes"); diff --git a/src/common/snippets/src/op/brgemm.cpp b/src/common/snippets/src/op/brgemm.cpp index 5cce5d85c13a82..6ea77e447c449b 100644 --- a/src/common/snippets/src/op/brgemm.cpp +++ b/src/common/snippets/src/op/brgemm.cpp @@ -114,7 +114,7 @@ ov::element::Type Brgemm::get_output_type() const { std::vector Brgemm::get_planar_input_shapes(const std::vector>& inputs) const { OPENVINO_ASSERT(inputs.size() == 2, "Brgemm::get_planar_input_shapes() expects 2 inputs"); - return {utils::get_planar_pshape(inputs[0]), utils::get_planar_pshape(inputs[1]) }; + return { utils::get_planar_pshape(inputs[0]), utils::get_planar_pshape(inputs[1]) }; } ov::PartialShape Brgemm::get_planar_output_shape(const ov::PartialShape& output_shape) const { diff --git a/src/common/snippets/src/op/load.cpp b/src/common/snippets/src/op/load.cpp index 868ed4294e6dab..065372f7a76747 100644 --- a/src/common/snippets/src/op/load.cpp +++ b/src/common/snippets/src/op/load.cpp @@ -79,7 +79,6 @@ IShapeInferSnippets::Result LoadReshape::ShapeInfer::infer(const std::vector @@ -329,7 +329,7 @@ VectorDims Subgraph::infer_master_shape() { const auto& res_input = res->input(0); OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result have dynamic shape in static pipeline"); // We need to account to the shape's layout stored in Output rt_info - const auto& planar_shape = utils::get_planar_pshape(res_input.get_source_output()); + const auto& planar_shape = utils::get_preordered_pshape(res_input.get_source_output()); output_dims.emplace_back(planar_shape.get_shape()); } } diff --git 
a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 6ed1054adac40c..7ce3d658e56a58 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -79,8 +79,8 @@ auto is_supported_op(const std::shared_ptr &n) -> bool { const auto& order = as_type_ptr(n->get_input_node_shared_ptr(1)); if (order) { const auto order_value = order->cast_vector(); - return (TransposeDecomposition::supported_cases.count(order_value) != 0) || - (is_brgemm_case && FuseTransposeBrgemm::supported_cases.count(order_value) != 0); + return (TransposeDecomposition::is_supported_transpose_order(order_value)) || + (is_brgemm_case && FuseTransposeBrgemm::is_supported_transpose_order(order_value)); } } return false; diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 609496cd0265e5..1e10d2dc6dfe6e 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -11,6 +11,10 @@ #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/transform_convert.hpp" #include "snippets/pass/validate.hpp" +#include "snippets/pass/split_dimension_m.hpp" +#include "snippets/pass/extract_constants.hpp" +#include "snippets/pass/extract_unsupported_transposes.hpp" +#include "snippets/pass/subgraph_manager.hpp" #include "snippets/op/subgraph.hpp" #include "snippets/itt.hpp" @@ -21,343 +25,9 @@ namespace ov { namespace snippets { namespace pass { -namespace { -size_t get_lcm(size_t a, size_t b) { - std::function get_gcd; - get_gcd = [&get_gcd](size_t a, size_t b) { - if (b == 0) - return a; - return get_gcd(b, a % b); - }; - return a / get_gcd(a, b) * b; -} - -bool is_supported_matmul_for_split_dim_m_optimization(const std::shared_ptr& node) { - const auto matmul = ov::as_type_ptr(node); - return matmul && !matmul->get_transpose_a() && 
!matmul->is_dynamic() && node->get_shape().size() == 3; // It's needed only for 3D MHA patterns -} -} // namespace - -bool CommonOptimizations::CanOptimizeParallelWA(const std::shared_ptr& node, size_t concurrency) { - if (!is_supported_matmul_for_split_dim_m_optimization(node)) - return false; - const auto mm_shape = node->get_shape(); - const auto current_parallel_work_amount = - std::accumulate(mm_shape.rbegin() + 2, mm_shape.rend(), size_t(1), std::multiplies()); - const auto dim_M = *(mm_shape.rbegin() + 1); - return (current_parallel_work_amount < concurrency) && - (current_parallel_work_amount * dim_M >= concurrency); -} - -void CommonOptimizations::SplitDimensionM(const std::shared_ptr& subgraph, size_t concurrency) { - // To increase parallelism work in 3D cases for MHA pattern, - // we split 1st dimension (starting from 0th) into 2 new dimensions to get 4D Shapes where - // - 0th and 1st dimensions are used in parallel scheduling, - // - 2nd and 3rd dimensions are used in kernel - // Note: 3D Patterns don't contain Transpose inside so the reshaping is valid - - // It's needed only for MHA patterns. Need to add support for common patterns - if (!subgraph->has_domain_sensitive_ops()) - return; - - const auto& body = subgraph->body_ptr(); - const auto& parameters = body->get_parameters(); - // [107806]: If count of Parameters isn't equal to Subgraph inputs (it's possible case in general), - // we cannot garantee correct extraction since we don't have correct connections between body I/O and Subgraph I/O. 
- OPENVINO_ASSERT(parameters.size() == subgraph->input_values().size(), - "Failed to extract unsupported transposes: the count of Parameters isn't equal to Subgraph inputs"); - - // Need to find MatMul0 and check output shape - const auto& ops = body->get_ordered_ops(); - const auto mm_it = std::find_if(ops.begin(), ops.end(), - [](const std::shared_ptr& node){ return ov::is_type(node); }); - if (mm_it == ops.end()) - return; - - const auto matmul0 = *mm_it; - if (!is_supported_matmul_for_split_dim_m_optimization(matmul0)) - return; - - auto get_dim_M = [](const ov::Shape& shape) { - return *(shape.rbegin() + 1); - }; - - const auto mm_shape = matmul0->get_shape(); - const auto m_dim = get_dim_M(mm_shape); // M - const auto batch_dim = - std::accumulate(mm_shape.rbegin() + 2, mm_shape.rend(), size_t(1), std::multiplies()); // B (batch) - - // We skip optimization if the current batch is optimal for concurrency - const auto optimal_parallelism_work_amount = concurrency; - if (batch_dim % optimal_parallelism_work_amount == 0) - return; - - size_t batch_m_dim = 1; - size_t new_m_dim = m_dim; - - auto is_optimized = [&](size_t batch_m_dim) { - return batch_m_dim > 1; - }; - - // [ First Step ] - // Need to find optimized dimension splitting: [b1..bk, m, n] -> [b1..bk, batch_m_dim, new_m_dim, n] - // The work amount for parallelism should be divided by max thread count in ideal case - // that all threads have the same full work amount (avoid of thread downtime) - // If it's impossible, we select such values so that as many threads as possible have work (see [ Second Step ]) - // For example, there are 16 threads and shape [6, 512, 32] - // LCM(6, 16) = 48 <- ideal work amount for parallelism - // new_shape [6, 48 / 6, 512 / (48 / 6), 32 ] => [6, 8, 64, 32] - // Each thread has parallelism_work_amount = 6 * 8 / nthrs = 3 - const auto lcm = get_lcm(batch_dim, optimal_parallelism_work_amount); // LCM(b, nthrs) - const auto batch_dim_multiplier = lcm / batch_dim; // LCM(b, 
nthrs) / b - const auto needed_new_dim = m_dim / batch_dim_multiplier; // m / (LCM(b, nthrs) / b) - needed factors of dimension m - if (batch_dim_multiplier * needed_new_dim == m_dim && is_optimized(batch_dim_multiplier)) { - batch_m_dim = batch_dim_multiplier; - new_m_dim = needed_new_dim; - } else { - // [ Second Step ] - // If we couldn't optimally split on the previous step, try the second step. - // The algorithm finds the more optimal parallelism work amount [batch_dim * batch_m_dim], - // where batch_m_dim is divisor of dimension M. - // The optimal parallelism work amount means the case when as many threads as possible have work - // For example, there are 8 threads and shape [5, 384, 32] - // 768 = [2 x 192] = [3 x 128] = [4 x 96] = [6 x 64] - // - [5, 2, 192, 32] - WA = 10 = 8 + 2 (6 threads calculates once and 2 threads twice) - // - [5, 3, 128, 32] - WA = 15 = 8 + 7 (all threads have 2 kernel except one thread) <- the most optimal case - // - [5, 4, 96, 32] - WA = 20 = 8 x 2 + 4 - // - [5, 6, 64, 32] - WA = 30 = 8 x 3 + 6 - // The most optimal and possible case is [5, 3, 128, 32] - almost all threads executes kernel twice - // Heuristic value for a quick exit from the algorithm. - // The value shows the number of threads in percentages that perform the most equal work - const auto optimal_thread_num_percent = 0.8; - size_t optimal_remainder = 1; - auto get_remainder = [batch_dim, optimal_parallelism_work_amount](const size_t potential_batch_dim) { - return (batch_dim * potential_batch_dim) % optimal_parallelism_work_amount; - }; - - auto update_optimal_params = [&](size_t divisor_0, size_t divisor_1) { - const auto remainder = batch_dim * divisor_0 % optimal_parallelism_work_amount; - if (remainder > optimal_remainder || remainder == 0) { - optimal_remainder = remainder; - batch_m_dim = divisor_0; - new_m_dim = divisor_1; - } - }; - - // Firstly we have shape [batch, 1, m_dim, smth]. 
- // So at the beginning we have parallel_work_amount = batch x 1 - optimal_remainder = get_remainder(1); - const auto root = std::sqrt(m_dim) + 1; - for (size_t divisor_0 = 2; divisor_0 < root; ++divisor_0) { - const size_t divisor_1 = m_dim / divisor_0; - if (divisor_0 * divisor_1 != m_dim) - continue; - - update_optimal_params(divisor_0, divisor_1); - update_optimal_params(divisor_1, divisor_0); - if ((static_cast(optimal_remainder) / static_cast(optimal_parallelism_work_amount) > optimal_thread_num_percent) || - (optimal_remainder == 0)) { - break; - } - } - } - - OPENVINO_ASSERT(batch_m_dim * new_m_dim == m_dim, "Incorrect dimension M splitting!"); - // nothing to split - if (!is_optimized(batch_m_dim)) - return; - - /***** Reshape insertion *****/ - - // There are two Parameter variants: - // - Parameter on branches for Second input of MatMul - the shape should be only unsqueezed (add just 1) - // - Other Parameters (on First input of MatMuls and between) - the shape should be splitted on M dimension - - bool updated = false; - std::set> reshaped_params; - - auto insert_reshape = [&](const std::shared_ptr& param, const ov::Shape& new_shape) { - const auto index = std::distance(parameters.begin(), std::find(parameters.begin(), parameters.end(), param)); - const auto shape_const = std::make_shared(ov::element::i32, ov::Shape{new_shape.size()}, new_shape); - const auto reshape = std::make_shared(subgraph->input_value(index), shape_const, false); - subgraph->input(index).replace_source_output(reshape); - param->set_partial_shape(new_shape); - reshaped_params.insert(param); - updated = true; - }; - - auto get_updated_shape = [&](const ov::Shape& shape, bool split_m_dim) { - const auto current_m_dim = get_dim_M(shape); - OPENVINO_ASSERT(!split_m_dim || current_m_dim == 1 || current_m_dim == m_dim, "Incorrect shape for splitting!"); - ov::Shape new_shape = shape; - if ((split_m_dim && current_m_dim == 1) || !split_m_dim) { - new_shape.insert((new_shape.rbegin() + 
2).base(), 1); - } else { - new_shape.insert((new_shape.rbegin() + 2).base(), batch_m_dim); - *(new_shape.rbegin() + 1) = new_m_dim; - } - OPENVINO_ASSERT(ov::shape_size(new_shape) == ov::shape_size(shape), "Incorrect shape splitting!"); - return new_shape; - }; - - auto reshape_parameter = [&](const std::shared_ptr& node, bool split_m_dim = true) { - const auto param = ov::as_type_ptr(node); - if (!param || reshaped_params.count(param) > 0) - return; - insert_reshape(param, get_updated_shape(param->get_partial_shape().get_shape(), split_m_dim)); - }; - - auto update_matmul_second_branch = [&](const std::shared_ptr& node) { - auto parent = node->get_input_node_shared_ptr(1); - while (!ov::is_type(parent)) { - if (parent->get_input_size() > 1) { - for (const auto& input_source : parent->input_values()) { - reshape_parameter(input_source.get_node_shared_ptr(), false); - } - } - - // [107731]: It's covered my MHA tokenization - parent = parent->get_input_node_shared_ptr(0); - } - reshape_parameter(parent, false); - }; - - // Firstly, Unsqueeze parameters on second branches of MatMuls - for (const auto& op : ops) { - if (ov::is_type(op)) { - update_matmul_second_branch(op); - } - } - - // Secondly, Update All M dimensions for remaining parameters - for (const auto& param : parameters) { - if (reshaped_params.count(param) == 0) - reshape_parameter(param, true); - } - - // Return the previous shape on outputs - for (size_t i = 0; i < subgraph->get_output_size() && updated; ++i) { - const auto output_shape = subgraph->get_output_shape(i); - if (is_scalar(output_shape)) - continue; - - const auto& target_inputs = subgraph->get_output_target_inputs(i); - const auto shape_const = std::make_shared(ov::element::i32, ov::Shape{output_shape.size()}, output_shape); - const auto reshape = std::make_shared(subgraph->output(i), shape_const, false); - // Save output name - const auto original_output = body->get_results()[i]->get_input_node_shared_ptr(0); - const auto original_name = 
original_output->get_friendly_name(); - reshape->set_friendly_name(original_name); - original_output->set_friendly_name(original_name + "_original"); - - for (const auto& input : target_inputs) { - input.replace_source_output(reshape); - // Result input tensor name was changed, the name has to be restored - if (ov::is_type(input.get_node())) { - input.get_tensor_ptr()->add_names(subgraph->output(i).get_tensor_ptr()->get_names()); - } - } - subgraph->output(i).get_tensor_ptr()->set_names({}); - updated = true; - } - subgraph->set_friendly_name(subgraph->get_friendly_name() + "_original"); - - // Need to update inner Shapes and Softmax Axis - if (updated) { - for (const auto &op : ops) { - if (const auto softmax_v8 = ov::as_type_ptr(op)) { - softmax_v8->set_axis(-1); - } else if (const auto softmax_v1 = ov::as_type_ptr(op)) { - softmax_v1->set_axis(softmax_v1->get_output_partial_shape(0).size()); // since new_shape.size() = old_shape.size() + 1 - } else if (const auto broadcast = ov::as_type_ptr(op)) { - // Broadcast is tokenized only between MatMuls -> Split M dimension - const auto shape_const = ov::as_type_ptr(broadcast->input_value(1).get_node_shared_ptr()); - OPENVINO_ASSERT(shape_const, "SplitDimensionM expects Broadcast with Constant output shape"); - const auto new_shape = get_updated_shape(shape_const->cast_vector(), true); - broadcast->set_argument(1, std::make_shared(shape_const->get_element_type(), ov::Shape{new_shape.size()}, new_shape)); - } - } - subgraph->validate_and_infer_types(); - } -} - -void CommonOptimizations::ExtractConstants(const std::shared_ptr& subgraph) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ExtractConstants"); - auto body = subgraph->body_ptr(); - - ParameterVector new_parameters; - OutputVector new_external_inputs = subgraph->input_values(); - - for (auto& op : body->get_ops()) { - auto constant = ov::as_type_ptr(op); - if (!constant || ov::shape_size(constant->get_shape()) == 1ul) - continue; - - 
const auto child = constant->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); - if (op::Subgraph::constant_input_should_be_inside_body(child)) - continue; - - auto parameter = std::make_shared(constant->get_element_type(), constant->output(0).get_partial_shape()); - parameter->set_friendly_name(constant->get_friendly_name()); - ov::copy_runtime_info(constant, parameter); - constant->output(0).replace(parameter->output(0)); - - new_external_inputs.push_back(constant); - new_parameters.push_back(parameter); - } - - if (new_parameters.size() != 0) { - body->add_parameters(new_parameters); - body->validate_nodes_and_infer_types(); - subgraph->set_arguments(new_external_inputs); - } -} - -void CommonOptimizations::ExtractUnsupportedTransposes(const std::shared_ptr& subgraph) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ExtractUnsupportedTransposes"); - const auto& body = subgraph->body_ptr(); - const auto parameters = body->get_parameters(); - // [107806]: If count of Parameters isn't equal to Subgraph inputs, - // we cannot guarantee correct extraction since we don't have correct connections between body I/O and Subgraph I/O. 
- OPENVINO_ASSERT(parameters.size() == subgraph->input_values().size(), - "Failed to extract unsupported transposes: the count of Parameters isn't equal to Subgraph inputs"); - - bool updated = false; - for (size_t i = 0; i < parameters.size(); ++i) { - const auto& parameter = parameters[i]; - const auto& consumers = parameter->get_output_target_inputs(0); - if (consumers.size() != 1) - continue; - - const auto transpose = ov::as_type_ptr(consumers.begin()->get_node()->shared_from_this()); - if (!transpose) - continue; - - const auto& order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); - if (!order) - continue; - - const auto order_value = order->cast_vector(); - const auto transpose_child = *(transpose->get_output_target_inputs(0).begin()); - const auto is_brgemm_case = ov::is_type(transpose_child.get_node()->shared_from_this()); - // If Transpose is supported (can be decomposed or fused into Brgemm), skip - if ((is_brgemm_case && FuseTransposeBrgemm::supported_cases.count(order_value) != 0) || - (TransposeDecomposition::supported_cases.count(order_value) != 0)) - continue; - - // If the transpose isn't supported - we have to extract it from Subgraph - transpose->set_argument(0, subgraph->input_value(i)); - subgraph->set_argument(i, transpose); - transpose_child.replace_source_output(parameter); - // Update shape - parameter->set_partial_shape(transpose->get_output_partial_shape(0)); - updated = true; - } - - if (updated) { - subgraph->validate_and_infer_types(); - } -} +#define REGISTER_SNIPPETS_PASS(manager, pass, enabled, ...) 
\ + if (enabled) \ + manager.register_pass(__VA_ARGS__); CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& config) { MATCHER_SCOPE(CommonOptimizations); @@ -371,29 +41,24 @@ CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& con const auto& body = subgraph->body_ptr(); const auto is_quantized = subgraph->is_quantized(); + const auto is_domain_sensitive = subgraph->has_domain_sensitive_ops(); // Firstly, we should transform all original Converts inside body to ConvertTruncation to save original behavior. // Then if Subgraph contains FakeQuantize we enable specific transformation for quantized subgraphs. ov::pass::Manager manager(get_pass_config()); - manager.register_pass(); - manager.register_pass(); - if (is_quantized) { - manager.register_pass(); - } - manager.register_pass(); + REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::TransformConvertToConvertTruncation, true); + REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::ExplicitTransposeMatMulInputs, is_domain_sensitive); + REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::CommonFakeQuantizeDecomposition, is_quantized); + REGISTER_SNIPPETS_PASS(manager, ov::snippets::pass::SoftmaxReshapeElimination, is_domain_sensitive); manager.run_passes(body); + ov::snippets::pass::CommonOptimizations::SubgraphManager subgraph_manager; // At the moment only non-scalar Constants of FakeQuantize can be inside Subgraph // so we can enable ExtractConstants pass for quantized models - if (is_quantized) { - ExtractConstants(subgraph); - } - // Extract unsupported Transposes from body - if (subgraph->has_domain_sensitive_ops()) { - ExtractUnsupportedTransposes(subgraph); - if (config.split_m_dimension) - SplitDimensionM(subgraph, config.concurrency); - } + REGISTER_SNIPPETS_PASS(subgraph_manager, ov::snippets::pass::ExtractConstants, is_quantized); + REGISTER_SNIPPETS_PASS(subgraph_manager, ov::snippets::pass::ExtractUnsupportedTransposes, is_domain_sensitive); + 
REGISTER_SNIPPETS_PASS(subgraph_manager, ov::snippets::pass::SplitDimensionM, is_domain_sensitive && config.split_m_dimension, config.concurrency); + subgraph_manager.run_passes(subgraph); // Validate the body after all common optimizations ov::snippets::pass::Validate(get_pass_config()).run_on_model(body); diff --git a/src/common/snippets/src/pass/extract_constants.cpp b/src/common/snippets/src/pass/extract_constants.cpp new file mode 100644 index 00000000000000..54a2a56cd27cf5 --- /dev/null +++ b/src/common/snippets/src/pass/extract_constants.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/extract_constants.hpp" + +#include "openvino/opsets/opset1.hpp" +#include "snippets/itt.hpp" + + +bool ov::snippets::pass::ExtractConstants::run_on_subgraph(const std::shared_ptr& subgraph) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ExtractConstants"); + auto body = subgraph->body_ptr(); + + ParameterVector new_parameters; + OutputVector new_external_inputs = subgraph->input_values(); + + for (auto& op : body->get_ops()) { + auto constant = ov::as_type_ptr(op); + if (!constant || ov::shape_size(constant->get_shape()) == 1ul) + continue; + + const auto child = constant->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); + if (ov::snippets::op::Subgraph::constant_input_should_be_inside_body(child)) + continue; + + auto parameter = std::make_shared(constant->get_element_type(), constant->get_shape()); + ov::replace_output_update_name(constant->output(0), parameter->output(0)); + + new_external_inputs.push_back(constant); + new_parameters.push_back(parameter); + } + + if (new_parameters.size() != 0) { + body->add_parameters(new_parameters); + body->validate_nodes_and_infer_types(); + subgraph->set_arguments(new_external_inputs); + return true; + } + + return false; +} diff --git 
a/src/common/snippets/src/pass/extract_unsupported_transposes.cpp b/src/common/snippets/src/pass/extract_unsupported_transposes.cpp new file mode 100644 index 00000000000000..4cc87b3810c1ae --- /dev/null +++ b/src/common/snippets/src/pass/extract_unsupported_transposes.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/extract_unsupported_transposes.hpp" + +#include "openvino/opsets/opset1.hpp" +#include "snippets/pass/mha_tokenization.hpp" +#include "snippets/itt.hpp" + + +bool ov::snippets::pass::ExtractUnsupportedTransposes::run_on_subgraph(const std::shared_ptr& subgraph) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ExtractUnsupportedTransposes"); + const auto& body = subgraph->body_ptr(); + const auto parameters = body->get_parameters(); + // [107806]: If count of Parameters isn't equal to Subgraph inputs, + // we cannot guarantee correct extraction since we don't have correct connections between body I/O and Subgraph I/O. 
+ OPENVINO_ASSERT(parameters.size() == subgraph->input_values().size(), + "Failed to extract unsupported transposes: the count of Parameters isn't equal to Subgraph inputs"); + + bool updated = false; + for (size_t i = 0; i < parameters.size(); ++i) { + const auto& parameter = parameters[i]; + const auto& consumers = parameter->get_output_target_inputs(0); + if (consumers.size() != 1) + continue; + + const auto transpose = ov::as_type_ptr(consumers.begin()->get_node()->shared_from_this()); + if (!transpose) + continue; + + const auto& order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); + OPENVINO_ASSERT(order, "ExtractUnsupportedTransposes expects Transposes with constant order"); + + const auto order_value = order->cast_vector(); + const auto transpose_child = *(transpose->get_output_target_inputs(0).begin()); + const auto is_brgemm_case = ov::is_type(transpose_child.get_node()->shared_from_this()); + // If Transpose is supported (can be decomposed or fused into Brgemm), skip + // [116568]: It should be covered by TransposeDecomposition::is_supported or FuseTransposeBrgemm::is_supported + if ((is_brgemm_case && TokenizeMHASnippets::get_fusion_transpose_order(order_value.size()) == order_value) || + (TokenizeMHASnippets::get_decomposed_transpose_order(order_value.size()) == order_value)) + continue; + + // If the transpose isn't supported - we have to extract it from Subgraph + transpose->set_argument(0, subgraph->input_value(i)); + subgraph->set_argument(i, transpose); + transpose_child.replace_source_output(parameter); + parameter->set_partial_shape(transpose->get_output_partial_shape(0)); + updated = true; + } + + if (updated) { + subgraph->validate_and_infer_types(); + } + + return updated; +} diff --git a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp index 24a4141916e189..4492c1f7466505 100644 --- a/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp +++ 
b/src/common/snippets/src/pass/fuse_transpose_brgemm.cpp @@ -17,24 +17,19 @@ namespace ov { namespace snippets { namespace pass { -const std::set> FuseTransposeBrgemm::supported_cases = {{0, 2, 1, 3}}; - -bool FuseTransposeBrgemm::is_supported_transpose(const Output& transpose_port) { - const auto transpose_node = transpose_port.get_node_shared_ptr(); - // it's safe to do so because of the patterns we used. alternatively we can do it through pattern_values_map - const auto& constant = as_type_ptr(transpose_node->get_input_node_shared_ptr(1)); - // if Transpose in and out layout is not empty => something was already fused on this port - auto default_layout = std::vector(transpose_port.get_shape().size()); - std::iota(default_layout.begin(), default_layout.end(), 0);// NCHW layout by default - if (lowered::PortDescriptorUtils::get_port_descriptor_ptr(transpose_port)->get_layout() != default_layout || - lowered::PortDescriptorUtils::get_port_descriptor_ptr(transpose_node->input_value(0))->get_layout() != default_layout) +bool FuseTransposeBrgemm::is_supported_transpose(const Output& transpose_out) { + const auto transpose = ov::as_type_ptr(transpose_out.get_node_shared_ptr()); + if (!transpose) return false; - const auto& transpose_order = constant->cast_vector(); - // todo: this limitation is due to the fact that offsets are calculated in Kernel, and the only way - // to calc them non-default way is to set Parameter rt_info field. 
This limitation can be removed if - // the rt_info is properly propagated to the corresponding parameter - return is_type(transpose_node->get_input_node_shared_ptr(0)) && - supported_cases.count(transpose_order) != 0; + const auto order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); + if (!order) + return false; + return is_supported_transpose_order(order->cast_vector()); +} + +bool FuseTransposeBrgemm::is_supported_transpose_order(const std::vector& order) { + const auto size = order.size(); + return order.size() > 0 && order.back() == (static_cast(size) - 1); } FuseTransposeBrgemm::FuseTransposeBrgemm() { @@ -51,7 +46,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() { // Pattern 2: Transpose on output of MatMul auto brgemm_out = ov::pass::pattern::wrap_type({ov::pass::pattern::any_input(), ov::pass::pattern::any_input()}); - auto transpose2 = ov::pass::pattern::wrap_type({brgemm_out, constant}); + auto transpose2 = ov::pass::pattern::wrap_type({brgemm_out, constant}, is_supported_transpose); auto brgemm_or_transpose = std::make_shared(OutputVector{brgemm_in0, brgemm_in1, transpose2}); diff --git a/src/common/snippets/src/pass_manager.cpp b/src/common/snippets/src/pass/manager.cpp similarity index 97% rename from src/common/snippets/src/pass_manager.cpp rename to src/common/snippets/src/pass/manager.cpp index bc9237c1ec8ab1..af59a99e348e5e 100644 --- a/src/common/snippets/src/pass_manager.cpp +++ b/src/common/snippets/src/pass/manager.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/pass_manager.hpp" +#include "snippets/pass/manager.hpp" + namespace ov { namespace snippets { @@ -77,5 +78,5 @@ std::shared_ptr Manager::insert_pass_instance(const PassPosit } } // namespace pass -}// namespace snippets -}// namespace ov +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/pass/mha_tokenization.cpp b/src/common/snippets/src/pass/mha_tokenization.cpp index e9f939e8d72d75..67957c286a9e66 100644 --- 
a/src/common/snippets/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/src/pass/mha_tokenization.cpp @@ -18,11 +18,11 @@ namespace { -auto is_supported_tensor(const ov::descriptor::Tensor& t) -> bool { +bool is_supported_tensor(const ov::descriptor::Tensor& t) { return t.get_partial_shape().is_static() && ov::snippets::utils::one_of(t.get_shape().size(), 3lu, 4lu); } -auto is_supported_intermediate_op(const std::shared_ptr& node) -> bool { +bool is_supported_intermediate_op(const std::shared_ptr& node) { const auto is_intermediate_op = [](const std::shared_ptr& node) { return ov::is_type(node) || ov::is_type(node) || @@ -32,22 +32,23 @@ auto is_supported_intermediate_op(const std::shared_ptr& node) -> bool return is_intermediate_op(node) && ov::snippets::pass::TokenizeSnippets::AppropriateForSubgraph(node); } -auto is_valid_transpose(const std::shared_ptr& node, std::vector expected_order) -> bool { - auto valid_transpose_order = [expected_order](const std::shared_ptr& node) -> bool { +bool is_valid_transpose(const std::shared_ptr& node, const std::set& supported_ranks, std::vector expected_order) { + auto is_valid_transpose_order = [expected_order, supported_ranks](const std::shared_ptr& node) -> bool { const auto transpose_pattern = ov::as_type_ptr(node); if (!transpose_pattern) return false; - return transpose_pattern->cast_vector() == expected_order; + const auto existing_order = transpose_pattern->cast_vector(); + return existing_order == expected_order && supported_ranks.count(existing_order.size()) != 0; }; auto is_supported_transpose_tensor = [](const ov::descriptor::Tensor& t) { return is_supported_tensor(t) && ov::snippets::pass::TokenizeSnippets::get_supported_element_types().count(t.get_element_type()) != 0; }; - return node && node->get_output_target_inputs(0).size() == 1 && node->get_shape().size() == 4 && - valid_transpose_order(node->get_input_node_shared_ptr(1)) && is_supported_transpose_tensor(node->get_input_tensor(0)); + return node && 
node->get_output_target_inputs(0).size() == 1 && is_valid_transpose_order(node->get_input_node_shared_ptr(1)) && + is_supported_transpose_tensor(node->get_input_tensor(0)); } -auto tokenize_broadcast(const std::shared_ptr& interm_op, ov::NodeVector& ordered_ops) -> void { +void tokenize_broadcast(const std::shared_ptr& interm_op, ov::NodeVector& ordered_ops) { // We can tokenize Broadcast op only when output shape of child doesn't depend on Broadcast shape without last dimension. // Snippets remove Broadcast op and insert BroadcastMove if last dimensions before and after Broadcast are different. // Otherwise, we can lose original shape. @@ -95,9 +96,7 @@ auto tokenize_broadcast(const std::shared_ptr& interm_op, ov::NodeVect } } -auto tokenize_reshape_around_softmax(std::shared_ptr& interm_op, - std::shared_ptr& reshape, - ov::NodeVector& ordered_ops) -> bool { +bool tokenize_reshape_around_softmax(std::shared_ptr& interm_op, std::shared_ptr& reshape, ov::NodeVector& ordered_ops) { reshape = ov::as_type_ptr(interm_op); if (reshape) { const auto in_shape = reshape->get_input_shape(0); @@ -110,7 +109,7 @@ auto tokenize_reshape_around_softmax(std::shared_ptr& interm_op, return true; } -auto get_potential_body_params(const std::shared_ptr& op) -> size_t { +size_t get_potential_body_params(const std::shared_ptr& op) { size_t count = 0; for (size_t i = 1; i < op->get_input_size(); ++i) { const auto input = op->input_value(i); @@ -125,8 +124,8 @@ auto get_potential_body_params(const std::shared_ptr& op) -> size_t { return count; } -auto update_intermediate_supported_ops(std::shared_ptr& interm_op, ov::NodeVector& ordered_ops, - size_t& hidden_virtual_ports_count, size_t& potential_body_params_count) -> bool { +bool update_intermediate_supported_ops(std::shared_ptr& interm_op, ov::NodeVector& ordered_ops, + size_t& hidden_virtual_ports_count, size_t& potential_body_params_count) { while (is_supported_intermediate_op(interm_op)) { // All supported intermediate ops have only 
one output port if (interm_op->get_output_target_inputs(0).size() != 1) @@ -176,8 +175,26 @@ auto update_intermediate_supported_ops(std::shared_ptr& interm_op, ov: } return true; } + +std::vector get_rank_equivalent_order(std::vector default_order, size_t rank) { + OPENVINO_ASSERT(rank > 2, "Incorrect order rank for Transpose tokenization"); + auto order = std::vector(rank); + std::iota(order.begin(), order.end(), 0); + const auto diff = static_cast(rank - default_order.size()); + for (size_t i = 0; i < default_order.size(); ++i) { + order[diff + i] = default_order[i] + diff; + } + return order; +} } // namespace +std::vector ov::snippets::pass::TokenizeMHASnippets::get_fusion_transpose_order(size_t rank) { + return get_rank_equivalent_order({1, 0, 2}, rank); +} +std::vector ov::snippets::pass::TokenizeMHASnippets::get_decomposed_transpose_order(size_t rank) { + return get_rank_equivalent_order({1, 2, 0}, rank); +} + bool ov::snippets::pass::TokenizeMHASnippets::is_matmul0_supported(const std::shared_ptr& matmul) { if (!matmul || matmul->get_output_target_inputs(0).size() != 1 || matmul->get_transpose_a() || !is_supported_tensor(matmul->get_input_tensor(0)) || !is_supported_tensor(matmul->get_input_tensor(1))) @@ -257,6 +274,8 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken ordered_ops.push_back(matmul0); + const auto pattern_rank = matmul0->get_output_partial_shape(0).size(); + auto interm_op = matmul0->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); // Add supported operations which are between MatMul0 and Softmax to ordered_ops if (!update_intermediate_supported_ops(interm_op, ordered_ops, hidden_virtual_ports_count, potential_body_params_count)) @@ -368,12 +387,12 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken } auto tokenize_transpose = [&](const std::shared_ptr& transpose, - bool is_input_transposed, std::vector order, + bool is_input_transposed, std::vector order, 
const ov::NodeVector::const_iterator& pos) { // If Transpose has valid order for the Transpose fusing (ExplicitTransposeMatMulInputs pass call), tokenize him. // Otherwise, skip the Transpose. if (!is_input_transposed) { - if (is_valid_transpose(transpose, order)) { + if (is_valid_transpose(transpose, config.mha_supported_transpose_ranks, order)) { ordered_ops.insert(pos, transpose); } return; @@ -383,7 +402,7 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken if (rank < 2) return; std::swap(transposed_order[rank - 1], transposed_order[rank - 2]); - if (is_valid_transpose(transpose, transposed_order)) { + if (is_valid_transpose(transpose, config.mha_supported_transpose_ranks, transposed_order)) { ordered_ops.insert(pos, transpose); } }; @@ -391,9 +410,9 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken const auto transpose1 = ov::as_type_ptr(parent); const auto transpose0 = ov::as_type_ptr(matmul0->get_input_node_shared_ptr(0)); const auto transpose2 = ov::as_type_ptr(matmul1->get_input_node_shared_ptr(1)); - tokenize_transpose(transpose1, is_transposed_b_0, {0, 2, 3, 1}, ordered_ops.begin()); - tokenize_transpose(transpose0, matmul0->get_transpose_a(), {0, 2, 1, 3}, ordered_ops.begin()); - tokenize_transpose(transpose2, matmul1->get_transpose_b(), {0, 2, 1, 3}, ordered_ops.end()); + tokenize_transpose(transpose1, is_transposed_b_0, get_decomposed_transpose_order(pattern_rank), ordered_ops.begin()); + tokenize_transpose(transpose0, matmul0->get_transpose_a(), get_fusion_transpose_order(pattern_rank), ordered_ops.begin()); + tokenize_transpose(transpose2, matmul1->get_transpose_b(), get_fusion_transpose_order(pattern_rank), ordered_ops.end()); ordered_ops.push_back(matmul1); bool are_ops_after_matmul1 = false; @@ -427,7 +446,7 @@ ov::snippets::pass::TokenizeMHASnippets::TokenizeMHASnippets(const SnippetsToken // Transpose3 if (!are_ops_after_matmul1) { auto transpose3 = 
config.mha_token_enable_transpose_on_output ? ov::as_type_ptr(child) : nullptr; - if (is_valid_transpose(transpose3, {0, 2, 1, 3}) && + if (is_valid_transpose(transpose3, config.mha_supported_transpose_ranks, get_fusion_transpose_order(pattern_rank)) && transpose3->get_input_element_type(0) == matmul1_out_type) { // To avoid Convert between MatMul1 and Transpose3 ordered_ops.push_back(transpose3); } diff --git a/src/common/snippets/src/pass/split_dimension_m.cpp b/src/common/snippets/src/pass/split_dimension_m.cpp new file mode 100644 index 00000000000000..671a12bffa34d2 --- /dev/null +++ b/src/common/snippets/src/pass/split_dimension_m.cpp @@ -0,0 +1,275 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/split_dimension_m.hpp" + +#include "snippets/utils.hpp" +#include "snippets/itt.hpp" + +namespace { +size_t get_dim_M(const ov::Shape& shape) { + return *(shape.rbegin() + 1); +} +bool is_prime_number(size_t value) { + if (ov::snippets::utils::one_of(value, 2lu, 3lu)) return true; + if (value == 1 || value % 2 == 0 || value % 3 == 0) return false; + const auto root = std::sqrt(value) + 1; + for (size_t divisor = 5; divisor < root; divisor += 6) { + if ((value % divisor == 0) || (value % (divisor + 2) == 0)) + return false; + } + return true; +} +} // namespace + +bool ov::snippets::pass::SplitDimensionM::is_supported_matmul(const std::shared_ptr& node) { + const auto matmul = ov::as_type_ptr(node); + return matmul && !matmul->get_transpose_a() && !matmul->is_dynamic(); +} + +std::pair ov::snippets::pass::SplitDimensionM::get_splited_dimensions(size_t batch_dim, size_t m_dim, + size_t optimal_parallelism_work_amount) { + std::pair splited = { 1, m_dim }; + + const size_t lower_bound = optimal_parallelism_work_amount / batch_dim; + if (lower_bound * batch_dim == optimal_parallelism_work_amount && m_dim % lower_bound == 0) { + splited.first = lower_bound; + splited.second = m_dim / lower_bound; + 
OPENVINO_ASSERT(splited.first * splited.second == m_dim, "Incorrect dimension M splitting!"); + return splited; + } + + const size_t upper_bound = utils::div_up(2 * optimal_parallelism_work_amount, batch_dim); + for (size_t divisor_0 = upper_bound - 1; divisor_0 > 1; divisor_0--) { + size_t divisor_1 = m_dim / divisor_0; + if (divisor_1 * divisor_0 == m_dim) { + splited.first = divisor_0; + splited.second = divisor_1; + break; + } + } + OPENVINO_ASSERT(splited.first * splited.second == m_dim, "Incorrect dimension M splitting!"); + return splited; +} + +bool ov::snippets::pass::SplitDimensionM::can_be_optimized(const std::shared_ptr& node, size_t concurrency) { + if (!is_supported_matmul(node)) + return false; + size_t batch_m_dim, new_m_dim; + return split(node->get_shape(), concurrency, batch_m_dim, new_m_dim); +} + +std::shared_ptr ov::snippets::pass::SplitDimensionM::get_matmul(const std::shared_ptr& subgraph) { + const auto& body = subgraph->body_ptr(); + const auto& parameters = body->get_parameters(); + // [107806]: If count of Parameters isn't equal to Subgraph inputs (it's possible case in general), + // we cannot garantee correct extraction since we don't have correct connections between body I/O and Subgraph I/O. + OPENVINO_ASSERT(parameters.size() == subgraph->input_values().size(), + "Failed to extract unsupported transposes: the count of Parameters isn't equal to Subgraph inputs"); + + // Need to find MatMul0 and check output shape + const auto& ops = body->get_ordered_ops(); + const auto mm_it = std::find_if(ops.cbegin(), ops.cend(), + [](const std::shared_ptr& node){ return ov::is_type(node); }); + if (mm_it == ops.end()) + return nullptr; + + const auto matmul0 = *mm_it; + return is_supported_matmul(matmul0) ? 
ov::as_type_ptr(matmul0) : nullptr; +} + +bool ov::snippets::pass::SplitDimensionM::split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim) { + const auto batch_dim = + std::accumulate(shape.rbegin() + 2, shape.rend(), size_t(1), std::multiplies()); // B (batch) + const auto m_dim = get_dim_M(shape); // M + if (is_prime_number(m_dim)) + return false; + + auto is_optimized = [&](size_t batch_dim) { + return batch_dim >= optimal_parallelism_work_amount; + }; + + // We skip optimization if the current batch is optimal for concurrency + if (is_optimized(batch_dim)) + return false; + + std::tie(batch_m_dim, new_m_dim) = get_splited_dimensions(batch_dim, m_dim, optimal_parallelism_work_amount); + return is_optimized(batch_dim * batch_m_dim); +} + +void ov::snippets::pass::SplitDimensionM::reshape_subgraph(const std::shared_ptr& subgraph, + const ov::Shape& shape, size_t batch_m_dim, size_t new_m_dim) { + const auto& body = subgraph->body_ptr(); + const auto& parameters = body->get_parameters(); + const auto& results = body->get_results(); + const auto ops = body->get_ordered_ops(); + const auto m_dim = get_dim_M(shape); + + // There are two Parameter variants: + // - Parameter on branches for Second input of MatMul - the shape should be only unsqueezed (add just 1) + // - Other Parameters (on First input of MatMuls and between) - the shape should be splitted on M dimension + + std::set> reshaped_params; + + auto insert_reshape = [&](const std::shared_ptr& param, const ov::Shape& new_shape) { + const auto index = std::distance(parameters.begin(), std::find(parameters.begin(), parameters.end(), param)); + const auto shape_const = std::make_shared(ov::element::i32, ov::Shape{new_shape.size()}, new_shape); + const auto reshape = std::make_shared(subgraph->input_value(index), shape_const, false); + subgraph->input(index).replace_source_output(reshape); + param->set_partial_shape(new_shape); + reshaped_params.insert(param); 
+ }; + + auto get_updated_shape = [&](const ov::Shape& shape, size_t m_index, bool split_m_dim) { + const auto current_m_dim = shape[m_index]; + OPENVINO_ASSERT(!split_m_dim || current_m_dim == 1 || current_m_dim == m_dim, "Incorrect shape for splitting!"); + ov::Shape new_shape = shape; + if ((split_m_dim && current_m_dim == 1) || !split_m_dim) { + new_shape.insert(new_shape.begin() + m_index, 1); + } else { + new_shape[m_index] = new_m_dim; + new_shape.insert(new_shape.begin() + m_index, batch_m_dim); + } + OPENVINO_ASSERT(ov::shape_size(new_shape) == ov::shape_size(shape), "Incorrect shape splitting!"); + return new_shape; + }; + + auto get_updated_order = [](const std::vector& order, int m_index) { + std::vector new_order(order.size() + 1, 0); + size_t shift_idx = 0; + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] < m_index) { + new_order[i + shift_idx] = order[i]; + } else if (order[i] == m_index) { + new_order[i + shift_idx++] = order[i]; + new_order[i + shift_idx] = order[i] + 1; + } else { + new_order[i + shift_idx] = order[i] + 1; + } + } + return new_order; + }; + + auto reshape_transpose = [&](const std::shared_ptr& transpose, bool is_input) -> size_t { + const auto order_constant = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); + OPENVINO_ASSERT(order_constant != nullptr, "Transpose must have Constant order"); + const auto order = order_constant->cast_vector(); + const auto m_index = is_input ? 
order[order.size() - 2] : order.size() - 2; // Index of M dimension in the previous order + const auto new_order = get_updated_order(order, static_cast(m_index)); + transpose->set_argument(1, std::make_shared(order_constant->get_element_type(), ov::Shape{new_order.size()}, new_order)); + return m_index; + }; + + auto reshape_parameter = [&](const std::shared_ptr& node, bool split_m_dim = true) { + const auto param = ov::as_type_ptr(node); + if (!param || reshaped_params.count(param) > 0) + return; + + const auto shape = param->get_partial_shape().get_shape(); + const auto consumers = param->get_output_target_inputs(0); + const auto shared_consumer = consumers.begin()->get_node()->shared_from_this(); + auto m_index = shape.size() - 2; + if (ov::is_type(shared_consumer)) { + m_index = reshape_transpose(shared_consumer, true); + } + insert_reshape(param, get_updated_shape(shape, m_index, split_m_dim)); + }; + + auto update_matmul_second_branch = [&](const std::shared_ptr& node) { + auto parent = node->get_input_node_shared_ptr(1); + while (!ov::is_type(parent)) { + if (parent->get_input_size() > 1) { + for (const auto& input_source : parent->input_values()) { + reshape_parameter(input_source.get_node_shared_ptr(), false); + } + } + + // [107731]: It's covered my MHA tokenization + parent = parent->get_input_node_shared_ptr(0); + } + reshape_parameter(parent, false); + }; + + // Firstly, Unsqueeze parameters on second branches of MatMuls + for (const auto& op : ops) { + if (const auto matmul = ov::as_type_ptr(op)) { + update_matmul_second_branch(matmul); + } else if (const auto softmax_v8 = ov::as_type_ptr(op)) { + softmax_v8->set_axis(-1); + } else if (const auto softmax_v1 = ov::as_type_ptr(op)) { + softmax_v1->set_axis(softmax_v1->get_output_partial_shape(0).size()); // since new_shape.size() = old_shape.size() + 1 + } else if (const auto broadcast = ov::as_type_ptr(op)) { + // Broadcast is tokenized only between MatMuls -> Split M dimension + const auto shape_const 
= ov::as_type_ptr(broadcast->input_value(1).get_node_shared_ptr()); + OPENVINO_ASSERT(shape_const, "SplitDimensionM expects Broadcast with Constant output shape"); + const auto new_shape = get_updated_shape(shape_const->cast_vector(), broadcast->get_output_shape(0).size() - 2, true); + broadcast->set_argument(1, std::make_shared(shape_const->get_element_type(), ov::Shape{new_shape.size()}, new_shape)); + } + } + + // Secondly, Update All M dimensions for remaining parameters + for (const auto& param : parameters) { + if (reshaped_params.count(param) == 0) + reshape_parameter(param, true); + } + + // Update Transpose order on Result + for (const auto& res : results) { + const auto parent = res->get_input_node_shared_ptr(0); + if (ov::is_type(parent)) { + reshape_transpose(parent, false); + } + } + + // Return the previous shape on outputs + for (size_t i = 0; i < subgraph->get_output_size(); ++i) { + const auto output_shape = subgraph->get_output_shape(i); + if (is_scalar(output_shape)) + continue; + + const auto& target_inputs = subgraph->get_output_target_inputs(i); + const auto shape_const = std::make_shared(ov::element::i32, ov::Shape{output_shape.size()}, output_shape); + const auto reshape = std::make_shared(subgraph->output(i), shape_const, false); + // Save output name + const auto original_output = body->get_results()[i]->get_input_node_shared_ptr(0); + const auto original_name = original_output->get_friendly_name(); + reshape->set_friendly_name(original_name); + original_output->set_friendly_name(original_name + "_original"); + + for (const auto& input : target_inputs) { + input.replace_source_output(reshape); + // Result input tensor name was changed, the name has to be restored + if (ov::is_type(input.get_node())) { + input.get_tensor_ptr()->add_names(subgraph->output(i).get_tensor_ptr()->get_names()); + } + } + subgraph->output(i).get_tensor_ptr()->set_names({}); + } + subgraph->set_friendly_name(subgraph->get_friendly_name() + "_original"); + // Need 
to update inner Shapes and Softmax Axis + subgraph->validate_and_infer_types(); +} + +bool ov::snippets::pass::SplitDimensionM::run_on_subgraph(const std::shared_ptr& subgraph) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SplitDimensionM"); + // To increase parallelism work in MHA pattern, + // we split 1st dimension (starting from 0th) into 2 new dimensions to get 4D Shapes where + // - 0th and 1st dimensions are used in parallel scheduling, + // - 2nd and 3rd dimensions are used in kernel + + // It's needed only for MHA patterns. Need to add support for common patterns + if (!subgraph->has_domain_sensitive_ops()) + return false; + + if (const auto matmul0 = get_matmul(subgraph)) { + const auto mm_shape = matmul0->get_shape(); + size_t batch_m_dim, new_m_dim; + if (!split(mm_shape, m_concurrency, batch_m_dim, new_m_dim)) + return false; + + reshape_subgraph(subgraph, mm_shape, batch_m_dim, new_m_dim); + return true; + } + return false; +} diff --git a/src/common/snippets/src/pass/subgraph_manager.cpp b/src/common/snippets/src/pass/subgraph_manager.cpp new file mode 100644 index 00000000000000..860a2b15c359fd --- /dev/null +++ b/src/common/snippets/src/pass/subgraph_manager.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/subgraph_manager.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +bool CommonOptimizations::SubgraphManager::run_passes(std::shared_ptr subgraph) { + bool updated = false; + for (const auto& pass : m_pass_list) { + updated = pass->run_on_subgraph(subgraph) || updated; + } + return updated; +} + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/src/pass/transpose_decomposition.cpp b/src/common/snippets/src/pass/transpose_decomposition.cpp index bb581105a7523a..fe7cf7a702b09f 100644 --- a/src/common/snippets/src/pass/transpose_decomposition.cpp +++ 
b/src/common/snippets/src/pass/transpose_decomposition.cpp @@ -14,7 +14,22 @@ namespace snippets { namespace pass { using namespace lowered; -const std::set> TransposeDecomposition::supported_cases = {{0, 2, 3, 1}}; +bool TransposeDecomposition::is_supported_transpose(const Output& transpose_out) { + const auto transpose = ov::as_type_ptr(transpose_out.get_node_shared_ptr()); + if (!transpose) + return false; + const auto order = ov::as_type_ptr(transpose->get_input_node_shared_ptr(1)); + if (!order) + return false; + return is_supported_transpose_order(order->cast_vector()); +} + +bool TransposeDecomposition::is_supported_transpose_order(const std::vector& order) { + const auto size = order.size(); + if (size > 0) + return order.back() != static_cast(size - 1); + return true; +} TransposeDecomposition::TransposeDecomposition() { MATCHER_SCOPE(TransposeDecomposition); @@ -37,7 +52,7 @@ TransposeDecomposition::TransposeDecomposition() { return false; auto order_value = order->cast_vector(); - if (supported_cases.count(order_value) == 0) + if (!is_supported_transpose_order(order_value)) return false; // number of elements that can be processed on every iteration. For 0,1,2,3 -> 0,2,3,1 we can guarantee only scalar access diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index 242391b908dc03..2bd5423babb805 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -12,6 +12,37 @@ namespace ov { namespace snippets { namespace utils { +namespace { +template +void ordered_shape(const Shape& shape, const std::vector& layout, bool is_forward, Shape& reordered_shape) { + for (size_t i = 0; i < layout.size(); i++) { + OPENVINO_ASSERT(layout[i] < shape.size(), "layout index is greater than the shape size"); + const auto src_idx = is_forward ? layout[i] : i; + const auto dst_idx = is_forward ? 
i : layout[i]; + reordered_shape[dst_idx] = shape[src_idx]; + } +} + +// Note: +// - If `is_forward` is True, `result shape` is ordered `shape` by `layout` +// - If `is_forward` is False, `result shape` is original shape to which the `layout` was applied +ov::PartialShape get_pshape(const ov::PartialShape& shape, const std::vector& layout, bool is_forward) { + if (layout.empty()) + return shape; + ov::PartialShape reordered_shape(std::vector(layout.size())); + if (shape.rank().is_dynamic()) + OPENVINO_THROW("get_reordered_planar_shape can't be called for outputs with dynamic rank"); + const size_t rank = shape.rank().get_length(); + if (layout.size() > rank) + OPENVINO_THROW("Layout rank can't be larger than tensor rank"); + // Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes + if (std::any_of(layout.begin(), layout.end(), [=](size_t x) {return x >= rank;})) + OPENVINO_THROW("Invalid layout detected: all layout indexes must be smaller than the tensor rank"); + ordered_shape(shape, layout, is_forward, reordered_shape); + return reordered_shape; +} +} // namespace + auto get_non_scalar_constant_count_for_fq(const std::shared_ptr& fq) -> size_t { std::vector cl, ch, isc, ish, osc, osh; const bool status = ov::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(fq, cl, ch, isc, ish, osc, osh); @@ -70,23 +101,46 @@ auto get_non_scalar_constant_count_for_fq(const std::shared_ptr& layout) { - if (layout.empty()) - return shape; - std::vector reordered_shape(layout.size()); - if (shape.rank().is_dynamic()) - OPENVINO_THROW("get_reordered_planar_shape can't be called for outputs with dynamic rank"); - const size_t rank = shape.rank().get_length(); - if (layout.size() > rank) - OPENVINO_THROW("Layout rank can't be larger than tensor rank"); - // Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes - if (std::any_of(layout.begin(), layout.end(), 
[=](size_t x) {return x >= rank;})) - OPENVINO_THROW("Invalid layout detected: all layout indexes must be smaller than the tensor rank"); - for (size_t i = 0; i < layout.size(); i++) - reordered_shape[i] = shape[layout[i]]; +ov::PartialShape get_planar_pshape(const ov::PartialShape& shape, const std::vector& order) { + return get_pshape(shape, order, true); +} +ov::PartialShape get_preordered_pshape(const ov::PartialShape& shape, const std::vector& order) { + return get_pshape(shape, order, false); +} + +ov::PartialShape get_planar_pshape(const Input& in) { + const auto& port = snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(in); + return get_planar_pshape(ov::Shape{port->get_shape()}, port->get_layout()); +} +ov::PartialShape get_preordered_pshape(const Output& out) { + const auto& port = snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(out); + return get_preordered_pshape(ov::Shape{port->get_shape()}, port->get_layout()); +} + +VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& order) { + VectorDims reordered_shape(order.size()); + ordered_shape(shape, order, true, reordered_shape); + return reordered_shape; +} +VectorDims get_preordered_vdims(const VectorDims& shape, const std::vector& order) { + VectorDims reordered_shape(order.size()); + ordered_shape(shape, order, false, reordered_shape); return reordered_shape; } +VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port) { + OPENVINO_ASSERT(expr_port.get_type() == snippets::lowered::ExpressionPort::Type::Input, "get_planar_vdims expects Expression Input port"); + return get_planar_vdims(expr_port.get_descriptor_ptr()->get_shape(), expr_port.get_descriptor_ptr()->get_layout()); +} +VectorDims get_preordered_vdims(const snippets::lowered::ExpressionPort& expr_port) { + OPENVINO_ASSERT(expr_port.get_type() == snippets::lowered::ExpressionPort::Type::Output, "get_preordered_vdims expects Expression Output port"); + return 
get_preordered_vdims(expr_port.get_descriptor_ptr()->get_shape(), expr_port.get_descriptor_ptr()->get_layout()); +} + +bool is_dynamic_vdims(const VectorDims& shape) { + return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return v == IShapeInferSnippets::DYNAMIC_DIMENSION; }); +} + VectorDims pshape_to_vdims(const PartialShape& pshape) { VectorDims result; result.reserve(pshape.size()); @@ -106,37 +160,6 @@ ov::PartialShape vdims_to_pshape(const VectorDims& vdims) { return result; } -ov::PartialShape get_planar_pshape(const Input& in) { - const auto& port = snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(in); - return utils::get_planar_pshape(ov::Shape{port->get_shape()}, port->get_layout()); -} - -ov::PartialShape get_planar_pshape(const Output& out) { - const auto& port = snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(out); - return utils::get_planar_pshape(ov::Shape{port->get_shape()}, port->get_layout()); -} - -VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& layout) { - VectorDims reordered_shape(shape.size()); - for (size_t i = 0; i < layout.size(); i++) { - OPENVINO_ASSERT(layout[i] < shape.size(), "get_planar_vdims: layout index is greater than the shape size"); - reordered_shape[i] = shape[layout[i]]; - } - return reordered_shape; -} - -VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc) { - return get_planar_vdims(port_desc->get_shape(), port_desc->get_layout()); -} - -VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port) { - return get_planar_vdims(expr_port.get_descriptor_ptr()); -} - -bool is_dynamic_vdims(const VectorDims& shape) { - return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return v == IShapeInferSnippets::DYNAMIC_DIMENSION; }); -} - } // namespace utils } // namespace snippets } // namespace ov diff --git a/src/common/snippets/tests/include/lowering_utils.hpp 
b/src/common/snippets/tests/include/lowering_utils.hpp index f2c872f725b7d6..379a8f16cec4f0 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -6,7 +6,7 @@ #include #include "snippets/op/subgraph.hpp" #include "snippets_helpers.hpp" -#include "snippets/pass_manager.hpp" +#include "snippets/pass/manager.hpp" #include "snippets/shape_inference/shape_inference.hpp" namespace ov { diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index 4fa525ba5d1f0c..49087d4ffcf675 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -31,8 +31,7 @@ void TokenizeMHASnippetsTests::run() { disable_rt_info_check(); } -TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-114607 */, smoke_Snippets_MHA) { - GTEST_SKIP(); +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_4D) { const auto &f = MHAFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32})); model = f.getOriginal(); @@ -40,6 +39,14 @@ TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-114607 */, smoke_Snippets_MHA) { run(); } +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_3D) { + const auto &f = MHAFunction(std::vector{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32})); + model = f.getOriginal(); + model_ref = f.getReference(); + run(); +} + TEST_F(SKIP_TokenizeMHASnippetsTests /* CVS-114607 */, smoke_Snippets_MHA_with_MatMul0_Transpose) { GTEST_SKIP(); const auto &f = MHAMatMul0TransposeFunction(std::vector{{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, @@ -80,10 +87,54 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_Transpose_fusion) { run(); } 
-TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_SplitM) { +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM) { + const auto& f = MHASplitMFunction(std::vector{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), + std::vector{{2, 64, 12, 64}, {128, 12, 1, 64}, {12, 2, 64, 128}, {1, 128, 12, 64}, {128, 12, 64}}, + false); + model = f.getOriginal(); + model_ref = f.getReference(); + config.concurrency = 24; + run(); +} + +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM_withMul) { + const auto& f = MHASplitMFunction(std::vector{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), + std::vector{{2, 64, 12, 64}, {128, 12, 1, 64}, {12, 2, 64, 128}, {1, 128, 12, 64}, {128, 12, 64}}, + true); + model = f.getOriginal(); + model_ref = f.getReference(); + config.concurrency = 16; + run(); +} + +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM) { + const auto& f = MHASplitMFunction(std::vector{{1, 384, 16, 64}, {1, 384, 16, 64}, {1, 1, 1, 384}, {1, 384, 16, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), + std::vector{{1, 6, 64, 16, 64}, {1, 384, 16, 1, 64}, {1, 1, 1, 1, 384}, {1, 1, 384, 16, 64}, {1, 384, 16, 64}}, + false); + model = f.getOriginal(); + model_ref = f.getReference(); + config.concurrency = 60; + run(); +} + +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM_withMul) { + const auto& f = MHASplitMFunction(std::vector{{1, 384, 16, 64}, {1, 384, 16, 64}, {1, 1, 1, 384}, {1, 384, 16, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), + std::vector{{1, 6, 64, 16, 64}, {1, 384, 16, 1, 64}, {1, 1, 1, 1, 384}, {1, 1, 384, 16, 64}, {1, 384, 16, 64}}, + true); + model = f.getOriginal(); + model_ref = f.getReference(); + 
config.concurrency = 60; + run(); +} + +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHAWOTranspose_SplitM) { const auto& f = MHAWOTransposeSplitMFunction(std::vector{{10, 9216, 128}, {10, 128, 9216}, {10, 9216, 128}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32}), - std::vector{{10, 9, 1024, 128}, {10, 1, 128, 9216}, {10, 1, 9216, 128}, {10, 9216, 128}}); + std::vector{{10, 3, 3072, 128}, {10, 1, 128, 9216}, {10, 1, 9216, 128}, {10, 9216, 128}}); model = f.getOriginal(); model_ref = f.getReference(); config.concurrency = 18; @@ -93,7 +144,7 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_SplitM) { TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_SplitM_AlmostAllThreads) { const auto& f = MHAWOTransposeSplitMFunction(std::vector{{5, 30, 32}, {5, 32, 30}, {5, 30, 32}}, std::vector({ov::element::f32, ov::element::f32, ov::element::f32}), - std::vector{{5, 6, 5, 32}, {5, 1, 32, 30}, {5, 1, 30, 32}, {5, 30, 32}}); + std::vector{{5, 10, 3, 32}, {5, 1, 32, 30}, {5, 1, 30, 32}, {5, 30, 32}}); model = f.getOriginal(); model_ref = f.getReference(); config.concurrency = 32; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index 072c3f7edcf60b..6e75dc8794ec6b 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -7,6 +7,7 @@ #include #include "snippets/snippets_isa.hpp" +#include "snippets/utils.hpp" #include "snippets/lowered/expression.hpp" #include "snippets/lowered/port_connector.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" @@ -229,7 +230,7 @@ void KernelEmitter::init_data_pointers(const Xbyak::Reg64& reg_indexes, const Xb // Note that we don't need offset for the last dim, since it's handled directly by Tile emitter const size_t offset_rank = master_shape.size() - 1; std::vector> data_offsets(num_params, std::vector{}); - 
auto offset_calculation = [=](const std::vector& shape, const std::vector& layout, const size_t data_size) { + auto offset_calculation = [=](const std::vector& shape, const std::vector& layout, const size_t data_size, bool is_input) { // Strides represent distance between consecutive elements of corresponding dimension. // If a dim size == 1, then the next dim starts immediately and the stride is 0 // case 1: @@ -248,8 +249,11 @@ void KernelEmitter::init_data_pointers(const Xbyak::Reg64& reg_indexes, const Xb // Note: this is an extra copy, but let's keep it for clarity if (!layout.empty()) { std::vector reordered_strides(strides.size()); - for (size_t i = 0; i < layout.size(); i++) - reordered_strides[i] = strides[layout[i]]; + for (size_t i = 0; i < layout.size(); i++) { + const auto& src_idx = is_input ? layout[i] : i; + const auto& dst_idx = is_input ? i : layout[i]; + reordered_strides[dst_idx] = strides[src_idx]; + } strides = std::move(reordered_strides); } // the last stride is ignored, since the entire last dim is processed by kernel @@ -261,7 +265,7 @@ void KernelEmitter::init_data_pointers(const Xbyak::Reg64& reg_indexes, const Xb return strides; }; for (size_t i = 0; i < num_params; i++) { - data_offsets[i] = offset_calculation(io_shapes[i], io_data_layouts[i], io_data_sizes[i]); + data_offsets[i] = offset_calculation(io_shapes[i], io_data_layouts[i], io_data_sizes[i], i < num_inputs); } // master_shape size must be valid in both static and dynamic cases std::function&, Reg64)> init_ptr_with_offset; @@ -718,6 +722,33 @@ size_t BrgemmEmitter::getBrgIdx(size_t kIdx, size_t nIdx) { return kIdx * BRGEMM_N_KERNEL_NUM + nIdx; } +size_t BrgemmEmitter::get_in_leading_dim(const VectorDims& shape, const std::vector& layout) { + // Input shape is original, so we need to correctly read this data by order + // Example: + // Original shape (shape) = [1, 49, 2, 23] + // Layout (transpose order) = [2, 0, 1, 3] + // Transposed shape = [2, 1, 49, 23] + // The leading 
dimension is equal to stride of shape[layout[3]] = 2 x 23 + OPENVINO_ASSERT(layout.back() == layout.size() - 1 && layout.size() == shape.size(), + "BrgemmEmitter detected invalid layout values: check that this shape + layout combination is schedulable"); + const auto idx = layout[layout.size() - 2]; // `1` in example + return std::accumulate(shape.cbegin() + idx + 1, shape.end(), 1, std::multiplies()); +} +size_t BrgemmEmitter::get_out_leading_dim(const VectorDims& shape, const std::vector& layout) { + // Output shape is already transposed, we need to correctly write the data with original shape by the order + // Example: + // Original transposed shape (shape) = [49, 2, 7, 39] + // Layout (transpose order) = [2, 0, 1, 3] + // Before leading dimension with index 3 there is dimension with index 2 in planar layout. + // Since we have non-planar layout, we have to find this before LD dim in transposed order. + // In layout 2nd idx is first element, it means, that the leading dimension is equal to stride of shape[0] + OPENVINO_ASSERT(layout.back() == layout.size() - 1 && layout.size() == shape.size(), + "BrgemmEmitter detected invalid layout values: check that this shape + layout combination is schedulable"); + const auto idx = layout.size() - 2; // 2 in the example + const auto dim = std::distance(layout.cbegin(), std::find(layout.cbegin(), layout.cend(), idx)); // 0 in the example: shape[0] = 49 + return std::accumulate(shape.cbegin() + dim + 1, shape.cend(), 1, std::multiplies()); // shape[1] x shape[2] x shape[3] = 2 x 7 x 39 +} + BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { m_brgCtxs.fill(brgemmCtx()); std::generate(m_brgKernels.begin(), m_brgKernels.end(), [](){ return nullptr; }); @@ -730,38 +761,33 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt std::vector leading_dimensions; std::vector> io_layouts; - auto init_scheduling_params = [&](const std::vector& 
layout, const ov::Shape& io_shape) { - if (layout.empty()) { - // empty value indicates a planar layout - leading_dimensions.push_back(io_shape.back()); - std::vector default_layout(io_shape.size()); - std::iota(default_layout.begin(), default_layout.end(), 0); - io_layouts.push_back(default_layout); - } else { - // The idea here is to find "2" (for 4D shapes) in the layout and multiply dimensions that are to the right - // This implies that "3" is the last layout value, otherwise this layout is not supported. - // counting from the end since shape could be prepended with ones - const int64_t num_last_dims = layout.end() - std::find(layout.begin(), layout.end(), layout.size() - 2) - 1; - if (layout.back() != layout.size() - 1 || num_last_dims < 1) - IE_THROW() << "BrgemmEmitter detected invalid layout values: check that this shape + layout combination is schedulable"; - leading_dimensions.emplace_back( - std::accumulate(io_shape.end() - num_last_dims, io_shape.end(), 1, std::multiplies())); - io_layouts.push_back(layout); - } + auto get_layout = [](const std::vector& layout, const snippets::VectorDims& io_shape) { + if (!layout.empty()) return layout; + std::vector default_layout(io_shape.size()); + std::iota(default_layout.begin(), default_layout.end(), 0); + return default_layout; }; - std::vector> brgemm_inputs = {brgemm_node->input(0), - brgemm_copy ? 
brgemm_copy->input(0) : brgemm_node->input(1)}; - for (const auto& input : brgemm_inputs) { - init_scheduling_params(snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(input)->get_layout(), - input.get_shape()); + auto init_in_scheduling_params = [&](const snippets::lowered::PortDescriptorPtr& input) { + io_layouts.push_back(get_layout(input->get_layout(), input->get_shape())); + leading_dimensions.push_back(get_in_leading_dim(input->get_shape(), io_layouts.back())); + }; + auto init_out_scheduling_params = [&](const snippets::lowered::PortDescriptorPtr& output) { + io_layouts.push_back(get_layout(output->get_layout(), output->get_shape())); + leading_dimensions.push_back(get_out_leading_dim(output->get_shape(), io_layouts.back())); + }; + init_in_scheduling_params(expr->get_input_port_descriptor(0)); + if (brgemm_node->is_with_data_repacking()) { + io_layouts.push_back(std::vector{}); + leading_dimensions.push_back(0); + } else { + init_in_scheduling_params(expr->get_input_port_descriptor(1)); } - init_scheduling_params(snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(brgemm_node->output(0))->get_layout(), - brgemm_node->output(0).get_shape()); + init_out_scheduling_params(expr->get_output_port_descriptor(0)); - const auto& A_shape = brgemm_node->get_input_shape(0); + const auto& A_shape = expr->get_input_port_descriptor(0)->get_shape(); const auto& A_layout = io_layouts[0]; - const auto& C_shape = brgemm_node->get_output_shape(0); + const auto& C_shape = expr->get_output_port_descriptor(0)->get_shape(); const auto& C_layout = io_layouts[2]; // We need find original M,N,K having layouts and ordered shapes @@ -777,6 +803,9 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt m_M = brgemm_node->get_input_count(0); m_N = C_shape[get_ordered_idx(C_layout, C_layout.size() - 1)]; + if (brgemm_node->is_with_data_repacking()) + leading_dimensions[1] = rnd_up(m_N, brgemm_copy->get_n_block_size()); + auto brg0Prc = 
InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(0)); auto brg1Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(1)); m_brg0VnniFactor = 4 / brg0Prc.size(); @@ -827,7 +856,7 @@ BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt brgemmCtx.N = N(n); brgemmCtx.K = K(k); brgemmCtx.LDA = leading_dimensions[0]; - brgemmCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, brgemm_copy->get_n_block_size()) : leading_dimensions[1]; + brgemmCtx.LDB = leading_dimensions[1]; brgemmCtx.LDC = leading_dimensions[2]; brgemmCtx.dt_in0 = static_cast(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc)); brgemmCtx.dt_in1 = static_cast(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc)); @@ -1219,23 +1248,14 @@ BrgemmCopyBEmitter::BrgemmCopyBEmitter(jit_generator* h, cpu_isa_t isa, const Ex if (m_with_comp) m_comp_offset = brgemm_repack->get_offset_compensations(); - const auto& layout = snippets::lowered::PortDescriptorUtils::get_port_descriptor_ptr(brgemm_repack->input(0))->get_layout(); - const auto& original_shape = brgemm_repack->get_input_shape(0); + const auto& in_desc = expr->get_input_port_descriptor(0); + const auto& layout = in_desc->get_layout(); + const auto& original_shape = in_desc->get_shape(); auto transposed_shape = original_shape; size_t leading_dimension = *(original_shape.rbegin()); if (!layout.empty()) { - transposed_shape.resize(layout.size(), 1); - for (size_t i = 0; i < layout.size(); ++i) { - transposed_shape[i] = original_shape[layout[i]]; - } - // The idea here is to find "2" (for 4D shapes) in the layout and multiply dimensions that are to the right - // This implies that "3" is the last layout value, otherwise this layout is not supported. 
- // counting from the end since shape could be prepended with ones - const int64_t num_last_dims = layout.end() - std::find(layout.begin(), layout.end(), layout.size() - 2) - 1; - if (layout.back() != layout.size() - 1 || num_last_dims < 1) - IE_THROW() << "BrgemmRepackEmitter detected invalid layout values: " << - "check that this shape + layout combination is schedulable"; - leading_dimension = std::accumulate(original_shape.end() - num_last_dims, original_shape.end(), 1, std::multiplies()); + transposed_shape = snippets::utils::get_planar_vdims(original_shape, layout); + leading_dimension = BrgemmEmitter::get_in_leading_dim(original_shape, layout); } m_N = *(transposed_shape.rbegin()); diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp index 7019fb14c6ec29..40437eb9898099 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp @@ -367,6 +367,9 @@ class BrgemmEmitter : public jit_emitter { static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); size_t aux_gprs_count() const override; + static size_t get_in_leading_dim(const VectorDims& shape, const std::vector& layout); + static size_t get_out_leading_dim(const VectorDims& shape, const std::vector& layout); + private: void validate_arguments(const std::vector &in, const std::vector &out) const override; void emit_impl(const std::vector& in, diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 96be8734ec0dce..d2dd2b0eda08ce 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -495,6 +495,8 @@ static Config::SnippetsMode getSnippetsMode(const std::mapinput(1)); const auto& brgemm_out_desc = PortDescriptorUtils::get_port_descriptor_ptr(brgemm->output(0)); - const auto dimsMatMulIn0 = 
snippets::utils::get_planar_pshape(brgemm->input_value(0)).get_shape(); - const auto dimsMatMulIn1 = snippets::utils::get_planar_pshape(brgemm->input_value(1)).get_shape(); + const auto dimsMatMulIn0 = snippets::utils::get_planar_pshape(brgemm->input(0)).get_shape(); + const auto dimsMatMulIn1 = snippets::utils::get_planar_pshape(brgemm->input(1)).get_shape(); const auto K = *dimsMatMulIn0.rbegin(); const auto N = *dimsMatMulIn1.rbegin(); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp index df88ffa7edcd82..939998c08bd79e 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/set_brgemm_cpu_blocking_params.cpp @@ -35,8 +35,8 @@ pass::SetBrgemmCPUBlockingParams::SetBrgemmCPUBlockingParams() { return false; } - const auto dimsMatMulIn0 = snippets::utils::get_planar_pshape(brgemm->input_value(0)).get_shape(); - const auto dimsMatMulIn1 = snippets::utils::get_planar_pshape(brgemm->input_value(1)).get_shape(); + const auto dimsMatMulIn0 = snippets::utils::get_planar_pshape(brgemm->input(0)).get_shape(); + const auto dimsMatMulIn1 = snippets::utils::get_planar_pshape(brgemm->input(1)).get_shape(); const auto K = *dimsMatMulIn0.rbegin(); const auto N = *dimsMatMulIn1.rbegin(); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index d67c5047b992e0..e979270fee3318 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -112,6 +112,7 @@ #include "snippets/pass/mha_tokenization.hpp" #include "snippets/pass/collapse_subgraph.hpp" #include "snippets/pass/common_optimizations.hpp" +#include 
"snippets/pass/split_dimension_m.hpp" #include "snippets/pass/extract_reshapes_from_mha.hpp" // Misc @@ -612,10 +613,14 @@ void Transformations::MainSnippets(void) { // To avoid sitations when Transpose is not alone node between MatMul and Result, // Plugin disables Transpose tokenization on output tokenization_config.mha_token_enable_transpose_on_output = (inferencePrecision == ov::element::f32); - tokenization_config.concurrency = parallel_get_num_threads(); + tokenization_config.concurrency = config.streamExecutorConfig._threadsPerStream; + if (tokenization_config.concurrency == 0) + tokenization_config.concurrency = parallel_get_max_threads(); // The optimization "SplitDimensionM" depends on target machine (thread count). // To avoid uncontrolled behavior in tests, we disabled the optimization when there is Config::SnippetsMode::IgnoreCallback tokenization_config.split_m_dimension = snippetsMode != Config::SnippetsMode::IgnoreCallback; + // [122706] Some 3D MHA Patterns have perf regressions when Transpose op is tokenized + tokenization_config.mha_supported_transpose_ranks = { 4 }; ngraph::pass::Manager snippetsManager; snippetsManager.set_per_pass_validation(false); @@ -671,15 +676,10 @@ void Transformations::MainSnippets(void) { return true; }; auto is_unsupported_parallel_work_amount = [&](const std::shared_ptr& n, const ov::Shape& shape) { - const auto parallel_work_amount = std::accumulate(shape.rbegin() + 2, shape.rend(), 1, std::multiplies()); - // Heuristic values: - // parallelism work amount - not enough work amount for parallelism - // TODO: The heuristic will be removed after parallelism support on JIT level - const auto needed_num_of_threads = 12lu; + const size_t parallel_work_amount = std::accumulate(shape.rbegin() + 2, shape.rend(), 1, std::multiplies()); const auto is_unsupported_parallel_work_amount = - parallel_get_num_threads() / 2 > parallel_work_amount && - static_cast(parallel_work_amount) < needed_num_of_threads && - 
!ov::snippets::pass::CommonOptimizations::CanOptimizeParallelWA(n, tokenization_config.concurrency); + parallel_work_amount < tokenization_config.concurrency && + !ov::snippets::pass::SplitDimensionM::can_be_optimized(n, tokenization_config.concurrency); return is_unsupported_parallel_work_amount; }; #endif // OPENVINO_ARCH_X86_64 diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index 8193709b479741..b05bf845538859 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -5,6 +5,7 @@ #include "snippets/mha.hpp" #include "common_test_utils/test_constants.hpp" #include "test_utils/cpu_test_utils.hpp" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_plugin_config.hpp" #include "ie_system_conf.h" @@ -15,7 +16,7 @@ namespace snippets { namespace { -const std::vector> inputShapes = { +const std::vector> inputShapes_4D = { {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, @@ -23,6 +24,11 @@ const std::vector> inputShapes = { {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}, }; +const std::vector> inputShapes_3D = { + {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, { 68, 6, 92}}, +}; + static inline bool is_bf16_supported() { return InferenceEngine::with_cpu_x86_bfloat16() || InferenceEngine::with_cpu_x86_avx512_core_amx_bf16(); } @@ -40,24 +46,74 @@ static inline std::vector> precision_bf16(size_t coun return prc; } -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA, MHA, +static std::map enable_callback() { + return std::map{ + { + 
InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE, + InferenceEngine::PluginConfigInternalParams::ENABLE + }, + }; +} + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, MHA, ::testing::Combine( - ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(inputShapes_4D), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), + ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::Values(CPUTestUtils::cpuEmptyPluginConfig)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16, MHA, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, MHA, + ::testing::Combine( + ::testing::ValuesIn(inputShapes_3D), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({false, true}), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // [122706]: Subgraph + 4 Transpose + ::testing::Values(2), // decomposed Transpose + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpuEmptyPluginConfig)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D_SplitDimensionM, MHA, + ::testing::Combine( + ::testing::Values(std::vector{{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}}), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D_SplitDimensionM, MHA, + ::testing::Combine( + ::testing::Values(std::vector{{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}}), + ::testing::ValuesIn(precision_f32(4)), + 
::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes + ::testing::Values(1), // MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, MHA, ::testing::Combine( - ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(inputShapes_4D), ::testing::ValuesIn(precision_bf16(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), + ::testing::Values(MHA::default_thread_count), ::testing::Values(7), // MHA + 5 Converts + 1 Transpose on output ::testing::Values(6), // MHA + 5 Converts on inputs and output ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -66,10 +122,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16, MHA, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, ::testing::Combine( - ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(inputShapes_4D), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({false}), + ::testing::Values(MHA::default_thread_count), ::testing::Values(7), ::testing::Values(7), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -83,6 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAMulAdd, MHAMulAdd, ::testing::ValuesIn(precision_f32(3)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false}), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -104,6 +162,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA, MHASelect, ::testing::ValuesIn(precision_f32(6)), ::testing::Values(ov::element::f32), ::testing::Values(false), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(2), // 
Less + MHA ::testing::Values(2), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -125,6 +184,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeOnInputs_4D, MHAWOTranspos ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -137,6 +197,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTranspose_4D, MHAWOTranspose, ::testing::ValuesIn(precision_f32(3)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -149,6 +210,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTranspose_3D, MHAWOTranspose, ::testing::ValuesIn(precision_f32(3)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -161,6 +223,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeBF16_4D, MHAWOTranspose, ::testing::ValuesIn(precision_bf16(3)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -173,6 +236,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeBF16_3D, MHAWOTranspose, ::testing::ValuesIn(precision_bf16(3)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // Need to support False for graph builder in 
tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -185,6 +249,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeEnforceBF16_4D, MHAWOTrans ::testing::ValuesIn(precision_f32(3)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -197,6 +262,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeEnforceBF16_3D, MHAWOTrans ::testing::ValuesIn(precision_f32(3)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(5), // MHA + 4 extra Converts on inputs and output ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -205,10 +271,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWOTransposeEnforceBF16_3D, MHAWOTrans INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAINT8MatMul, MHAINT8MatMul, ::testing::Combine( - ::testing::ValuesIn(std::vector>(inputShapes.begin(), inputShapes.begin() + 2)), + ::testing::ValuesIn(std::vector>(inputShapes_4D.begin(), inputShapes_4D.begin() + 2)), ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -221,18 +288,20 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAQuantMatMul0, MHAQuantMatMul0, ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), ::testing::Values(8), // FQ on input + MHA + Transpose on output + 4 Reshapes + Deq Mul ::testing::Values(3), // FQ on input + MHA + Deq Mul ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::Values(CPUTestUtils::cpuEmptyPluginConfig)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul, MHAFQAfterMatMul, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul_4D, MHAFQAfterMatMul, ::testing::Combine( - ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(inputShapes_4D), ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), ::testing::Values(3), // MHA + Transpose on output + Deq Mul ::testing::Values(2), // MHA + Deq Mul ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -245,6 +314,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQ, MHAFQ, ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), ::testing::Values(7), // Transposex2 + Subgraphsx5 ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -261,6 +331,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHATransposedB, MHATransposedB, ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(2), ::testing::Values(1), ::testing::Values(ov::test::utils::DEVICE_CPU), @@ -282,6 +353,7 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAWithExtractedReshape, MHAWithExtracte ::testing::Values(std::vector{}), ::testing::Values(ov::element::f32), ::testing::ValuesIn({true}), // False is not supported for graph builder in tests + ::testing::Values(MHA::default_thread_count), ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA ::testing::Values(2), // Extracted Add + MHA ::testing::Values(ov::test::utils::DEVICE_CPU), diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose.cpp index 4862bdabf03419..4212102e5698e0 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose.cpp @@ -11,11 +11,24 @@ namespace snippets { namespace { -std::vector input_shapes{{2, 3, 5, 13}, {2, 3, 2, 4}, {1, 7, 1, 4}}; -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Transpose, Transpose, +std::vector input_shapes_4D{{2, 3, 5, 13}, {2, 3, 2, 4}, {1, 7, 1, 4}}; +std::vector input_shapes_3D{{3, 5, 13}, {3, 2, 4}, {7, 1, 4}}; + +std::vector> orders_4D{{0, 2, 3, 1}}; +std::vector> orders_3D{{1, 2, 0}}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Transpose_3D, Transpose, + ::testing::Combine( + ::testing::ValuesIn(input_shapes_3D), + ::testing::ValuesIn(orders_3D), + ::testing::Values(1), // Transpose + ::testing::Values(1), // Tokenized Transpose + ::testing::Values(ov::test::utils::DEVICE_CPU)), + Transpose::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Transpose_4D, Transpose, ::testing::Combine( - ::testing::ValuesIn(input_shapes), - ::testing::Values(std::vector {0, 2, 3, 1}), + ::testing::ValuesIn(input_shapes_4D), + ::testing::ValuesIn(orders_4D), ::testing::Values(1), // Transpose ::testing::Values(1), // Tokenized Transpose ::testing::Values(ov::test::utils::DEVICE_CPU)), @@ -25,7 +38,7 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMul, TransposeMul, ::testing::Combine( ::testing::Values(ov::PartialShape {2, 31, 3, 5}), ::testing::ValuesIn(std::vector{{2, 3, 5, 31}}), - ::testing::Values(std::vector {0, 2, 3, 1}), + ::testing::Values(std::vector {0, 2, 3, 1}), ::testing::Values(1), // Transpose ::testing::Values(1), // Tokenized Transpose ::testing::Values(ov::test::utils::DEVICE_CPU)), diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp index 3a760050d0159a..ced190761843de 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/mul_add_to_fma.cpp @@ -10,7 +10,7 @@ #include "snippets/op/scalar.hpp" #include "lowering_utils.hpp" #include "common_test_utils/common_utils.hpp" -#include "snippets/pass_manager.hpp" +#include "snippets/pass/manager.hpp" namespace ov { namespace test { diff --git a/src/tests/functional/plugin/shared/include/snippets/mha.hpp b/src/tests/functional/plugin/shared/include/snippets/mha.hpp index 1a922d215fa058..547fa323cf4b18 100644 --- a/src/tests/functional/plugin/shared/include/snippets/mha.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/mha.hpp @@ -16,6 +16,7 @@ typedef std::tuple< std::vector, // Input Element types ov::element::Type, // Inference precision bool, // With Multiply + size_t, // Thread count size_t, // Expected num nodes size_t, // Expected num subgraphs std::string, // Target Device @@ -27,13 +28,17 @@ class MHA : public testing::WithParamInterface, public: static std::string getTestCaseName(testing::TestParamInfo obj); + constexpr static size_t default_thread_count = 0; + protected: void SetUp() override; + void compile_model() override; void generate_inputs(const std::vector& targetInputStaticShapes) override; virtual std::shared_ptr get_subgraph(); bool m_with_mul = false; + 
size_t m_thread_count; std::vector m_input_types; }; diff --git a/src/tests/functional/plugin/shared/src/snippets/mha.cpp b/src/tests/functional/plugin/shared/src/snippets/mha.cpp index 3017fe55a83a44..c21a754b0ad901 100644 --- a/src/tests/functional/plugin/shared/src/snippets/mha.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/mha.cpp @@ -18,10 +18,11 @@ std::string MHA::getTestCaseName(testing::TestParamInfo elem_types; ov::element::Type prc; bool withMul; + size_t thread_count; std::string targetDevice; size_t num_nodes, num_subgraphs; std::map additionalConfig; - std::tie(inputShapes, elem_types, prc, withMul, num_nodes, num_subgraphs, targetDevice, additionalConfig) = obj.param; + std::tie(inputShapes, elem_types, prc, withMul, thread_count, num_nodes, num_subgraphs, targetDevice, additionalConfig) = obj.param; std::ostringstream result; for (size_t i = 0; i < inputShapes.size(); ++i) @@ -29,6 +30,7 @@ std::string MHA::getTestCaseName(testing::TestParamInfo inputShapes; ov::element::Type prc; std::map additionalConfig; - std::tie(inputShapes, m_input_types, prc, m_with_mul, ref_num_nodes, ref_num_subgraphs, targetDevice, additionalConfig) = this->GetParam(); + std::tie(inputShapes, m_input_types, prc, m_with_mul, m_thread_count, + ref_num_nodes, ref_num_subgraphs, targetDevice, additionalConfig) = this->GetParam(); init_input_shapes(static_partial_shapes_to_test_representation(inputShapes)); const auto subgraph_model = get_subgraph(); @@ -66,6 +69,12 @@ void MHA::SetUp() { rel_threshold = 0.05f; } +void MHA::compile_model() { + if (m_thread_count != default_thread_count) + core->set_property(targetDevice, ov::inference_num_threads(m_thread_count)); + SubgraphBaseTest::compile_model(); +} + void MHA::generate_inputs(const std::vector& targetInputStaticShapes) { inputs.clear(); const auto& model_inputs = function->inputs(); diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp 
b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp index 0c6521dba84e95..57f7bf30e3c860 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp @@ -56,6 +56,19 @@ class MHAFunction : public SnippetsFunctionBase { std::vector precisions; }; +class MHASplitMFunction : public MHAFunction { +public: + explicit MHASplitMFunction(const std::vector& inputShapes, const std::vector& precisions, + const std::vector& reshapes, bool with_mul = true) + : MHAFunction(inputShapes, precisions, with_mul), reshapes(reshapes) { + OPENVINO_ASSERT(reshapes.size() == 5, "Got invalid number of Reshape shapes"); + } +protected: + std::shared_ptr initReference() const override; + + std::vector reshapes; +}; + /* Graph: * Transpose1[0,2,1,3] Constant * \ / diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp index fdefcf03d9dd19..661af347dd4574 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp @@ -13,6 +13,40 @@ namespace ov { namespace test { namespace snippets { +namespace { +std::vector get_rank_equivalent_order(std::vector default_order, size_t rank) { + OPENVINO_ASSERT(rank > 2, "Incorrect rank for testing"); + auto order = std::vector(rank); + std::iota(order.begin(), order.end(), 0); + const auto diff = rank - default_order.size(); + for (size_t i = 0; i < default_order.size(); ++i) { + order[diff + i] = default_order[i] + diff; + } + return order; +} +std::vector get_fusion_order(size_t rank) { + return get_rank_equivalent_order({1, 0, 2}, rank); +} +std::vector get_decomposed_order(size_t rank) { + return get_rank_equivalent_order({1, 2, 0}, rank); +} +std::vector get_fusion_order_after_split_m(size_t rank, bool is_input) { + if (rank == 4) { + return is_input ? 
std::vector{2, 0, 1, 3} : std::vector{1, 2, 0, 3}; + } else if (rank == 5) { + return is_input ? std::vector{0, 3, 1, 2, 4} : std::vector{0, 2, 3, 1, 4}; + } + OPENVINO_THROW("Incorrect rank for testing"); +} +std::vector get_decomposed_order_after_split_m(size_t rank) { + if (rank == 4) { + return std::vector{1, 2, 3, 0}; + } else if (rank == 5) { + return std::vector{0, 2, 3, 4, 1}; + } + OPENVINO_THROW("Incorrect rank for testing"); +} +} // namespace std::shared_ptr MHAFunction::initOriginal() const { auto transpose0Param = std::make_shared(precisions[0], input_shapes[0]); @@ -21,48 +55,40 @@ std::shared_ptr MHAFunction::initOriginal() const { auto transpose2Param = std::make_shared(precisions[3], input_shapes[3]); ngraph::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param}; - std::vector constantShapes; - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({1, input_shapes[1].get_shape()[2], 1, 1})); - constantShapes.push_back(ov::Shape({2})); - constantShapes.push_back(ov::Shape({4})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - - auto transpose0Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[0], std::vector{0, 2, 1, 3}); - auto transpose1Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[1], std::vector{0, 2, 3, 1}); - auto transpose2Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[5], std::vector{0, 2, 1, 3}); - auto transpose3Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[6], std::vector{0, 2, 1, 3}); - - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto 
reshape0Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[3], reshape0ConstData); + const auto rank = input_shapes[0].size(); + const auto fusion_order = get_fusion_order(rank); + const auto decomposed_order = get_decomposed_order(rank); - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[4], reshape1ConstData); + const auto transpose0Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); + const auto transpose1Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, decomposed_order); + const auto transpose2Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); + const auto transpose3Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); - float transA = false; - float transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); std::shared_ptr matmul_parent1 = transpose1; if (with_mul) { - std::vector mulConstData(ngraph::shape_size(constantShapes[2])); - auto mulConst = ngraph::builder::makeConstant(precisions[1], constantShapes[2], mulConstData, true); + ov::Shape shape(rank, 1); + shape[rank - 3] = transpose1->get_output_shape(0)[rank - 3]; + std::vector mulConstData(ngraph::shape_size(shape)); + const auto mulConst = ngraph::builder::makeConstant(precisions[1], shape, mulConstData, true); matmul_parent1 = std::make_shared(transpose1, mulConst); } - const auto matMul0 = std::make_shared(transpose0, matmul_parent1, transA, transB); + const auto matMul0 = std::make_shared(transpose0, matmul_parent1); const auto add = std::make_shared(matMul0, 
addParam); + + const auto interm_shape = add->get_output_shape(0); + const auto batch = std::accumulate(interm_shape.cbegin(), interm_shape.cbegin() + rank - 1, 1, std::multiplies()); + const auto reshape0ConstData = std::vector{ batch, -1 }; + const auto reshape1ConstData = interm_shape; + const auto reshape0Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{reshape0ConstData.size()}, reshape0ConstData); + const auto reshape1Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{reshape1ConstData.size()}, reshape1ConstData); + const auto reshape0 = std::make_shared(add, reshape0Const, true); const auto softMax = std::make_shared(reshape0, 1); const auto reshape1 = std::make_shared(softMax, reshape1Const, true); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); - const auto matMul1 = std::make_shared(reshape1, transpose2, transA, transB); + const auto matMul1 = std::make_shared(reshape1, transpose2); const auto transpose3 = std::make_shared(matMul1, transpose3Const); ngraph::ResultVector results{std::make_shared(transpose3)}; @@ -81,53 +107,36 @@ std::shared_ptr MHAFunction::initReference() const { auto addParam = std::make_shared(precisions[2], input_shapes[2]); auto transpose2Param = std::make_shared(precisions[3], input_shapes[3]); - std::vector constantShapes; - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({1, input_shapes[1].get_shape()[2], 1, 1})); - constantShapes.push_back(ov::Shape({2})); - constantShapes.push_back(ov::Shape({4})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - constantShapes.push_back(ov::Shape({input_shapes[0].get_shape().size()})); - - auto transpose0Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[0], std::vector{0, 2, 1, 3}); - auto transpose1Const = 
ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[1], std::vector{0, 2, 3, 1}); - auto transpose2Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[5], std::vector{0, 2, 1, 3}); - auto transpose3Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[6], std::vector{0, 2, 1, 3}); - ngraph::ParameterVector subgraph_params = {transpose0Param, transpose1Param, addParam, transpose2Param}; - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto reshape0Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[3], reshape0ConstData); + const auto rank = input_shapes[0].size(); + const auto fusion_order = get_fusion_order(rank); + const auto decomposed_order = get_decomposed_order(rank); - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ngraph::builder::makeConstant(ngraph::element::i64, constantShapes[4], reshape1ConstData); + const auto transpose0Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); + const auto transpose1Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, decomposed_order); + const auto transpose2Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); + const auto transpose3Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, fusion_order); - float transA = false; - float transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); std::shared_ptr matmul_parent1 = transpose1; if (with_mul) { - std::vector 
mulConstData(ngraph::shape_size(constantShapes[2])); - auto mulConst = ngraph::builder::makeConstant(precisions[1], constantShapes[2], mulConstData, true); - auto mulParam = std::make_shared(precisions[1], mulConst->get_shape()); + ov::Shape shape(rank, 1); + shape[rank - 3] = transpose1->get_output_shape(0)[rank - 3]; + std::vector mulConstData(ngraph::shape_size(shape)); + const auto mulConst = ngraph::builder::makeConstant(precisions[1], shape, mulConstData, true); + const auto mulParam = std::make_shared(precisions[1], mulConst->get_shape()); matmul_parent1 = std::make_shared(transpose1, mulParam); subgraph_params = {transpose0Param, transpose1Param, mulParam, addParam, transpose2Param}; subgraph_inputs = {data0, data1, mulConst, data2, data3}; } - const auto matMul0 = std::make_shared(transpose0, matmul_parent1, transA, transB); + + const auto matMul0 = std::make_shared(transpose0, matmul_parent1); const auto add = std::make_shared(matMul0, addParam); - const auto reshape0 = std::make_shared(add, reshape0Const, true); - const auto softMax = std::make_shared(reshape0, 1); - const auto reshape1 = std::make_shared(softMax, reshape1Const, true); + const auto softMax = std::make_shared(add, rank - 1); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); - const auto matMul1 = std::make_shared(reshape1, transpose2, transA, transB); + const auto matMul1 = std::make_shared(softMax, transpose2); const auto transpose3 = std::make_shared(matMul1, transpose3Const); auto subgraph = std::make_shared(subgraph_inputs, @@ -135,6 +144,70 @@ std::shared_ptr MHAFunction::initReference() const { return std::make_shared(NodeVector{subgraph}, ngraphParams); } +std::shared_ptr MHASplitMFunction::initReference() const { + auto data0 = std::make_shared(precisions[0], input_shapes[0]); + auto data1 = std::make_shared(precisions[1], input_shapes[1]); + auto data2 = std::make_shared(precisions[2], input_shapes[2]); + auto data3 = std::make_shared(precisions[3], 
input_shapes[3]); + ngraph::ParameterVector ngraphParams = {data0, data1, data2, data3}; + + auto make_reshape = [](const std::shared_ptr& node, const ov::Shape& new_shape) { + auto shape_const = ngraph::builder::makeConstant(ngraph::element::i32, {new_shape.size()}, new_shape); + return std::make_shared(node, shape_const, true); + }; + + auto reshape0 = make_reshape(data0, reshapes[0]); + auto reshape1 = make_reshape(data1, reshapes[1]); + auto reshape2 = make_reshape(data2, reshapes[2]); + auto reshape3 = make_reshape(data3, reshapes[3]); + NodeVector subgraph_inputs = {reshape0, reshape1, reshape2, reshape3}; + + auto transpose0Param = std::make_shared(precisions[0], reshape0->get_shape()); + auto transpose1Param = std::make_shared(precisions[1], reshape1->get_shape()); + auto addParam = std::make_shared(precisions[2], reshape2->get_shape()); + auto transpose2Param = std::make_shared(precisions[3], reshape3->get_shape()); + ngraph::ParameterVector subgraph_params = {transpose0Param, transpose1Param, addParam, transpose2Param}; + + const auto rank = input_shapes[0].size() + 1; + + const auto transpose0Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, get_fusion_order_after_split_m(rank, true)); + const auto transpose1Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, get_decomposed_order_after_split_m(rank)); + const auto transpose2Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, get_fusion_order_after_split_m(rank, true)); + const auto transpose3Const = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{rank}, get_fusion_order_after_split_m(rank, false)); + + const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); + const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); + + std::shared_ptr matmul_parent1 = transpose1; + if (with_mul) { + ov::Shape shape(rank - 1, 1); + shape[rank - 4] = transpose1->get_output_shape(0)[rank - 4]; + 
ov::Shape reshape_shape = shape; + reshape_shape.insert(reshape_shape.cbegin() + rank - 3, 1); + std::vector mulConstData(ngraph::shape_size(shape)); + const auto mulConst = ngraph::builder::makeConstant(precisions[1], shape, mulConstData, true); + const auto reshape_mul = make_reshape(mulConst, reshape_shape); + const auto mulParam = std::make_shared(precisions[1], reshape_mul->get_shape()); + matmul_parent1 = std::make_shared(transpose1, mulParam); + subgraph_params = {transpose0Param, transpose1Param, mulParam, addParam, transpose2Param}; + subgraph_inputs = {reshape0, reshape1, reshape_mul, reshape2, reshape3}; + } + + const auto matMul0 = std::make_shared(transpose0, matmul_parent1); + const auto add = std::make_shared(matMul0, addParam); + const auto softMax = std::make_shared(add, rank - 1); + const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); + const auto matMul1 = std::make_shared(softMax, transpose2); + const auto transpose3 = std::make_shared(matMul1, transpose3Const); + + const auto subgraph = std::make_shared(subgraph_inputs, + std::make_shared(ov::OutputVector{transpose3}, + subgraph_params)); + + auto reshape4 = make_reshape(subgraph, reshapes[4]); + ngraph::ResultVector results{std::make_shared(reshape4)}; + return std::make_shared(results, ngraphParams, "mha"); +} std::shared_ptr MHAMatMul0TransposeFunction::initOriginal() const { auto transpose0Param = std::make_shared(precisions[0], input_shapes[0]); From 0139fffc1809a87723650ea84a4c7536a25b2b86 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Fri, 27 Oct 2023 11:27:18 +0400 Subject: [PATCH 092/275] [CONFORMANCE] Revert Real Op versions to Opsets (#20714) --- .../src/matchers/subgraph/fused_names.cpp | 5 +- .../subgraphs_dumper/tests/cache/op_cache.cpp | 8 +- .../functional_test_utils/summary/op_info.hpp | 78 ------------------- .../template/report_template.html | 2 +- .../src/summary/op_info.cpp | 12 +-- 5 files changed, 11 insertions(+), 94 deletions(-) diff --git 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp index 17c477d8a51f70..b8c6408329ac48 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp @@ -16,13 +16,12 @@ using namespace ov::tools::subgraph_dumper; void FusedNamesExtractor::set_target_device(const std::string& _device) { auto available_devices = core->get_available_devices(); - if (_device.empty()) { + if (_device.empty() && !available_devices.empty()) { device = available_devices.front(); std::cout << "[ WARNING ][ GRAPH CACHE ] " << device << " will be used for `fused_names` extractor" << std::endl; return; - } else if (_device != "TEMPLATE" && - std::find(available_devices.begin(), + } else if (std::find(available_devices.begin(), available_devices.end(), _device) == available_devices.end()) { std::string message = "Incorrect device "; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index 6900efd658b13f..ad141077a8d5dd 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -133,7 +133,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 3); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-0_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-1_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); 
ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -150,7 +150,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 1); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-0_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-1_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -163,7 +163,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { ASSERT_TRUE(this->serialize_op({convert_node, test_meta})); ASSERT_TRUE(ov::util::directory_exists(test_artifacts_dir)); auto serialized_model_path = ov::util::path_join({test_artifacts_dir, - "operation", "static", "Convert-0", "f16", "Convert-0_0.xml"}); + "operation", "static", "Convert-1", "f16", "Convert-1_0.xml"}); ASSERT_TRUE(ov::util::file_exists(serialized_model_path)); auto serialized_model = core->read_model(serialized_model_path); auto res = compare_functions(test_model, serialized_model, true, false, true, true, true, false); @@ -171,7 +171,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { } TEST_F(OpCacheUnitTest, get_rel_serilization_dir) { - auto ref_path = ov::util::path_join({"operation", "static", "Convert-0", "f16"}); + auto ref_path = ov::util::path_join({"operation", "static", "Convert-1", "f16"}); auto original_path = this->get_rel_serilization_dir(convert_node); ASSERT_EQ(ref_path, original_path); } diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp index ef76694caf9691..3575f42ad342a8 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp +++ 
b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp @@ -11,84 +11,6 @@ namespace ov { namespace test { namespace functional { -// {{ type_info, real_version }} -const std::map not_aligned_op_version = { - // opset 1 - {ov::op::v0::Abs::get_type_info_static(), 0}, - {ov::op::v0::Acos::get_type_info_static(), 0}, - {ov::op::v0::Asin::get_type_info_static(), 0}, - {ov::op::v0::Atan::get_type_info_static(), 0}, - {ov::op::v0::BatchNormInference::get_type_info_static(), 0}, - {ov::op::v0::CTCGreedyDecoder::get_type_info_static(), 0}, - {ov::op::v0::Ceiling::get_type_info_static(), 0}, - {ov::op::v0::Clamp::get_type_info_static(), 0}, - {ov::op::v0::Concat::get_type_info_static(), 0}, - {ov::op::v0::Constant::get_type_info_static(), 0}, - {ov::op::v0::Convert::get_type_info_static(), 0}, - {ov::op::v0::Cos::get_type_info_static(), 0}, - {ov::op::v0::Cosh::get_type_info_static(), 0}, - {ov::op::v0::DepthToSpace::get_type_info_static(), 0}, - {ov::op::v0::DetectionOutput::get_type_info_static(), 0}, - {ov::op::v0::Elu::get_type_info_static(), 0}, - {ov::op::v0::Erf::get_type_info_static(), 0}, - {ov::op::v0::Exp::get_type_info_static(), 0}, - {ov::op::v0::FakeQuantize::get_type_info_static(), 0}, - {ov::op::v0::Floor::get_type_info_static(), 0}, - {ov::op::v0::GRN::get_type_info_static(), 0}, - {ov::op::v0::HardSigmoid::get_type_info_static(), 0}, - {ov::op::v0::Interpolate::get_type_info_static(), 0}, - {ov::op::v0::Log::get_type_info_static(), 0}, - {ov::op::v0::LRN::get_type_info_static(), 0}, - {ov::op::v0::LSTMCell::get_type_info_static(), 0}, - {ov::op::v0::LSTMSequence::get_type_info_static(), 0}, - {ov::op::v0::MatMul::get_type_info_static(), 0}, - {ov::op::v0::Negative::get_type_info_static(), 0}, - {ov::op::v0::NormalizeL2::get_type_info_static(), 0}, - {ov::op::v0::PRelu::get_type_info_static(), 0}, - {ov::op::v0::PSROIPooling::get_type_info_static(), 0}, - {ov::op::v0::Parameter::get_type_info_static(), 0}, - 
{ov::op::v0::PriorBox::get_type_info_static(), 0}, - {ov::op::v0::PriorBoxClustered::get_type_info_static(), 0}, - {ov::op::v0::Proposal::get_type_info_static(), 0}, - {ov::op::v0::Range::get_type_info_static(), 0}, - {ov::op::v0::Relu::get_type_info_static(), 0}, - {ov::op::v0::RegionYolo::get_type_info_static(), 0}, - {ov::op::v0::Result::get_type_info_static(), 0}, - {ov::op::v0::ReverseSequence::get_type_info_static(), 0}, - {ov::op::v0::RNNCell::get_type_info_static(), 0}, - {ov::op::v0::Selu::get_type_info_static(), 0}, - {ov::op::v0::ShapeOf::get_type_info_static(), 0}, - {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, - {ov::op::v0::Sign::get_type_info_static(), 0}, - {ov::op::v0::Sigmoid::get_type_info_static(), 0}, - {ov::op::v0::Sin::get_type_info_static(), 0}, - {ov::op::v0::Sinh::get_type_info_static(), 0}, - {ov::op::v0::Sqrt::get_type_info_static(), 0}, - {ov::op::v0::SpaceToDepth::get_type_info_static(), 0}, - {ov::op::v0::SquaredDifference::get_type_info_static(), 0}, - {ov::op::v0::Squeeze::get_type_info_static(), 0}, - {ov::op::v0::Tan::get_type_info_static(), 0}, - {ov::op::v0::Tanh::get_type_info_static(), 0}, - {ov::op::v0::TensorIterator::get_type_info_static(), 0}, - {ov::op::v0::Tile::get_type_info_static(), 0}, - {ov::op::v0::Unsqueeze::get_type_info_static(), 0}, - {ov::op::v0::Xor::get_type_info_static(), 0}, - // opset 2 - {ov::op::v0::MVN::get_type_info_static(), 0}, - {ov::op::v0::ReorgYolo::get_type_info_static(), 0}, - {ov::op::v0::ROIPooling::get_type_info_static(), 0}, - {ov::op::v0::Gelu::get_type_info_static(), 0}, - {ov::op::v1::BatchToSpace::get_type_info_static(), 1}, - {ov::op::v1::SpaceToBatch::get_type_info_static(), 1}, - // opset 3 - {ov::op::v0::RNNCell::get_type_info_static(), 0}, - {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, - // opset 4 - {ov::op::v3::Acosh::get_type_info_static(), 3}, - {ov::op::v3::Asinh::get_type_info_static(), 3}, - {ov::op::v3::Atanh::get_type_info_static(), 3}, -}; - // 
todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix = ""); std::string get_node_version(const ov::NodeTypeInfo& node_type_info); diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html b/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html index e440458b2704ed..2b7ea78cc1d7f0 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/template/report_template.html @@ -97,7 +97,7 @@

Operations coverage summary: Tag: {{report_tag}} | Version: {{report_version - + {% for d in devices -%} {% endfor %} diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp index 358be2d424b177..17dd63502454a4 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp @@ -20,14 +20,10 @@ std::string get_node_version(const std::shared_ptr& node, const std::s std::string get_node_version(const ov::NodeTypeInfo& node_type_info) { std::string op_name = node_type_info.name + std::string("-"); std::string opset_version = node_type_info.get_version(); - if (not_aligned_op_version.count(node_type_info)) { - op_name += std::to_string(not_aligned_op_version.at(node_type_info)); - } else { - std::string opset_name = "opset"; - auto pos = opset_version.find(opset_name); - if (pos != std::string::npos) { - op_name += opset_version.substr(pos + opset_name.size()); - } + std::string opset_name = "opset"; + auto pos = opset_version.find(opset_name); + if (pos != std::string::npos) { + op_name += opset_version.substr(pos + opset_name.size()); } return op_name; } From 9cc1e992f44041c78730f9fa01668e04b83d4b93 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Fri, 27 Oct 2023 17:17:52 +0900 Subject: [PATCH 093/275] [GPU] Fix outputs are not allocated in loop_inst (#20585) * [GPU] Fix outputs are not allocated in loop_inst * Fill empty padding when the number of output paddings is less than num_outputs * Fill empty data types when the number of output data types is less than num_outputs * Modify postprocess_output_memory to set output memory without set_output_memory function * In postprocess_output_memory, get concatenated_output_mem using input_info including output idx * Modify gpu functional tests for dynamic loop to check multiple outputs of dynamic loop * update postprocessing for condition 
* Fix empty dimension issue for scalar value * change code to get output paddings and output data type in primitive * allocate memory for scalar data type with zero dimension * Fix mismatch issue of input layout with shape and data types in body_network * Fix output setting in post-processing * pass bytes_count to gpu_usm params * Fix condition gpu functional test issue * Revert "allocate memory for scalar data type with zero dimension" This reverts commit 2f10f3687c78406b20d52b6e37b1be2a30b4b73f. * reinterpret one dimension memory buffer to zer dimension memor buffer to avoid zero byte memory allocation issue --- .../intel_gpu/primitives/primitive.hpp | 21 +++++++ .../intel_gpu/src/graph/arg_max_min.cpp | 2 +- src/plugins/intel_gpu/src/graph/condition.cpp | 16 +++++- .../src/graph/impls/common/condition.cpp | 8 +-- .../src/graph/include/condition_inst.h | 1 + .../intel_gpu/src/graph/include/loop_inst.h | 2 + src/plugins/intel_gpu/src/graph/loop.cpp | 52 +++++++++-------- .../intel_gpu/src/graph/primitive_inst.cpp | 4 +- .../intel_gpu/src/graph/program_node.cpp | 2 +- .../intel_gpu/src/plugin/ops/condition.cpp | 2 +- .../intel_gpu/src/plugin/ops/constant.cpp | 15 +++-- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 8 +-- .../functional/subgraph_tests/condition.cpp | 8 +-- .../tests/functional/subgraph_tests/loop.cpp | 56 ++++++++++--------- 14 files changed, 118 insertions(+), 79 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp index 72c841a7578ab4..bb97725eeac9d3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp @@ -47,6 +47,11 @@ struct input_info { return *this; } + /// @brief Compare + bool operator==(const input_info& rhs) const { + return ((pid == rhs.pid) && (idx == rhs.idx)); + } + primitive_id pid; int32_t idx; struct cmp { @@ -259,6 +264,22 @@ struct 
primitive { ib >> num_outputs; } + virtual padding get_output_padding(size_t idx) const { + if (idx < output_paddings.size()) { + return output_paddings[idx]; + } else { + return padding(); + } + } + + virtual optional_data_type get_output_data_type(size_t idx) const { + if (idx < output_data_types.size()) { + return output_data_types[idx]; + } else { + return optional_data_type(); + } + } + protected: virtual std::vector> get_dependencies() const { return {}; } class condition; diff --git a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp index 47303ece86cd82..75c02dc4286b92 100644 --- a/src/plugins/intel_gpu/src/graph/arg_max_min.cpp +++ b/src/plugins/intel_gpu/src/graph/arg_max_min.cpp @@ -105,7 +105,7 @@ std::vector arg_max_min_inst::calc_output_layouts(arg_max_min_node const } for (size_t i = 0; i < desc->num_outputs; ++i) { - auto dt = desc->output_data_types[i].value_or(input_layout.data_type); + auto dt = desc->get_output_data_type(i).value_or(input_layout.data_type); layouts.push_back({output_shapes[i], dt, format::get_default_format(output_shapes[i].size())}); } return layouts; diff --git a/src/plugins/intel_gpu/src/graph/condition.cpp b/src/plugins/intel_gpu/src/graph/condition.cpp index 8da80347ea66fd..737725ebcea8f6 100644 --- a/src/plugins/intel_gpu/src/graph/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/condition.cpp @@ -240,14 +240,26 @@ void condition_inst::update_output_layout() { auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params); if (new_layouts.empty()) { auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params); - new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding); + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(0), new_layout.data_padding); _impl_params->output_layouts[0] = new_layout; } else { for (size_t i = 0; i != new_layouts.size(); ++i) { auto 
new_layout = new_layouts[i]; - new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding); + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); _impl_params->output_layouts[i] = new_layout; } } } + +void condition_inst::postprocess_output_memory(network::ptr executed_net, cldnn::condition::branch& branch) { + _outputs.clear(); + _outputs.resize(outputs_memory_count()); + for (auto out_mem_map : branch.output_map) { + auto out_mem_idx = out_mem_map.first; + auto inner_out_id = out_mem_map.second; + auto mem_ptr = executed_net->get_output(inner_out_id).get_memory(); + _outputs[out_mem_idx] = mem_ptr; + GPU_DEBUG_LOG << "Inner net - Outputs[" << out_mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl; + } +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index fbcdf6b277645b..0c9b7d843beffa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -59,13 +59,7 @@ struct condition_impl : typed_primitive_impl { instance.update_output_layout(); // Set output memory of condition_inst to inner network output memory after inner network execution - for (auto out_mem_map : branch.output_map) { - auto out_mem_idx = out_mem_map.first; - auto inner_out_id = out_mem_map.second; - auto mem_ptr = executed_net->get_output(inner_out_id).get_memory(); - instance.set_output_memory(mem_ptr, false, out_mem_idx); - GPU_DEBUG_LOG << "Inner net - Outputs[" << out_mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl; - } + instance.postprocess_output_memory(executed_net, branch); ev->set(); return ev; diff --git a/src/plugins/intel_gpu/src/graph/include/condition_inst.h b/src/plugins/intel_gpu/src/graph/include/condition_inst.h index 
4df3a995e5b2dd..65b56202a9a311 100644 --- a/src/plugins/intel_gpu/src/graph/include/condition_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/condition_inst.h @@ -79,6 +79,7 @@ class typed_primitive_inst : public typed_primitive_inst_baseget_branch_false(); } void update_output_layout(); + void postprocess_output_memory(network::ptr executed_net, cldnn::condition::branch& branch); private: network::ptr _net_true; diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index 22f4489ae507b5..b41f58accc65e0 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -289,6 +289,7 @@ class typed_primitive_inst : public typed_primitive_inst_base { ss << "* iteration_elements : " << iteration_elements << std::endl; ss << "* stride : " << stride << std::endl; ss << "* initial_offset : " << initial_offset << std::endl; + ss << "* input_info : " << concat_data_id.to_string() << std::endl; ss << "* sliced_mems :{ "; for (auto mem : sliced_mems) { ss << mem->get_layout().to_short_string() << ","; @@ -300,6 +301,7 @@ class typed_primitive_inst : public typed_primitive_inst_base { const int64_t axis; std::shared_ptr concat_data_prim; std::shared_ptr sliced_data_prim; + cldnn::input_info concat_data_id; private: mutable memory::ptr concatenated_mem; diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index a51c2d0d85973e..f3f29862cc5be9 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -306,12 +306,7 @@ void loop_inst::update_input_mapped_memory() { } void loop_inst::update_output_mapped_memory() { - if (is_dynamic()) { - if (!outputs_allocated()) { - _outputs = allocate_outputs(_impl_params.get(), true, true); - } - } - + OPENVINO_ASSERT(outputs_allocated(), "output buffer should be allocated"); for (size_t i = 0; i < _output_primitive_maps.size(); 
++i) { const auto& output_mapping = _output_primitive_maps.at(i); const primitive_id& external_id = output_mapping.external_id.pid; @@ -469,6 +464,7 @@ void loop_inst::preprocess_output_memory(const int64_t trip_count) { if (iter == concatenated_output_mem_mappings.end()) { auto memory_mapping_info = create_concat_memory_map(internal_id, output_mapping, memory, trip_count); memory_mapping_info->concat_data_prim = get_network().get_primitive(external_id.pid); + memory_mapping_info->concat_data_id = external_id; concatenated_output_mem_mappings.push_back(memory_mapping_info); GPU_DEBUG_LOG << i << ") generate concat output memory mapping: " << memory_mapping_info->to_string() << std::endl; } @@ -702,44 +698,52 @@ void loop_inst::load(BinaryInputBuffer& ib) { void loop_inst::postprocess_output_memory(bool is_dynamic) { if (is_dynamic) { + std::vector external_outputs; + external_outputs.resize(outputs_memory_count()); + for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { const auto& output_mapping = _output_primitive_maps.at(i); const auto& external_id = output_mapping.external_id; const auto& internal_id = output_mapping.internal_id; + bool output_allocated = (static_cast(external_id.idx) < _outputs.size() && _outputs[external_id.idx] != nullptr); if (output_mapping.axis < 0) { auto internalOutputPrim = get_body_network()->get_primitive(internal_id.pid); auto internal_mem = internalOutputPrim->output_memory_ptr(internal_id.idx); - if (internal_mem == nullptr) { - continue; - } - auto externalOutputPrim = _network.get_primitive(external_id.pid); - if (!externalOutputPrim->outputs_allocated()) { - externalOutputPrim->set_output_memory(internal_mem, external_id.idx); + OPENVINO_ASSERT(internal_mem != nullptr, "internal_mem should not be nullptr"); + if (!output_allocated) { + external_outputs[external_id.idx] = internal_mem; } else { - auto external_mem = externalOutputPrim->output_memory_ptr(external_id.idx); - if (external_mem->get_layout() != 
internal_mem->get_layout()) { - externalOutputPrim->set_output_memory(internal_mem, external_id.idx); - } else if (external_mem != internal_mem) { - external_mem->copy_from(get_network().get_stream(), *internal_mem); + auto external_mem = _outputs[external_id.idx]; + if (external_mem != internal_mem) { + if (external_mem->get_layout() != internal_mem->get_layout()) { + external_outputs[external_id.idx] = internal_mem; + } else { + external_mem->copy_from(get_network().get_stream(), *internal_mem); + external_outputs[external_id.idx] = external_mem; + } + } else { + external_outputs[external_id.idx] = external_mem; } } } else { - auto externalOutputPrim = _network.get_primitive(external_id.pid); - if (!externalOutputPrim->outputs_allocated() || shape_changed()) { + if (!output_allocated || shape_changed()) { auto concat_layout = _impl_params->get_output_layout(external_id.idx); - auto concat_mem = _network.get_engine().allocate_memory(concat_layout, 0); - externalOutputPrim->set_output_memory(concat_mem, external_id.idx); + auto concat_mem = _network.get_engine().allocate_memory(concat_layout, false); + external_outputs[external_id.idx] = concat_mem; auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), [&](std::shared_ptr &concat_output){ - return concat_output->concat_data_prim->id() == external_id.pid; + return concat_output->concat_data_id == external_id; }); if (iter != concatenated_output_mem_mappings.end()) { (*iter)->update_concatenated_mem(concat_mem); } + } else { + external_outputs[external_id.idx] = _outputs[external_id.idx]; } } } + _outputs = external_outputs; } for (size_t i = 0; i < concatenated_output_mem_mappings.size(); ++i) { @@ -776,7 +780,7 @@ void loop_inst::update_output_layout() { auto new_layouts = _node->type()->calc_output_layouts(*_node, *_impl_params); if (new_layouts.empty()) { auto new_layout = _node->type()->calc_output_layout(*_node, *_impl_params); - new_layout.data_padding = 
padding::max(_node->get_primitive()->output_paddings[0], new_layout.data_padding); + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(0), new_layout.data_padding); _impl_params->output_layouts[0] = new_layout; } else { if (_impl_params->output_layouts.size() < new_layouts.size()) { @@ -784,7 +788,7 @@ void loop_inst::update_output_layout() { } for (size_t i = 0; i < new_layouts.size(); ++i) { auto new_layout = new_layouts[i]; - new_layout.data_padding = padding::max(_node->get_primitive()->output_paddings[i], new_layout.data_padding); + new_layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(i), new_layout.data_padding); _impl_params->output_layouts[i] = new_layout; } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 58ecac8e776b39..cfef12b8722323 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -354,7 +354,7 @@ void primitive_inst::update_shape() { auto update_output_layout = [&](layout& layout, size_t idx) { auto data_padding = padding::max(_impl_params->get_output_layout(idx).data_padding, layout.data_padding); - layout.data_padding = padding::max(_node->get_primitive()->output_paddings[idx], data_padding); + layout.data_padding = padding::max(_node->get_primitive()->get_output_padding(idx), data_padding); if (_impl_params->get_output_layout(idx) != layout) { GPU_DEBUG_TRACE_DETAIL << id() << ": update shape: was: " << _impl_params->get_output_layout(idx).to_short_string() << " now: " << layout.to_short_string() << std::endl; @@ -1013,7 +1013,7 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool _mem_allocated = allocate_memory; if (allocate_memory) { // In case when output is mutable_data primitive, and other users dependencies are only used for - // suychronization, The output memory of such primitive will be fused with 
mutable_data + // synchronization, The output memory of such primitive will be fused with mutable_data auto users = node.get_users(); auto user_count = users.size(); uint32_t mutable_data_count = 0; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index dc9b2029ff408c..7a6967e9bcc323 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -38,7 +38,7 @@ program_node::program_node(std::shared_ptr prim, program& prog) num_outputs = prim->num_outputs; for (size_t i = 0 ; i < num_outputs; ++i) { layout output_layout = layout{ov::PartialShape{}, data_types::f32, format::bfyx}; - output_layout.data_padding = prim->output_paddings[i]; + output_layout.data_padding = prim->get_output_padding(i); output_layouts.push_back(output_layout); valid_output_layouts.push_back(false); } diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index 7d47d1127fe57d..ba238e111c70d1 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -29,7 +29,7 @@ static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ } } config.set_property(ov::intel_gpu::max_dynamic_batch(1)); - config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic())); + config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic() || p.use_new_shape_infer())); ProgramBuilder prog(internal_body, p.get_engine(), config, false, false, p.get_task_executor(), p.get_compilation_context(), true); branch.inner_program = prog.get_compiled_program(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp index b12536b10ccb9a..dddabb355ad203 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/constant.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/constant.cpp @@ -100,12 
+100,17 @@ static void create_data(ProgramBuilder& p, const ov::Shape& const_shape, const s p.primitive_ids[initialconstPrimID] = constPrimID; p.profiling_ids.push_back(initialconstPrimID); } else { - if (constLayout.count() == 0) { - // Convert zero dimension constant layout to 1 dimension to fix the issue - // that memory allocation is failed on windows when constant layout is zero dimension. - constLayout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format); + cldnn::memory::ptr mem = nullptr; + if (constLayout.bytes_count() > 0) { + mem = p.get_engine().allocate_memory(constLayout, false); + } else { + // In the case of empty const data with {0} shape, it has zero byte. + // To avoid zero byte memory allocation issue, reinterpret one dimension memory to zero dimension memory. + auto one_dim_layout = cldnn::layout(ov::PartialShape({1}), constLayout.data_type, constLayout.format); + auto one_dim_mem = p.get_engine().allocate_memory(one_dim_layout, false); + mem = p.get_engine().reinterpret_buffer(*one_dim_mem, constLayout); } - cldnn::memory::ptr mem = p.get_engine().allocate_memory(constLayout, false); + GPU_DEBUG_LOG << "[" << initialconstPrimID << ": constant] layout: " << constLayout.to_short_string() << ", mem_ptr(" << mem << ", " << mem->size() << " bytes)"<< std::endl; auto& stream = p.get_engine().get_service_stream(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index af93885a5d949c..0a2a971a46be78 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -238,12 +238,12 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptrset_output_type(0, prec, shape); - current_iteration_input_op->set_partial_shape(shape); - current_iteration_input_op->set_element_type(prec); + OPENVINO_ASSERT(current_iteration_input_op->get_partial_shape().is_static(), "current_iteration should be static layout"); + 
shape = is_dynamic? current_iteration_input_op->get_partial_shape().to_shape() : shape; + prec = current_iteration_input_op->get_element_type(); auto increment_value_id = current_iteration_input_op->get_friendly_name() + "_inc"; auto increment_value_op = std::make_shared(prec, shape, 1); diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/condition.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/condition.cpp index a8be347616b33a..1efbbaf7c8b6ec 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/condition.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/condition.cpp @@ -260,13 +260,11 @@ class InnerBodyType05 : public InnerBodyGenerator { class InnerBodyType06 : public InnerBodyGenerator { protected: std::shared_ptr generate(ov::PartialShape& input_shape, ngraph::element::Type prc) override { - auto constant = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 0), {2.0f}); - constant->set_friendly_name("body1_constant"); - // constant->get_rt_info().emplace(ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding{}); - // constant->get_rt_info().emplace("can_be_folded", false); + auto constant = ngraph::opset9::Constant::create(prc, ov::Shape(input_shape.rank().get_length(), 1), {2.0f}); + constant->set_friendly_name("body6_constant"); auto result = std::make_shared(constant); auto o_layout = result->get_layout(); - result->set_friendly_name("body1_result"); + result->set_friendly_name("body6_result"); auto body = std::make_shared( ngraph::OutputVector {result}, ngraph::ParameterVector{}, diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp index 8c7de510531348..1ca60efa2ff1e6 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/loop.cpp @@ -115,7 +115,7 @@ class 
DynamicShapeLoopTest : public testing::WithParamInterfaceset_friendly_name("start"); + auto start_add = cond_input_create(prc, inputShape, start_value); + start_add->set_friendly_name("start_add"); + auto start_mul = cond_input_create(prc, inputShape, 1); + start_mul->set_friendly_name("start_mul"); auto count = cond_input_create(ngraph::element::i64, scalarShape, max_iter_num, static_iter_num); count->set_friendly_name("count"); auto skip = cond_input_create(ngraph::element::boolean, scalarShape, true, static_continue_cond); skip->set_friendly_name("skip"); - // - // count skip start count skip start - // / / - // ___*___*____ __________*___*____ | idx | data | out | - // | idx in | | ex_val idx in | | 0 | 7 | 7 | - // | | / | | | / | / | | 1 | 7 | 8 | - // | add | | less add | | 2 | 8 | 10 | - // | | true | | | | | | 3 | 10 | 13 | - // | | | | | | | | ~~~~~ * * * ~~~~~ - // | out cnd | | cnd out | - // |___*____*___| |____*_____*________| - // Full loop Dynamic exit loop - // n_iter = count n_iter = ex_val - // auto b_indx = std::make_shared(ngraph::element::i64, ngraph::Shape{}); b_indx->set_friendly_name("body_index"); - auto b_data = std::make_shared(prc, inputShape); - b_data->set_friendly_name("body_data"); + auto b_data_add = std::make_shared(prc, inputShape); + b_data_add->set_friendly_name("b_data_add"); + auto b_data_mul = std::make_shared(prc, inputShape); + b_data_mul->set_friendly_name("b_data_mul"); auto b_indx_cast = std::make_shared(b_indx, prc); b_indx_cast->set_friendly_name("body_index_cast"); - auto b_add = std::make_shared(b_data, b_indx_cast); - b_add->set_friendly_name("body_addition"); + auto b_add = std::make_shared(b_data_add, b_indx_cast); + b_add->set_friendly_name("body_add"); + auto b_mul = std::make_shared(b_data_mul, b_indx_cast); + b_mul->set_friendly_name("body_mul"); std::shared_ptr b_cond; if (dynamic_exit == -1) { @@ -170,22 +162,32 @@ class DynamicShapeLoopTest : public testing::WithParamInterface( - ngraph::OutputVector 
{b_cond, b_add}, // TODO: check with reverse - ngraph::ParameterVector {b_indx, b_data}); // TODO: check with reverse + ngraph::OutputVector {b_cond, b_add, b_mul}, // TODO: check with reverse + ngraph::ParameterVector {b_indx, b_data_add, b_data_mul}); // TODO: check with reverse body->set_friendly_name("body_network"); auto loop = std::make_shared(count, skip); loop->set_friendly_name("loop"); loop->set_function(body); loop->set_special_body_ports({0, 0}); - loop->set_merged_input(b_data, start, b_add); - if (axis == -1) + loop->set_merged_input(b_data_add, start_add, b_add); + loop->set_merged_input(b_data_mul, start_mul, b_mul); + if (axis == -1) { loop->get_iter_value(b_add, -1); - else + loop->get_iter_value(b_mul, -1); + } else { loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis); + loop->get_concatenated_slices(b_mul, 0, 1, 1, -1, axis); + } + ngraph::ResultVector results; + for (size_t i = 0; i < loop->get_output_size(); i++) { + auto res = std::make_shared(loop->output(i)); + res->set_friendly_name("loop_output_" + std::to_string(i)); + results.push_back(res); + } function = std::make_shared( - ngraph::OutputVector {loop}, + results, params); function->set_friendly_name("outer_body_network"); } From fbcb58127ae65e5613501cc8813e699a05c1fb69 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Fri, 27 Oct 2023 10:41:54 +0200 Subject: [PATCH 094/275] fix headers (#20728) --- .../installing-openvino-macos-header.md | 2 +- .../installing-openvino-windows-header.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header.md index aa9697f1a34ec4..1cf8476ddcaed4 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-macos-header.md +++ b/docs/articles_en/get 
started/installing-openvino-overview/installing-openvino-macos-header.md @@ -1,4 +1,4 @@ -# Install and OpenVINO™ Runtime for macOS {#openvino_docs_install_guides_installing_openvino_macos_header} +# Install OpenVINO™ Runtime for macOS {#openvino_docs_install_guides_installing_openvino_macos_header} @sphinxdirective diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header.md index 73b11591046804..1328f79974d387 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header.md @@ -1,4 +1,4 @@ -# Install and OpenVINO™ Runtime on Windows {#openvino_docs_install_guides_installing_openvino_windows_header} +# Install OpenVINO™ Runtime on Windows {#openvino_docs_install_guides_installing_openvino_windows_header} @sphinxdirective From 9e987a43410dfe10404ddc1e534bc2ed8e57c912 Mon Sep 17 00:00:00 2001 From: River Li Date: Fri, 27 Oct 2023 16:58:20 +0800 Subject: [PATCH 095/275] [CC] solve assert issue due to cannot create convolution_backward_data::primitive in CC selective build binaries (#20571) Deconvolution::createDescriptor will call createDescriptorInternalDefault() to create fwd_conv_pd, sometimes ref_convolution_fwd_t will be chosen to return its primitive_desc, but ref_convolution_fwd_t primitive will not be created finally, then CC will not put this primitive into convolution_impl_list in selective build stage, the final CC package will fail due to cannot create fwd_conv_pd of ref_convolution_fwd_t. 
--- src/plugins/intel_cpu/src/nodes/deconv.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 34cfbfa698669a..4d1824757535c4 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -937,6 +937,10 @@ void Deconvolution::prepareParams() { } else { std::tie(desc, fwd_conv_pd) = createDefaultMkldnnDeconvDesc(key.inp0->getDnnlDesc(), key.inp1->getDnnlDesc(), key.out->getDnnlDesc(), key.stride, key.dilation, key.paddingL, key.paddingR, key.attr, engine); +#if defined(SELECTIVE_BUILD_ANALYZER) + // Create dummy primitive to WA CC issue. + OPENVINO_ASSERT(dnnl::primitive(fwd_conv_pd)); +#endif } primitive_desc_iterator itpd = desc; @@ -989,6 +993,10 @@ void Deconvolution::prepareParams() { } else { std::tie(anyDeconvDesc, fwdConvPd) = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, key.stride, key.dilation, key.paddingL, key.paddingR, key.attr, engine); +#if defined(SELECTIVE_BUILD_ANALYZER) + // Create dummy primitive to WA CC issue. 
+ OPENVINO_ASSERT(dnnl::primitive(fwd_conv_pd)); +#endif } if (anyDeconvDesc) { @@ -1083,10 +1091,10 @@ void Deconvolution::createDescriptor(const std::vector &inputDesc std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, dnnl::algorithm::convolution_direct, deconvAttrs.stride, deconvAttrs.dilation, deconvAttrs.paddingL, deconvAttrs.paddingR, *attr, getEngine()); - IE_ASSERT(fwd_conv_pd && deconv_desc && deconv_desc.get(true) != nullptr) - << "Failed to create convolution_backward_data::primitive_desc: " << "Node: ##" << getName(); - fwdConvPD.push_back(fwd_conv_pd); // oneDNN requires forward pd to exists until primitive is created - descs.push_back(deconv_desc); + if (fwd_conv_pd && deconv_desc && deconv_desc.get(true) != nullptr) { + fwdConvPD.push_back(fwd_conv_pd); // oneDNN requires forward pd to exists until primitive is created + descs.push_back(deconv_desc); + } } } From e177412615065a939f09957b636bca297107ee08 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Fri, 27 Oct 2023 11:03:16 +0200 Subject: [PATCH 096/275] [CPU] Added several ops to type_relaxed_opset (#18872) --- src/plugins/intel_cpu/src/extension.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 1da8e866f7c4a1..a3c0e7cda7510f 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -114,6 +114,10 @@ std::map Extension::getOpSets() { NGRAPH_OP(MVN, ngraph::op::v6) NGRAPH_OP(Select, ngraph::op::v1) NGRAPH_OP(ConvolutionBackpropData, ngraph::op::v1) + NGRAPH_OP(LSTMSequence, ngraph::op::v5) + NGRAPH_OP(GRUSequence, ngraph::op::v5) + NGRAPH_OP(NonMaxSuppression, ngraph::op::v5) + NGRAPH_OP(NonMaxSuppression, ngraph::op::v9) #undef NGRAPH_OP return opset; From b1ce297bdef77cfd9e8aa05eb71225b169a11cc2 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Fri, 27 Oct 2023 10:25:01 +0100 Subject: 
[PATCH 097/275] [CI] [GHA] Add system information print action (#20710) * added action * test action * fixed typo * move action to test flow * fixed pipeline * changed description * add action to common pipeline * changed actions path * use bash syntax * path * fix * reordered * update * revert unused changes * update path * Revert "update path" This reverts commit bff8ac23969dc6ce83631ec4d6997ecdff677b31. * mac and win * print system info * correct path * use relative path * run mac * add print sysinfo step, enable triggers * use win agnostic func * rm triggers * mv sysinfo check after checkouts; rm tools versions info printing * correct desc * add sysinfo dep for fedora * mv pre-requisite installation --------- Co-authored-by: Mikhail Ryzhov --- .github/actions/system_info/action.yml | 34 +++++++++++++++++++ .github/workflows/android_arm64.yml | 7 ++++ .github/workflows/fedora.yml | 7 ++++ .github/workflows/linux.yml | 7 ++++ .../linux_conditional_compilation.yml | 7 ++++ .github/workflows/linux_riscv.yml | 7 ++++ .github/workflows/mac.yml | 7 ++++ .github/workflows/windows.yml | 7 ++++ .../windows_conditional_compilation.yml | 7 ++++ 9 files changed, 90 insertions(+) create mode 100644 .github/actions/system_info/action.yml diff --git a/.github/actions/system_info/action.yml b/.github/actions/system_info/action.yml new file mode 100644 index 00000000000000..fb4ff1a9520c95 --- /dev/null +++ b/.github/actions/system_info/action.yml @@ -0,0 +1,34 @@ +name: 'System Information' +description: 'Information about the system' +runs: + using: "composite" + steps: + - if: runner.os == 'Linux' + shell: bash + run: | + # Install pre-requisites for Fedora + if [[ -e /etc/fedora-release ]]; then + yum update -y -q && yum install -y -q procps + fi + + echo "System: ${{ runner.os }}" + echo "System Architecture: ${{ runner.arch }}" + echo "CPU Info: "; lscpu + echo "RAM Info: "; free -h --si + echo "MEMORY Info: "; df -h + + - if: runner.os == 'macOS' + shell: bash + run: | +
echo "System: ${{ runner.os }}" + echo "System Architecture: ${{ runner.arch }}" + echo "CPU and RAM Info: "; system_profiler SPHardwareDataType + echo "MEMORY Info: "; df -h + + - if: runner.os == 'Windows' + shell: pwsh + run: | + echo "System: ${{ runner.os }}" + echo "System Architecture: ${{ runner.arch }}" + echo "CPU Info: "; Get-CimInstance –ClassName Win32_Processor | Select-Object -Property Name, NumberOfCores, NumberOfLogicalProcessors + echo "RAM info: $(systeminfo | Select-String 'Total Physical Memory:')" diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 34487b04903d70..deacec70e344ea 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -77,6 +77,13 @@ jobs: path: 'vcpkg' fetch-depth: '0' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index 3bb6b69c76d1f1..b7ae5765971206 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -59,6 +59,13 @@ jobs: path: ${{ env.OPENVINO_REPO }} submodules: 'true' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ae512a64393a19..888358d10b1182 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -81,6 +81,13 @@ jobs: submodules: 'true' ref: 'master' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 74bfd4e2e203ce..7fac5d9a1bde98 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -75,6 +75,13 @@ jobs: lfs: 'true' ref: 'master' + # 
+ # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 0b181c3c8ff0ed..25528d96e151db 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -61,6 +61,13 @@ jobs: with: path: 'openvino' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + - name: Init submodules for non-Conan dependencies run: | pushd ${OPENVINO_REPO} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index c39df9691fd78b..86bee3e82e8df5 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -74,6 +74,13 @@ jobs: repository: 'openvinotoolkit/openvino_contrib' path: 'openvino_contrib' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 84e9209aecb092..4984826bfba7fd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -62,6 +62,13 @@ jobs: path: 'openvino_contrib' ref: 'master' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index d506272d0ad510..976daa18272796 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -64,6 +64,13 @@ jobs: lfs: 'true' ref: 'master' + # + # Print system info + # + + - name: System info + uses: ./openvino/.github/actions/system_info + # # Dependencies # From 4e416785020ba6402339b3955b4c740739956698 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 27 Oct 2023 13:50:51 +0400 Subject: [PATCH 098/275] [Snippets] Fixed Convert elimination in 
AlignElementType (#20701) --- .../snippets/src/pass/align_element_types.cpp | 30 +++++++++++++++++++ .../shared_tests_instances/snippets/mha.cpp | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/common/snippets/src/pass/align_element_types.cpp b/src/common/snippets/src/pass/align_element_types.cpp index da1ab1cb2c038f..ebf0580ae8fc1b 100644 --- a/src/common/snippets/src/pass/align_element_types.cpp +++ b/src/common/snippets/src/pass/align_element_types.cpp @@ -3,6 +3,8 @@ // #include "snippets/pass/align_element_types.hpp" + +#include "snippets/pass/propagate_precision.hpp" #include "snippets/itt.hpp" namespace ov { @@ -40,6 +42,20 @@ bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) consumer = transpose; } + // If there is already Convert[needed_in_type->original_type] and this node has only one consumer, we can remove the Convert, + // since the sequence existing Convert[needed_in_type->original_type] -> new Convert[original_type->needed_in_type] is redundant + if (const auto existing_convert = ov::as_type_ptr(parent_output.get_node_shared_ptr())) { + const auto actual_before = existing_convert->get_input_element_type(0); + const auto actual_after = existing_convert->get_output_element_type(0); + const auto required_after = needed_out_type; + if (ov::snippets::pass::PropagatePrecision::can_be_removed(actual_before, actual_after, required_after) && + parent_output.get_target_inputs().size() == 1) { + // remove existing convert + existing_convert->output(0).replace(existing_convert->input_value(0)); + continue; + } + } + const auto convert = std::make_shared(parent_output, needed_out_type); ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); @@ -85,6 +101,20 @@ bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) consumer_inputs = parent_output.get_target_inputs(); } + // If there is already Convert[original_type->needed_in_type] and this node is alone consumer, we can remove the Convert, + 
// since the sequence new Convert[needed_in_type->original_type] -> existing Convert[original_type->needed_in_type] is redundant + if (const auto existing_convert = ov::as_type_ptr(consumer_inputs.cbegin()->get_node()->shared_from_this())) { + const auto actual_before = needed_in_type; + const auto actual_after = original_type; + const auto required_after = existing_convert->get_element_type(); + if (ov::snippets::pass::PropagatePrecision::can_be_removed(actual_before, actual_after, required_after) && + consumer_inputs.size() == 1) { + // remove existing convert + existing_convert->output(0).replace(parent_output); + continue; + } + } + const auto& convert = std::make_shared(parent_output, original_type); ov::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index b05bf845538859..0aae38f4f482bc 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -128,7 +128,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, ::testing::ValuesIn({false}), ::testing::Values(MHA::default_thread_count), ::testing::Values(7), - ::testing::Values(7), + ::testing::Values(6), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::Values(CPUTestUtils::cpuBF16PluginConfig)), MHA::getTestCaseName); From 9decbb538bcd791cbc394cd18f8754370e1b300a Mon Sep 17 00:00:00 2001 From: Nesterov Alexander Date: Fri, 27 Oct 2023 12:01:38 +0200 Subject: [PATCH 099/275] [ARM CPU] Avg Pooling, ROI Pooling fix for fp16 precision (#20658) --- src/plugins/intel_cpu/src/nodes/pooling.cpp | 6 +++--- .../intel_cpu/src/nodes/roi_pooling.cpp | 21 ++++++++++++------- .../skip_tests_config.cpp | 6 ------ 3 files changed, 16 insertions(+), 17 deletions(-) diff --git 
a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 42aa97d062702b..5a8abd39e3aa53 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -322,7 +322,7 @@ void Pooling::getSupportedDescriptors() { // WA: LPT transformation has WA which allows average pooling has I8/U8 output precision instead of FP32, // so we explicitly set output precision as FP32 - if (outputPrecision != Precision::I8 && inputPrecision != Precision::BF16) { + if (!one_of(outputPrecision, Precision::I8, Precision::BF16, Precision::FP16)) { if (getAlgorithm() == Algorithm::PoolingMax) { // oneDNN supports only equal precisions for input and output outputPrecision = inputPrecision; @@ -330,7 +330,7 @@ void Pooling::getSupportedDescriptors() { outputPrecision = Precision::FP32; } } - if (inputPrecision == Precision::BF16) { + if (one_of(inputPrecision, Precision::BF16, Precision::FP16)) { outputPrecision = inputPrecision; } @@ -351,7 +351,7 @@ void Pooling::getSupportedDescriptors() { if (inputPrecision == Precision::I8 || inputPrecision == Precision::U8) { // We have to extend i8i8_pooling_fwd_t from oneDNN to support BF16 output data type - if (outputDataType == memory::data_type::bf16) + if (one_of(outputDataType, memory::data_type::bf16, memory::data_type::f16)) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts const auto in_candidate = std::make_shared(parentShape, inputDataType, inputRank == 3 ? 
diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 091cc56f0da46b..a5d7b1c8dceba1 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -434,13 +434,6 @@ void ROIPooling::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - refParams.src_prc = getOriginalInputPrecisionAtPort(0); - - if (!mayiuse(avx512_core)) { - if (refParams.src_prc == Precision::BF16) - refParams.src_prc = Precision::FP32; - } - auto format = mayiuse(avx512_core) ? LayoutType::nCsp16c : LayoutType::nCsp8c; impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_core)) { @@ -453,6 +446,17 @@ void ROIPooling::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } + refParams.src_prc = getOriginalInputPrecisionAtPort(0); + + if (!mayiuse(avx512_core)) { + if (refParams.src_prc == Precision::BF16) + refParams.src_prc = Precision::FP32; + } + + if (impl_type != impl_desc_type::ref && refParams.src_prc == Precision::FP16) { + refParams.src_prc = Precision::FP32; + } + addSupportedPrimDesc({{format, refParams.src_prc}, {LayoutType::ncsp, refParams.src_prc}}, {{format, refParams.src_prc}}, @@ -826,7 +830,8 @@ std::shared_ptr ROIPooling::ROIPoolingExecutor:: OV_SWITCH(intel_cpu, ROIPoolingExecutorCreation, ctx, jpp.src_prc, OV_CASE(Precision::FP32, float), - OV_CASE(Precision::BF16, bfloat16_t)) + OV_CASE(Precision::BF16, bfloat16_t), + OV_CASE(Precision::FP16, float16_t)) return ctx.executor; } diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index c60fbe478af286..927c06ed13400d 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -229,15 +229,9 @@ 
std::vector disabledTestPatterns() { #if defined(OV_CPU_ARM_ENABLE_FP16) // Issue: 123019 - retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding.*modelType=f16.*)"); - retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); - retVector.emplace_back(R"(smoke_AvgPool_SameUpperPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); - retVector.emplace_back(R"(smoke_AvgPool_SameLowerPad_CeilRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)"); retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); - // Issue: 123064 - retVector.emplace_back(R"(smoke_TestsROIPooling_.*/ROIPoolingLayerTest.*modelType=f16.*)"); #endif #endif From 603d61b4884451f2319a1ad3ccadd519115ea1fc Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Fri, 27 Oct 2023 12:50:36 +0200 Subject: [PATCH 100/275] GNA plugin transformations documentation (#19232) * add transformations.md * code review fixes * Apply suggestions from code review Co-authored-by: Ryszard Jezierski * Update src/plugins/intel_gna/docs/transformations.md * Update src/plugins/intel_gna/docs/transformations.md * Apply suggestions from code review * Update src/plugins/intel_gna/docs/transformations.md Co-authored-by: Mikhail Ryzhov * Update src/plugins/intel_gna/docs/transformations.md Co-authored-by: Mikhail Ryzhov --------- Co-authored-by: Mikhail Ryzhov Co-authored-by: Ryszard Jezierski --- src/plugins/intel_gna/docs/transformations.md | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/plugins/intel_gna/docs/transformations.md diff --git a/src/plugins/intel_gna/docs/transformations.md b/src/plugins/intel_gna/docs/transformations.md new file mode 100644 index 00000000000000..66510780a319e2 --- /dev/null +++ 
b/src/plugins/intel_gna/docs/transformations.md @@ -0,0 +1,97 @@ +# GNA transformations documentation + +GNA Plugin provides implementation of multiple methods required by OpenVINO plugin API. Original model usually consists of a variety of operations, e.g. Convolution, Add, Gather, LSTMSequence and so on. GNA Hardware has its own limitations and not all operations can be executed on GNA Hardware. +One of the main functionalities for GNA Plugin is conversion of source network to equivalent network which could be executed on the GNA hardware. This conversion is done in LoadNetwork method. + +## LoadNetwork + +GNAPlugin::LoadNetwork in the future should execute following stages: +- Converting input graph to fully GNA-supported graph (all in ngraph) +- Creating and connecting GNA primitives within libGNA from ngraph-based network + +These stages include: +- Obtain ngraph-based network from the CNNNetwork argument (if input is not ngraph-based, proceed to CNNNetwork passes stage) +- Pass ngraph-based network through ngraph-based transformations. +- Convert ngraph-based network to CNNNetwork-based +- Pass network through CNNNetwork-based transformations. +- Creating and connecting GNA primitives within libGNA from CNNNetwork-based graph +Transformations are the way of modifying input graph. Ngraph-based transformations usually are of the following types: +- inherited from ov::pass::ModelPass. They implement run_on_model method. It allows them to be a container of other transformations. For example, ngraph::pass::CommonOptimizations executes multiple transformations in it. Each of them does some basic transformations. +- inherited from ov::pass::MatcherPass. Such transformations usually have a constructor. That constructor defines a pattern with several layers connected together and a function that modifies the found group of layers. The pattern can also handle additional predicates that do any checks on the traversed nodes. 
It is preferable to use those predicates explicitly rather than check and return from the transform function. +GNA-specific ngraph-based transformations are placed in src/plugins/intel_gna/src/transformations. All transformations should have brief comments in their headers. That brief should describe what pattern the transformation handles and what modifications it makes. +There is also a directory src/plugins/intel_gna/src/transformations/rt_info with auxiliary runtime attributes. Those attributes could be added into node rt_info map. Those attributes can be read and written in transformations, which is useful in some cases. For example, a transformation can process some node in a special way if the node has a special attribute. +All new transformations should have unit tests, that are placed in src/plugins/intel_gna/tests/unit/transformations. All those unit tests are compiled in ov_gna_unit_tests binary. +CNNNetwork transformations are so-called passes. They are placed in src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp. Passes process the network as a +``` +std::vector * pLayers +``` +It is preferable to write new transformations as nGraph passes and avoid implementing CNNNetwork passes. All CNNNetwork related code is considered as a legacy. Existing CNNNetwork passes are ported to ngraph. + +## GNA ngraph-based layers + +OpenVino allows working with graph nodes as ov::Node class instances. Most of them are stored in src/core/include/openvino/op directory and could be used by all plugins. GNA plugin stores own (GNA-specific) layer types. +1. src/plugins/intel_gna/legacy/include/legacy/ngraph_ops +Here there are legacy layer types. Their names end with “IE”. These types cannot be in a graph that is passed to the GNA plugin. All of these types are created within GNA transformations and used in GNA graph compiler for creating libGNA primitives. There are plans to rewrite all legacy code. These legacy types should be removed after that. +2. 
src/plugins/intel_gna/src/ops +GNA-specific operations. For example, GNAConvolution type describes convolution layers. It differs from common OpenVino Convolution type as it handles NHWC data layout instead of NCHW. +Ngraph-based transformations +1. Transformations that are common for all OpenVino plugins (are placed outside GNA plugin directory). These transformations perform different optimizations. For example, ov::pass::ConvertDivide transforms Divide operation into the sequence of nodes with Power layer. LSTMCellDecomposition extracts LSTMCell into a subgraph of mathematical operations. +2. Transformations that are specific for the GNA plugin (are placed inside GNA plugin directory) +They also include src/plugins/intel_gna/legacy/include/legacy/transformations/convert_opset1_to_legacy directory with ngraph-based legacy transformations. These transformations produce “IE” layers. After rewriting GNA legacy code these transformations should be removed. + +### "Layout transformations" +There is a group of transformations that work with data layout. GNA-hardware supports MaxPool and Convolution operations in a different way in comparison to OpenVino common types. GNA supports NHWC layout, OpenVino supports NCHW layout. +There is a group of transformations ReplaceGnaNHWCLayers that substitutes common types with NCHW layout to GNA-specific types with NHWC layout. It is done by wrapping GNA-types with transpose operations that convert the layout on input and output of GNA-types. Unfortunately, in most situations GNA hardware cannot execute these transpose operations. To solve this issue, there are transformations that allow pushing transposes through layers from GNA-specific NHWC layers to the start and end of the graph, exchanging Transpose/Gather layer with neighbor layer. Some of them (for example, TransposeSinking group of transformations) allow pushing transpose layers through multiple layer types. 
These transformations are common for all OpenVino plugins and are stored outside GNA plugin code. They are not able to push Transpose layer through Reshape type nodes due to mathematical reasons. +To push Transpose operation through Reshape nodes there are transformations that substitute Transpose + Reshape pattern with Reshape + Gather. Gather operation is not supported by the GNA hardware and it should also be pushed through the graph to the start and end. There is a group of transformations that does it. +Transpose/Gather sinking consists of multiple transformations. Each of these transformations works with a small pattern consisting of Transpose/Gather and a node with a specific kind of layers (for example, with binary elementwise operations). Sinking transformation interchanges layers. After each sinking transformation execution Transpose/Gather layer moves through one layer in the graph. There are multiple nodes between start/end of the graph and initial Transpose/Gather layer position. Node types can repeat multiple times while sinking and are going in an arbitrary order. The same Transpose/Gather sinking transformation should be executed multiple times. They use register_new_node functionality. This method adds the newly created Transpose/Gather node at the end of the matcher pass queue to allow the same transformation to be executed once again without the need to call it explicitly again. +TransposeSinking changes Concat and Split axes while pushing Transpose nodes through them. GNA doesn't support all possible Concat and Split axes. Some TransposeSinking transformations support callbacks. These callbacks are executed inside transformations and allow adding plugin-specific checks. In these checks, GNA plugin prevents sinking transposes that would make some Split/Concats unsupported. +As Transpose and Gather layers are moved to start and end of the graph they are cut from the graph and moved to ov::intel_gna::PrePostProcessModels structure as separate models. 
On each network inference the plugin searches this structure for the model for input/output, executes this model on CPU and copies the resulting data as input/output of the entire model. + TransposeSinking group of transformations currently doesn’t support StridedSlice layer. This leads to the following problem. +GNA plugin has the following Slice layer flow: +- SliceToStridedSlice transformation in CommonOptimizations converts Slice to StridedSlice +- ConvertStridedSliceToCropMatcher transformation converts StridedSlice to CropIE +- convertFunctionToICNNNetwork converts CropIE to CNNNetwork CropLayer +- GNA graph compiler converts CropLayer into affine layer +Since TransposeSinking is called after common optimizations it cannot push Transpose through the StridedSlice. If we have Slice operation in the original model we should prevent converting Slice to StridedSlice in common optimization. It is done by the following steps: +- Disable execution of SliceToStridedSlice transformation +- Execute entire set of ngraph-based transformations +- Execute a set of transformations to convert Slice -> StridedSlice -> CropIE nodes +When the StridedSlice layer is supported by TransposeSinking these steps could be removed from GNA plugin pipeline. + +## CNNNetwork based passes + +After running ngraph-based transformations model is converted with function convertFunctionToICNNNetwork into CNNNetwork-based function. The next step is the model transformation with the CNNNetwork-based passes. +All the legacy CNNNetwork-based passes are stored in src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp. One of the main differences between legacy passes and ngraph transformations is that legacy passes don’t have pattern matching functionality. Each of the passes iterates through the graph nodes (previously sorted topologically), searches for the sought sequence of layers and modifies them. 
+It should be mentioned that ngraph API stores constant data as input nodes with type Constant, but CNNNetwork API stores data as a BLOB in layer info. + +## Debugging + +It is possible to dump the model between transformations/passes. +To dump CNNNetwork passes use -DENABLE_INTEL_GNA_DEBUG=ON option to cmake build configuration. After plugin execution, *.dot files representing the final graph will be saved in the current working directory; *.dot files can be converted to an image with the graphviz dot executable, for example: +``` +dot -Tpng <dot_file> -o <png_file> +``` +To dump CNNNetwork-based model in xml add +``` +#define ENABLE_V7_SERIALIZE +``` +to src/plugins/intel_gna/src/log/debug.hpp + +To dump the model between ngraph-based transformations use VisualizeTree and Serialize transformations. + +### VisualizeTree + +VisualizeTree transformation allows dumping the model as an image. +``` +#include "openvino/pass/visualize_tree.hpp" +manager.register_pass<ov::pass::VisualizeTree>("./dump.png"); +``` + +### Serialize + +Serialize transformation allows dumping the model as xml and binary files that could be loaded in the Netron web application +``` +#include "openvino/pass/serialize.hpp" +manager.register_pass<ov::pass::Serialize>("./dump.xml", "./dump.bin"); +``` +where manager is the ov::pass::Manager instance. 
From ae15f35f07383f9004e25d19614cf29ba101b40c Mon Sep 17 00:00:00 2001 From: Siddhant Chauhan Date: Fri, 27 Oct 2023 16:34:57 +0530 Subject: [PATCH 101/275] [PT FE] Add aten::is_nonzero (#20589) * Add is_nonzero operator and test * fix * Update is_nonzero.cpp * Update is_nonzero.cpp * requested changes * Update is_nonzero.cpp * Update is_nonzero.cpp --------- Co-authored-by: Maxim Vafin --- src/frontends/pytorch/src/op/is_nonzero.cpp | 34 +++++++++++++++++++ src/frontends/pytorch/src/op_table.cpp | 2 ++ .../pytorch_tests/test_is_nonzero.py | 32 +++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/frontends/pytorch/src/op/is_nonzero.cpp create mode 100644 tests/layer_tests/pytorch_tests/test_is_nonzero.py diff --git a/src/frontends/pytorch/src/op/is_nonzero.cpp b/src/frontends/pytorch/src/op/is_nonzero.cpp new file mode 100644 index 00000000000000..22fa00e66935f8 --- /dev/null +++ b/src/frontends/pytorch/src/op/is_nonzero.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/not_equal.hpp" +#include "pt_framework_node.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_is_nonzero(const NodeContext& context) { + num_inputs_check(context, 1, 1); + auto input = context.get_input(0); + + auto zero_tensor = context.mark_node(v0::Constant::create(element::boolean, Shape{1}, {false})); + + zero_tensor = context.mark_node(std::make_shared(zero_tensor, input)); + auto result = context.mark_node(std::make_shared(input, zero_tensor)); + + return {result}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp 
index bd625b9643770c..de030ade50d797 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -91,6 +91,7 @@ OP_CONVERTER(translate_index_put_); OP_CONVERTER(translate_index_select); OP_CONVERTER(translate_instance_norm); OP_CONVERTER(translate_int); +OP_CONVERTER(translate_is_nonzero); OP_CONVERTER(translate_layer_norm); OP_CONVERTER(translate_len); OP_CONVERTER(translate_linalg_norm); @@ -364,6 +365,7 @@ const std::map get_supported_ops_ts() { {"aten::Int", op::translate_int}, {"aten::IntImplicit", op::translate_int}, {"aten::is_grad_enabled", op::return_false_scalar}, + {"aten::is_nonzero", op::translate_is_nonzero}, {"aten::item", op::translate_1to1_match_1_inputs}, {"aten::layer_norm", op::translate_layer_norm}, {"aten::le", op::translate_1to1_match_2_inputs_align_types}, diff --git a/tests/layer_tests/pytorch_tests/test_is_nonzero.py b/tests/layer_tests/pytorch_tests/test_is_nonzero.py new file mode 100644 index 00000000000000..0b9dbf1d410e9e --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_is_nonzero.py @@ -0,0 +1,32 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch + +from pytorch_layer_test_class import PytorchLayerTest + + +@pytest.mark.parametrize('input_tensor', (np.array([0.]), np.array([1.5]), np.array([False]), np.array([3]))) +class TestIsNonZero(PytorchLayerTest): + + def _prepare_input(self): + input_tensor = self.input_tensor + return (input_tensor.astype(np.int64),) + + def create_model(self): + class aten_is_nonzero(torch.nn.Module): + + def forward(self, input_tensor): + return torch.is_nonzero(input_tensor) + + ref_net = None + + return aten_is_nonzero(), ref_net, "aten::is_nonzero" + + @pytest.mark.nightly + @pytest.mark.precommit + def test_is_nonzero(self, ie_device, precision, ir_version, input_tensor): + self.input_tensor = input_tensor + self._test(*self.create_model(), ie_device, precision, 
ir_version) From b75f2e67decbb3b0582f5bc68e649af6d4bcb0fc Mon Sep 17 00:00:00 2001 From: River Li Date: Fri, 27 Oct 2023 19:45:37 +0800 Subject: [PATCH 102/275] [CAPI] add correct return value for wait_for() (#20723) --- src/bindings/c/src/ov_infer_request.cpp | 6 +++--- src/bindings/c/tests/ov_compiled_model_test.cpp | 2 +- src/bindings/c/tests/ov_core_test.cpp | 2 +- src/bindings/c/tests/ov_infer_request_test.cpp | 8 ++++++-- src/bindings/c/tests/ov_model_test.cpp | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/bindings/c/src/ov_infer_request.cpp b/src/bindings/c/src/ov_infer_request.cpp index 3ba853679abda6..b8f9c7dc4e335c 100644 --- a/src/bindings/c/src/ov_infer_request.cpp +++ b/src/bindings/c/src/ov_infer_request.cpp @@ -289,13 +289,13 @@ ov_status_e ov_infer_request_wait_for(ov_infer_request_t* infer_request, const i if (!infer_request) { return ov_status_e::INVALID_C_PARAM; } - + bool ret = true; try { - infer_request->object->wait_for(std::chrono::milliseconds(timeout)); + ret = infer_request->object->wait_for(std::chrono::milliseconds(timeout)); } CATCH_OV_EXCEPTIONS - return ov_status_e::OK; + return ret ? 
ov_status_e::OK : ov_status_e::UNEXPECTED; } ov_status_e ov_infer_request_set_callback(ov_infer_request_t* infer_request, const ov_callback_t* callback) { diff --git a/src/bindings/c/tests/ov_compiled_model_test.cpp b/src/bindings/c/tests/ov_compiled_model_test.cpp index e5904400af6d4e..b066d60533b9ad 100644 --- a/src/bindings/c/tests/ov_compiled_model_test.cpp +++ b/src/bindings/c/tests/ov_compiled_model_test.cpp @@ -16,7 +16,7 @@ class ov_compiled_model_test : public ov_capi_test_base { } }; -INSTANTIATE_TEST_SUITE_P(device_name, ov_compiled_model_test, ::testing::Values("CPU")); +INSTANTIATE_TEST_SUITE_P(ov_compiled_model, ov_compiled_model_test, ::testing::Values("CPU")); TEST_P(ov_compiled_model_test, ov_compiled_model_inputs_size) { auto device_name = GetParam(); diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index f6203da682ff97..5e116e7326d34e 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -33,7 +33,7 @@ class ov_core_test : public ov_capi_test_base { ov_capi_test_base::TearDown(); } }; -INSTANTIATE_TEST_SUITE_P(device_name, ov_core_test, ::testing::Values("CPU")); +INSTANTIATE_TEST_SUITE_P(ov_core, ov_core_test, ::testing::Values("CPU")); TEST_P(ov_core_test, ov_core_create_with_config) { std::string plugins_xml = TestDataHelpers::generate_test_xml_file(); diff --git a/src/bindings/c/tests/ov_infer_request_test.cpp b/src/bindings/c/tests/ov_infer_request_test.cpp index c2ff469f34c2cc..09a8bccf7643d4 100644 --- a/src/bindings/c/tests/ov_infer_request_test.cpp +++ b/src/bindings/c/tests/ov_infer_request_test.cpp @@ -203,8 +203,8 @@ class ov_infer_request_ppp : public ov_capi_test_base { ov_preprocess_input_model_info_t* input_model; }; -INSTANTIATE_TEST_SUITE_P(device_name, ov_infer_request_test, ::testing::Values("CPU")); -INSTANTIATE_TEST_SUITE_P(device_name, ov_infer_request_ppp, ::testing::Values("CPU")); +INSTANTIATE_TEST_SUITE_P(ov_infer_request, 
ov_infer_request_test, ::testing::Values("CPU")); +INSTANTIATE_TEST_SUITE_P(ov_infer_request, ov_infer_request_ppp, ::testing::Values("CPU")); TEST_P(ov_infer_request_test, set_tensor) { OV_EXPECT_OK(ov_infer_request_set_tensor(infer_request, in_tensor_name, input_tensor)); @@ -341,6 +341,10 @@ TEST_P(ov_infer_request_test, infer_async_wait_for) { } } +TEST_P(ov_infer_request_test, infer_async_wait_for_return_fail) { + OV_EXPECT_NOT_OK(ov_infer_request_wait_for(infer_request, 10)); +} + TEST_P(ov_infer_request_ppp, infer_async_ppp) { OV_EXPECT_OK(ov_infer_request_set_input_tensor_by_index(infer_request, 0, input_tensor)); diff --git a/src/bindings/c/tests/ov_model_test.cpp b/src/bindings/c/tests/ov_model_test.cpp index c4f2e743d13286..228eada97b0476 100644 --- a/src/bindings/c/tests/ov_model_test.cpp +++ b/src/bindings/c/tests/ov_model_test.cpp @@ -13,7 +13,7 @@ class ov_model_test : public ov_capi_test_base { } }; -INSTANTIATE_TEST_SUITE_P(device_name, ov_model_test, ::testing::Values("")); +INSTANTIATE_TEST_SUITE_P(ov_model, ov_model_test, ::testing::Values("")); TEST_P(ov_model_test, ov_model_const_input) { ov_core_t* core = nullptr; From 620a0fc289379aab32991d921afbf768ecdce5c1 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Oct 2023 16:29:40 +0400 Subject: [PATCH 103/275] Fixed compilation with C++23 (#20724) --- src/bindings/c/src/CMakeLists.txt | 3 ++- src/common/preprocessing/tests/CMakeLists.txt | 2 +- src/core/src/type.cpp | 2 +- src/frontends/onnx/frontend/src/ops_bridge.hpp | 1 + src/inference/src/dev/make_tensor.cpp | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bindings/c/src/CMakeLists.txt b/src/bindings/c/src/CMakeLists.txt index a6a649ada6e990..edc29574fe1a3b 100644 --- a/src/bindings/c/src/CMakeLists.txt +++ b/src/bindings/c/src/CMakeLists.txt @@ -5,7 +5,8 @@ set(TARGET_NAME openvino_c) # Suppress warnings due to catch macro with legacy exception types -ov_deprecated_no_errors() 
+ov_disable_deprecated_warnings() + add_definitions(-DIN_OV_COMPONENT) file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.h ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) diff --git a/src/common/preprocessing/tests/CMakeLists.txt b/src/common/preprocessing/tests/CMakeLists.txt index 9518b6f3e41195..f3a754e7673657 100644 --- a/src/common/preprocessing/tests/CMakeLists.txt +++ b/src/common/preprocessing/tests/CMakeLists.txt @@ -4,7 +4,7 @@ set(TARGET fluid_preproc_tests) -ov_deprecated_no_errors() +ov_disable_deprecated_warnings() find_package(OpenCV QUIET COMPONENTS gapi core imgproc) if(NOT OpenCV_FOUND OR NOT OpenCV_VERSION VERSION_GREATER_EQUAL 4) diff --git a/src/core/src/type.cpp b/src/core/src/type.cpp index 7d6aef2c46bff0..c75d9a7476dcfb 100644 --- a/src/core/src/type.cpp +++ b/src/core/src/type.cpp @@ -37,7 +37,7 @@ std::string DiscreteTypeInfo::get_version() const { if (version_id) { return std::string(version_id); } - return nullptr; + return {}; } DiscreteTypeInfo::operator std::string() const { diff --git a/src/frontends/onnx/frontend/src/ops_bridge.hpp b/src/frontends/onnx/frontend/src/ops_bridge.hpp index bbd6bfd129c7ec..4e2d2edb2b57ff 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.hpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index 2c0f33b352bcf6..3a646789f4cdcb 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -397,7 +397,7 @@ class TensorMemoryBlob : public InferenceEngine::TBlob { } void allocate() noexcept override { - if (InferenceEngine::TBlob::buffer() != tensor->data()) { + if ((void*)InferenceEngine::TBlob::buffer() != tensor->data()) { InferenceEngine::TBlob::_allocator = InferenceEngine::details::make_pre_allocator(static_cast(tensor->data()), tensor->get_byte_size()); InferenceEngine::TBlob::allocate(); From 
69e1258cc5fd05dac1aaec5639454652a011e0a9 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Fri, 27 Oct 2023 16:25:52 +0330 Subject: [PATCH 104/275] Support dynamic seq lenghts in ConvertSequenceToTensorIterator transformation (#20671) --- .../include/transformations/utils/utils.hpp | 3 +- .../convert_sequences_to_tensor_iterator.cpp | 5 +- .../src/transformations/utils/utils.cpp | 51 +++++++- .../convert_sequences_to_ti_test.cpp | 112 ++++++++++++++++++ src/plugins/intel_cpu/src/nodes/rnn.cpp | 5 +- .../src/plugin/transformations_pipeline.cpp | 4 +- 6 files changed, 169 insertions(+), 11 deletions(-) diff --git a/src/common/transformations/include/transformations/utils/utils.hpp b/src/common/transformations/include/transformations/utils/utils.hpp index 9a2036fff1b20d..1961c35ef16594 100644 --- a/src/common/transformations/include/transformations/utils/utils.hpp +++ b/src/common/transformations/include/transformations/utils/utils.hpp @@ -182,7 +182,8 @@ TRANSFORMATIONS_API bool check_for_broadcast(const PartialShape& ref_shape, cons TRANSFORMATIONS_API std::shared_ptr activation(const std::string& activation_name, const Output& apply_to); -TRANSFORMATIONS_API bool is_seq_len_provided(const std::shared_ptr& seq_len_input, int64_t max_seq_len); +TRANSFORMATIONS_API bool is_seq_len_provided(const std::shared_ptr& X, + const std::shared_ptr& seq_len_input); TRANSFORMATIONS_API std::shared_ptr try_fold_unary_output(const std::shared_ptr& node); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp b/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp index 7d7cc8049883d0..a7e7b3c1ae1880 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp @@ -88,12 +88,11 @@ bool 
convert_sequence_to_ti(const std::shared_ptr& sequence, const ov::Output& B, const ov::op::RecurrentSequenceDirection& direction) { auto X_pshape = X.get_partial_shape(); - if (X_pshape.size() < 2 || X_pshape[1].is_dynamic()) { + if (X_pshape.size() < 2) { return false; } - auto max_seq_len = X_pshape[1].get_length(); - bool enable_mask = ov::op::util::is_seq_len_provided(seq_lengths.get_node_shared_ptr(), max_seq_len); + bool enable_mask = ov::op::util::is_seq_len_provided(X.get_node_shared_ptr(), seq_lengths.get_node_shared_ptr()); const bool is_reverse = direction == ov::op::RecurrentSequenceDirection::REVERSE; std::shared_ptr reverse_seq_before; diff --git a/src/common/transformations/src/transformations/utils/utils.cpp b/src/common/transformations/src/transformations/utils/utils.cpp index b7cde395a66eb5..9e8a6fad92e4be 100644 --- a/src/common/transformations/src/transformations/utils/utils.cpp +++ b/src/common/transformations/src/transformations/utils/utils.cpp @@ -132,11 +132,56 @@ std::shared_ptr activation(const std::string& activation_name, const o } } -bool is_seq_len_provided(const std::shared_ptr& seq_len_input, int64_t max_seq_len) { +bool is_seq_len_provided(const std::shared_ptr& X, const std::shared_ptr& seq_len_input) { + auto max_seq_dim = X->get_output_partial_shape(0)[1]; + if (max_seq_dim.is_dynamic()) { + // if values in seq_len input are equal to max_seq_len dim in X input + // then we don't need to insert Select operations + // supported seq_len_input: + // X -> ShapeOf -> Gather (max_seq_dim) -> Optional (Broadcast) + std::shared_ptr input = seq_len_input; + auto broadcast = ov::as_type_ptr(input); + if (broadcast) { + input = seq_len_input->input_value(0).get_node_shared_ptr(); + } + + auto gather = ov::as_type_ptr(input); + bool valid_gather = false; + if (gather) { + auto indices = gather->input_value(1).get_node_shared_ptr(); + auto axis = gather->input_value(2).get_node_shared_ptr(); + auto indices_const = ov::as_type_ptr(indices); + 
auto axis_const = ov::as_type_ptr(axis); + if (indices_const && axis_const) { + auto ind_values = indices_const->cast_vector(); + auto axis_values = axis_const->cast_vector(); + if (ind_values.size() == 1 && ind_values[0] == 1 && axis_values.size() == 1 && axis_values[0] == 0) { + valid_gather = true; + } + } + } + + if (!valid_gather) { + return true; + } + + auto shape_of = ov::as_type_ptr(gather->input_value(0).get_node_shared_ptr()); + if (!shape_of) { + return true; + } + + if (shape_of->input_value(0).get_node_shared_ptr() != X) { + return true; + } + + return false; + } + + auto max_seq_len_val = max_seq_dim.get_length(); if (const auto& seq_len_const = std::dynamic_pointer_cast(seq_len_input)) { const auto& seq_len_values = seq_len_const->cast_vector(); - return std::any_of(seq_len_values.begin(), seq_len_values.end(), [max_seq_len](const int64_t val) { - return val != max_seq_len; + return std::any_of(seq_len_values.begin(), seq_len_values.end(), [max_seq_len_val](const int64_t val) { + return val != max_seq_len_val; }); } return true; diff --git a/src/common/transformations/tests/op_conversions/convert_sequences_to_ti_test.cpp b/src/common/transformations/tests/op_conversions/convert_sequences_to_ti_test.cpp index e140087c2dd2e8..7220157efbf781 100644 --- a/src/common/transformations/tests/op_conversions/convert_sequences_to_ti_test.cpp +++ b/src/common/transformations/tests/op_conversions/convert_sequences_to_ti_test.cpp @@ -798,3 +798,115 @@ TEST(TransformationTests, ConvertQuantizedGRUSequenceToTensorIterator) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; } + +TEST(TransformationTests, ConvertLSTMSequenceWithDynSeqLenToTensorIterator) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, PartialShape{1, -1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 1, 128}); + auto Z = std::make_shared(element::f32, Shape{1, 1, 128}); + auto shape_of = std::make_shared(X); + auto 
indices = opset5::Constant::create(element::i32, {1}, {1}); + auto axis = opset5::Constant::create(element::i32, {}, {0}); + auto seq_lengths = std::make_shared(shape_of, indices, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = opset5::Constant::create(element::f32, Shape{1, 512, 16}, w_val); + auto R = opset5::Constant::create(element::f32, Shape{1, 512, 128}, r_val); + auto B = opset5::Constant::create(element::f32, Shape{1, 512}, b_val); + + auto rnn_sequence = std::make_shared(X, + Y, + Z, + seq_lengths, + W, + R, + B, + 128, + op::RecurrentSequenceDirection::FORWARD); + auto Y_out = std::make_shared(rnn_sequence->output(0)); + auto Ho = std::make_shared(rnn_sequence->output(1)); + auto Co = std::make_shared(rnn_sequence->output(2)); + Y_out->set_friendly_name("Y_out"); + Ho->set_friendly_name("Ho"); + Co->set_friendly_name("Co"); + + f = std::make_shared(NodeVector{Y_out, Ho, Co}, ParameterVector{X, Y, Z}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto X = std::make_shared(element::f32, PartialShape{1, -1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 1, 128}); + auto Z = std::make_shared(element::f32, Shape{1, 1, 128}); + auto squeeze_pattern = opset5::Constant::create(element::i64, Shape{1}, {1}); + auto squeeze_y = std::make_shared(Y, squeeze_pattern); + auto squeeze_z = std::make_shared(Z, squeeze_pattern); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Yi = std::make_shared(element::f32, Shape{1, 128}); + auto Zi = std::make_shared(element::f32, Shape{1, 128}); + auto seq_body_param = std::make_shared(element::i32, PartialShape{1}); + + // Body + auto squeeze_x = std::make_shared(Xi, squeeze_pattern); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = 
opset5::Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = opset5::Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = opset5::Constant::create(element::f32, Shape{512}, b_val); + + auto rnn_cell = std::make_shared(squeeze_x, Yi, Zi, W, R, B, 128); + + auto unsqueeze_pattern = opset5::Constant::create(element::i64, Shape{1}, {1}); + auto Ho = std::make_shared(rnn_cell->output(0)); + + auto Co = std::make_shared(rnn_cell->output(1)); + + auto unsqueeze_y = std::make_shared(rnn_cell->output(0), unsqueeze_pattern); + auto Y_out = std::make_shared(unsqueeze_y); + + auto body = std::make_shared(OutputVector{Y_out, Ho, Co}, ParameterVector{Xi, Yi, Zi, seq_body_param}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 1); + tensor_iterator->get_concatenated_slices(Y_out, 0, 1, 1, -1, 1); + + tensor_iterator->set_merged_input(Yi, squeeze_y, Ho); + tensor_iterator->set_merged_input(Zi, squeeze_z, Co); + + auto shape_of = std::make_shared(X); + auto indices = opset5::Constant::create(element::i32, {1}, {1}); + auto axis = opset5::Constant::create(element::i32, {}, {0}); + auto seq_lengths = std::make_shared(shape_of, indices, axis); + tensor_iterator->set_invariant_input(seq_body_param, seq_lengths); + + tensor_iterator->get_iter_value(Ho); + tensor_iterator->get_iter_value(Co); + + auto res_ti_Y = std::make_shared( + std::make_shared(tensor_iterator->output(0), unsqueeze_pattern)); + auto res_ti_H = std::make_shared( + std::make_shared(tensor_iterator->output(1), unsqueeze_pattern)); + auto res_ti_C = std::make_shared( + std::make_shared(tensor_iterator->output(2), unsqueeze_pattern)); + res_ti_Y->set_friendly_name("Y_out"); + res_ti_H->set_friendly_name("Ho"); + res_ti_C->set_friendly_name("Co"); + f_ref = std::make_shared(NodeVector{res_ti_Y, res_ti_H, res_ti_C}, ParameterVector{X, Y, Z}); + } + + auto res = compare_functions(f, f_ref); + 
ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index f453b7a5a51e0b..158b1f65967215 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -318,8 +318,9 @@ bool RNN::isSupportedOperation(const std::shared_ptr& op, std::s errorMessage = "Max sequence length dimension is dynamic"; return false; } - auto maxSeqLen = data_pshape[maxSeqLenDimIdx].get_length(); - if (ov::op::util::is_seq_len_provided(op->get_input_node_shared_ptr(seqLenIdx), maxSeqLen)) { + + if (ov::op::util::is_seq_len_provided(op->get_input_node_shared_ptr(0), + op->get_input_node_shared_ptr(seqLenIdx))) { errorMessage = "Unsupported sequence length."; return false; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 99623a72c32811..f96122a6531ea5 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -379,8 +379,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return lstm_seq->get_clip() == 0.0f && lstm_seq->get_activations() == std::vector{"sigmoid", "tanh", "tanh"} && max_seq_len < 16 && - !ov::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), - max_seq_len); + !ov::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(0), + lstm_seq->get_input_node_shared_ptr(3)); } return false; }; From 751d844b24583338226232617d855adb9c7c6217 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 27 Oct 2023 16:56:25 +0200 Subject: [PATCH 105/275] [shape infer]Review Reshape class for shape inference aspects (#18679) * Add static shape adapter - Adapters holds CPU dimension which can be reference to it or vector - Add ov::optional for holding optional result from shape inference - Add new `infer` function in 
`IStaticShapeInfer` * Temporary support of StaticShape * Minor corrections in ShapeInferenceTA * Migrate shape_infer to new interface version * Replace StaticShape by adapter implementation * Replace IShapeInferCommon by IStaticShapeInfer * Correct code formatting * Fix build issues * NodeValidationFailure::create for StaticShapeRef * Review shape inference for reshape operator - review shape_infer implementation - add more unit test for static and dynamic shapes * Fix build issues * Correct minus one dim calculation * Fix build issues on windows * Improve resolving special minus one * Use NODE_SHAPE_INFER_CHECK * Update product in/out calculations * Temporary add ngraph header to solve build issue * Correct minus one dim calc when static part same * Add check for scalar input * Remove debug message * Fix `minus one` dynamic dimension calculation * Fix `minus one` dynamic dimension calculation * Fix merge issues in reshape Minor refactor reshape evaluate * Don't pass input label on minus one pattern when input dimension will be modified. 
--- .../tests/utils/convert_precision.cpp | 8 +- .../include/reshape_shape_inference.hpp | 364 ++++++++++ .../shape_inference/include/shape_nodes.hpp | 87 --- src/core/shape_inference/include/utils.hpp | 15 +- src/core/src/op/reshape.cpp | 385 +---------- src/core/tests/eval.cpp | 2 +- src/core/tests/type_prop/reshape.cpp | 654 ++++++++++++++++-- .../src/shape_inference/shape_inference.cpp | 1 + .../reshape_shape_inference_test.cpp | 105 +++ .../shape_inference_test/shape_node_tests.cpp | 24 - .../tests/unit/shape_inference_test/utils.hpp | 1 + src/plugins/intel_gpu/src/graph/reshape.cpp | 4 +- 12 files changed, 1126 insertions(+), 524 deletions(-) create mode 100644 src/core/shape_inference/include/reshape_shape_inference.hpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/reshape_shape_inference_test.cpp diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 1dac080461d16b..608af4926bc942 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -2192,7 +2192,7 @@ TEST(TransformationTests, align_mixed_fp16_fp32_with_parameter_for_shape_1) { auto upscale_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {2.0f}); auto mul_1 = make_shared(shape_input, upscale_const); auto axis_const = ov::op::v0::Constant::create(element::i64, Shape{1}, {0}); - auto final_float_shape = make_shared(mul_1, axis_const); + auto final_float_shape = make_shared(mul_1, axis_const, true); auto final_int_shape = make_shared(final_float_shape, element::i64); auto reshape_1 = make_shared(input_1, final_int_shape, false); @@ -2214,7 +2214,7 @@ TEST(TransformationTests, align_mixed_fp16_fp32_with_parameter_for_shape_1) { auto upscale_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {2.0f}); auto mul_1 = make_shared(shape_input, upscale_const); auto axis_const = 
ov::op::v0::Constant::create(element::i64, Shape{1}, {0}); - auto final_float_shape = make_shared(mul_1, axis_const); + auto final_float_shape = make_shared(mul_1, axis_const, true); auto final_int_shape = make_shared(final_float_shape, element::i64); auto reshape_1 = make_shared(input_1, final_int_shape, false); @@ -2235,7 +2235,7 @@ TEST(TransformationTests, align_mixed_fp16_fp32_with_parameter_for_shape_2) { auto upscale_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {2.0f}); auto mul_1 = make_shared(shape_input, upscale_const); auto axis_const = ov::op::v0::Constant::create(element::i64, Shape{1}, {0}); - auto final_float_shape = make_shared(mul_1, axis_const); + auto final_float_shape = make_shared(mul_1, axis_const, true); auto final_int_shape = make_shared(final_float_shape, element::i64); auto reshape_1 = make_shared(input_1, final_int_shape, false); @@ -2260,7 +2260,7 @@ TEST(TransformationTests, align_mixed_fp16_fp32_with_parameter_for_shape_2) { auto upscale_const = ov::op::v0::Constant::create(element::f32, Shape{1}, {2.0f}); auto mul_1 = make_shared(shape_input, upscale_const); auto axis_const = ov::op::v0::Constant::create(element::i64, Shape{1}, {0}); - auto final_float_shape = make_shared(mul_1, axis_const); + auto final_float_shape = make_shared(mul_1, axis_const, true); auto final_int_shape = make_shared(final_float_shape, element::i64); auto reshape_1 = make_shared(convert_to_f16, final_int_shape, false); auto convert_to_f32 = make_shared(reshape_1, element::f32); diff --git a/src/core/shape_inference/include/reshape_shape_inference.hpp b/src/core/shape_inference/include/reshape_shape_inference.hpp new file mode 100644 index 00000000000000..30c9d58b5a09a3 --- /dev/null +++ b/src/core/shape_inference/include/reshape_shape_inference.hpp @@ -0,0 +1,364 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include "compare.hpp" +#include "dimension_util.hpp" +#include 
"openvino/core/dimension_tracker.hpp" +#include "openvino/op/reshape.hpp" +#include "utils.hpp" + +namespace ov { +namespace op { +namespace reshape { +template +struct Product {}; + +/** \brief Helper to resolve the input and output product for static dimensions. */ +template +struct Product::value>::type> { + T in{1}; + T out{1}; + + void update_in(const T& in_dim) { + in *= in_dim; + } + + void update_out(const T& out_dim) { + out *= out_dim; + } + + void set_inf() { + in = T(-1); + out = T(-1); + } + + const T& get_static_in() const { + return in; + } + + const T& get_static_out() const { + return out; + } + + void calculate() {} +}; + +/** \brief Helper to resolve the input and output product for ov::Dimension (dynamic) dimensions. */ +template +struct Product::value>::type> { + std::pair in{1, 1}; + std::pair out{1, 1}; + + void update_in(const T& in_dim) { + inputs.emplace_back(in_dim); + } + + void update_out(const T& out_dim) { + outputs.emplace_back(out_dim); + } + + void set_inf() { + in.second = T(-1); + out.second = T(-1); + } + + const T& get_static_in() const { + return in.first; + } + + const T& get_static_out() const { + return out.first; + } + + const T& get_dynamic_in() const { + return in.second; + } + + const T& get_dynamic_out() const { + return out.second; + } + + void calculate() { + // dimensions compare to remove same from product calculation + auto dim_full_eq = [](const T& lhs, const T& rhs) -> bool { + return (lhs == rhs) && DimensionTracker::get_label(lhs) == DimensionTracker::get_label(rhs) && + (lhs.is_static() || DimensionTracker::has_label(lhs)); + }; + + auto outs = outputs; + + // calculate input product + for (const auto& d : inputs) { + auto out_it = std::find_if(outs.begin(), outs.end(), [&](const T& p) { + return dim_full_eq(d, p) && (d != 0); + }); + + if (out_it == outs.end()) { + mul(in, d); + } else if (!outs.empty()) { + outs.erase(out_it); + } + } + + // calculate output product + for (const auto& o : outs) { + mul(out, 
o); + } + + if (in.first != out.first) { + in.second *= in.first; + out.second *= out.first; + } else if (in.first == 1 && in.second == 1) { + // If dynamic product is one (no dynamic) and static is also one use static + in.second = in.first; + } + } + +private: + void mul(std::pair& prod, const T& value) { + if (value.is_static()) { + prod.first = value * prod.first; + } else { + prod.second = value * prod.second; + } + } + + std::vector inputs{}; + std::vector outputs{}; +}; + +// resolve minus one dimension for ov::Dimension +template ::type, Dimension>::value>::type* = nullptr> +TDim resolve_minus_one_dim(const Product& product) { + auto minus_one_dim = product.get_dynamic_in(); + auto& product_out = product.get_dynamic_out(); + + if (minus_one_dim.is_static() && product_out.is_static()) { + minus_one_dim /= product_out.get_length(); + } else { + using namespace ov::util; + auto& minus_one_interval = minus_one_dim.get_interval(); + + if (minus_one_interval.has_upper_bound() && product_out.get_min_length() != 0 && product_out != TDim{}) { + minus_one_interval.set_max_val(minus_one_interval.get_max_val() / product_out.get_min_length()); + } else { + minus_one_interval.set_max_val(Interval::s_max); + } + + if (product_out.get_max_length() != 0) { + minus_one_interval.set_min_val( + ceil_div(minus_one_interval.get_min_val(), product_out.get_interval().get_max_val())); + } + + if (product_out.get_min_length() != 1 || product_out.get_max_length() != 1) { + DimensionTracker::reset_tracking_info(minus_one_dim); + } + } + return minus_one_dim; +} + +// resolve minus one dimension for static dimension +template ::type, Dimension>::value>::type* = nullptr> +TDim resolve_minus_one_dim(const Product& product) { + return product.get_static_in() / product.get_static_out().get_length(); +} + +/** + * @brief Get the pattern and minus one idx from input bounds. + * + * @param op Pointer to reshape node. + * @param bounds Vector of reshape pattern bounds. 
+ * + * @return Pair which got bounds converted to shape and `minus_one` index in pattern (-1 if not found). + */ +template +std::pair get_pattern_and_minus_one_idx(const Node* const op, + const std::vector>& bounds) { + using namespace ov::util; + const auto minus_one_bound = std::make_pair(dim::inf_bound, dim::inf_bound); + + auto result = std::make_pair(TShape{}, dim::inf_bound); + auto& shape = std::get<0>(result); + shape.reserve(bounds.size()); + + auto& minus_one_idx = std::get<1>(result); + auto bounds_iter = bounds.begin(); + + for (size_t i = 0; i < bounds.size(); ++i, ++bounds_iter) { + if (*bounds_iter == minus_one_bound) { + NODE_VALIDATION_CHECK(op, minus_one_idx == dim::inf_bound, "More than one dimension has size of -1"); + minus_one_idx = static_cast(i); + } + NODE_VALIDATION_CHECK(op, *bounds_iter >= minus_one_bound, "Dim size cannot be less than -1"); + shape.emplace_back(bounds_iter->first, bounds_iter->second); + } + + return result; +} + +/** + * @brief Set the pattern labels on pattern shape if this input is labeled. + * + * @param op Pointer to reshape node. + * @param shape Pointer to shape for labels set. + */ +template ::value>::type* = nullptr> +void set_pattern_labels(const Node* const op, TShape& shape) { + if (op->get_input_size() > 0) { + auto labels = op->get_input_source_output(1).get_tensor().get_value_label(); + + if (!labels.empty()) { + auto label_iter = labels.begin(); + for (auto& d : shape) { + if (*label_iter != no_label) { + DimensionTracker::set_label(d, *label_iter); + } + ++label_iter; + } + } + } +} + +/** @brief Shapes other than PartialShape have no labels. 
*/ +template ::value>::type* = nullptr> +void set_pattern_labels(const Node* const, TShape&) {} + +} // namespace reshape + +namespace v1 { +template > +std::vector shape_infer(const Reshape* op, + const std::vector& input_shapes, + const ITensorAccessor& ta = make_tensor_accessor()) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 2); + + using namespace ov::util; + using TDim = typename T::value_type; + + const auto& input_shape = input_shapes[0]; + const auto& pattern_shape = input_shapes[1]; + const auto input_rank = input_shape.rank(); + const auto pattern_shape_rank = pattern_shape.rank(); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + pattern_shape_rank.compatible(0) || pattern_shape_rank.compatible(1), + "Pattern shape must have rank 1 or be empty"); + + auto output_shapes = std::vector(1); + auto& output_shape = output_shapes[0]; + + if (const auto output_bounds = get_input_bounds(op, 1, ta)) { + auto pattern_and_minus_one_idx = reshape::get_pattern_and_minus_one_idx(op, *output_bounds); + auto& output_pattern = pattern_and_minus_one_idx.first; + const auto minus_one_idx = pattern_and_minus_one_idx.second; + + reshape::set_pattern_labels(op, output_pattern); + + if (pattern_shape_rank.get_max_length() == 0) { + NODE_VALIDATION_CHECK(op, + output_pattern[0] == 1, + "The value of scalar shape pattern should be equal to 1!"); + output_pattern.resize(0); + } + + const auto special_zero = op->get_special_zero(); + + reshape::Product product; + + if (input_rank.is_dynamic()) { + for (const auto& pattern : output_pattern) { + if (special_zero && pattern == 0) { + output_shape.emplace_back(dim::inf_bound); + product.set_inf(); + } else { + output_shape.emplace_back(pattern); + product.update_out(pattern); + } + } + } else { + auto input_iter = input_shape.begin(); + auto input_last = input_shape.end(); + + for (size_t i = 0; i < output_pattern.size(); ++i) { + const auto& pattern_dim = output_pattern[i]; + auto ignore_pattern_dim = special_zero && 
(pattern_dim == 0); + + if (static_cast(i) == minus_one_idx) { + output_shape.emplace_back(); + } else if (ignore_pattern_dim) { + NODE_SHAPE_INFER_CHECK(op, input_shapes, i < input_shape.size(), "'0' dimension is out of range"); + output_shape.push_back(*input_iter); + // Exclude special zero dimension from product calculation + } else { + output_shape.push_back(pattern_dim); + product.update_out(pattern_dim); + } + + if (input_iter != input_last) { + if (!ignore_pattern_dim) { + product.update_in(*input_iter); + } + ++input_iter; + } + } + + // update input product by remaining input dimensions. + for (; input_iter != input_last; ++input_iter) { + product.update_in(*input_iter); + } + } + product.calculate(); + + // resolving -1 masked dimension + const auto has_minus_one_idx = !dim::is_inf_bound(minus_one_idx); + if (has_minus_one_idx) { + auto& minus_one_dim = output_shape[minus_one_idx]; + minus_one_dim = reshape::resolve_minus_one_dim(product); + + if (product.get_static_out() == 0) { + NODE_VALIDATION_CHECK(op, + product.get_static_in() == 0, + "Cannot infer '-1' dimension with zero-size output dimension unless at least one " + "input dimension is also zero-size"); + } else { + NODE_VALIDATION_CHECK(op, + !dim::is_empty(minus_one_dim), + "Non-'-1' output dimensions do not evenly divide the input dimensions"); + } + } + + if (input_shape.is_static() && output_shape.is_static()) { + const auto zero_dims = std::any_of(output_pattern.begin(), output_pattern.end(), cmp::Equal(0)); + const auto backward_compatible_check = (zero_dims && special_zero) || has_minus_one_idx; + const auto in_out_elements_equal = (product.get_static_in() == product.get_static_out()); + + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + backward_compatible_check || in_out_elements_equal, + "Requested output shape ", + output_shape, + " is incompatible with input shape"); + } + } else if (pattern_shape_rank.is_static()) { + if (pattern_shape_rank.get_length() == 0) { + 
NODE_SHAPE_INFER_CHECK(op, + input_shapes, + input_rank.compatible(0), + "Input must be scalar as pattern is scalar!"); + } else { + output_shape = + PartialShape::dynamic(Rank(pattern_shape[0].get_min_length(), pattern_shape[0].get_max_length())); + } + } else { + output_shape = PartialShape::dynamic(); + } + return output_shapes; +} +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/shape_inference/include/shape_nodes.hpp b/src/core/shape_inference/include/shape_nodes.hpp index eaa7e5ba0f04d0..f7cce159d27bd4 100644 --- a/src/core/shape_inference/include/shape_nodes.hpp +++ b/src/core/shape_inference/include/shape_nodes.hpp @@ -10,93 +10,6 @@ #include "utils.hpp" -template > -std::vector shape_infer(const ov::op::v1::Reshape* op, - const std::vector& input_shapes, - const ov::ITensorAccessor& ta = ov::make_tensor_accessor()) { - NODE_VALIDATION_CHECK(op, input_shapes.size() == 2); - auto output_pattern = ov::op::get_input_const_data_as(op, 1, ta); - NODE_VALIDATION_CHECK(op, output_pattern, "Shape inference lacks input data"); - - auto& input_shape = input_shapes[0]; - OPENVINO_ASSERT(input_shape.is_static()); - auto output_shapes = std::vector(1); - auto& output_shape = output_shapes[0]; - output_shape.resize(output_pattern->size()); - - auto output_rank = input_shapes[1].size() == 0 ? 
0 : input_shapes[1][0]; - if (output_rank == 0 && output_shape.size() != 0) { - output_pattern->clear(); - OPENVINO_ASSERT(output_pattern->size() == 1); - NODE_VALIDATION_CHECK(op, (*output_pattern)[0] == 1, "The value of scalar shape pattern should be equal to 1!"); - } - - auto special_zero = op->get_special_zero(); - - size_t output_product(1); - int64_t minus_one_idx = -1; - for (size_t i = 0; i < output_pattern->size(); ++i) { - if ((*output_pattern)[i] == -1) { // resolving everything except -1 - NODE_VALIDATION_CHECK(op, - minus_one_idx == -1, - "More than one element of output shape pattern has value of -1"); - minus_one_idx = static_cast(i); - continue; - } - - auto pattern_dim = (*output_pattern)[i]; - if (pattern_dim == 0 && special_zero) { - NODE_VALIDATION_CHECK(op, i < input_shape.size(), "'0' dimension is out of range"); - output_shape[i] = input_shape[i]; - // we do not include dimension to output product here and won't include in input - // product later because we will divide output_product by input_product. 
This - // dimension contributes to both products equally - } else { - output_shape[i] = pattern_dim; - output_product *= pattern_dim; - } - } - size_t input_product(1); - for (size_t i = 0; i < input_shape.size(); ++i) { - if (i < output_pattern->size() && (*output_pattern)[i] == 0 && special_zero) - continue; - input_product = input_shape[i].get_length() * input_product; - } - - if (minus_one_idx != -1) // resolving -1 masked dimension - { - if (output_product == 0) { - NODE_VALIDATION_CHECK(op, - input_product == 0, - "Cannot infer '-1' dimension with zero-size output " - "dimension unless at least one input dimension is " - "also zero-size"); - output_shape[minus_one_idx] = 0; - } else { - NODE_VALIDATION_CHECK(op, - input_product % output_product == 0, - "Non-'-1' output dimensions do not evenly divide the input dimensions"); - output_shape[minus_one_idx] = input_product / output_product; - } - } - - size_t zero_dims = std::count_if(output_pattern->begin(), output_pattern->end(), [](const int64_t& dim) { - return dim == 0; - }); - - bool backward_compatible_check = (zero_dims && special_zero) || minus_one_idx != -1; - bool in_out_elements_equal = input_product == output_product; - - NODE_VALIDATION_CHECK(op, - backward_compatible_check || in_out_elements_equal, - "Requested output shape ", - output_shape, - " is incompatible with input shape ", - input_shape); - - return output_shapes; -} - namespace ov { namespace op { namespace shape_of { diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index cac12973a18179..308a7f84594eca 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -385,19 +385,20 @@ ov::optional get_input_bounds(const ov::Node* op, size_t port, const IT }; }; + constexpr auto cast = ov::util::Cast(); ov::optional out; - if (auto lowers = op::get_input_const_data_as(op, port, ta)) { - const auto& et = get_input_const_element_type(op, port, ta); 
+ if (const auto t = ta(port)) { + const auto& et = t.get_element_type(); + const auto lowers = get_tensor_data_as(t, cast); out.emplace(); - out->reserve(lowers->size()); - std::transform(lowers->cbegin(), lowers->cend(), lowers->begin(), std::back_inserter(*out), make_bound(et)); + out->reserve(lowers.size()); + std::transform(lowers.cbegin(), lowers.cend(), lowers.cbegin(), std::back_inserter(*out), make_bound(et)); } else { auto bounds = ov::evaluate_both_bounds(op->get_input_source_output(port)); if (bounds.first && bounds.second) { const auto& et = bounds.first.get_element_type(); - constexpr auto cast = ov::util::Cast(); auto lowers = get_tensor_data_as(bounds.first, cast); auto uppers = get_tensor_data_as(bounds.second, cast); @@ -406,6 +407,10 @@ ov::optional get_input_bounds(const ov::Node* op, size_t port, const IT std::transform(lowers.begin(), lowers.end(), uppers.begin(), std::back_inserter(*out), make_bound(et)); } } + + if (!std::is_same::value) { + NODE_VALIDATION_CHECK(op, out, "Static shape inference lacks constant data on port ", port); + } return out; } diff --git a/src/core/src/op/reshape.cpp b/src/core/src/op/reshape.cpp index 279a06350ee7ea..10c126c9f5c5b0 100644 --- a/src/core/src/op/reshape.cpp +++ b/src/core/src/op/reshape.cpp @@ -5,221 +5,95 @@ #include "openvino/op/reshape.hpp" #include -#include #include "bound_evaluate.hpp" -#include "compare.hpp" #include "itt.hpp" #include "ngraph/util.hpp" #include "openvino/core/dimension_tracker.hpp" -#include "openvino/core/validation_util.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" #include "openvino/reference/reshape.hpp" +#include "reshape_shape_inference.hpp" -using namespace std; -using namespace ov; +namespace ov { +namespace op { +namespace v1 { -namespace reshapeop { -namespace { - -template -void compute_output_shape(const ov::Tensor& shape_pattern, std::vector& output_shape) { - size_t output_rank; - if 
(shape_pattern.get_size() != 0) { - output_rank = shape_pattern.get_shape().empty() ? 0 : shape_pattern.get_shape()[0]; - } else { - // Can be dynamic during shape infer as conversion result from empty ov::Tensor - output_rank = 0; - } - - for (size_t i = 0; i < output_rank; i++) { - output_shape.push_back(shape_pattern.data::value_type>()[i]); - } -} -} // namespace -} // namespace reshapeop - -op::v1::Reshape::Reshape(const Output& arg, const Output& shape_pattern, bool zero_flag) +Reshape::Reshape(const Output& arg, const Output& shape_pattern, bool zero_flag) : Op({arg, shape_pattern}), m_special_zero(zero_flag) { ov::mark_as_precision_sensitive(input(1)); constructor_validate_and_infer_types(); } -bool op::v1::Reshape::visit_attributes(AttributeVisitor& visitor) { +bool Reshape::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_Reshape_visit_attributes); visitor.on_attribute("special_zero", m_special_zero); return true; } -void op::v1::Reshape::validate_and_infer_types() { +void Reshape::validate_and_infer_types() { OV_OP_SCOPE(v1_Reshape_validate_and_infer_types); - auto shape_pattern_et = get_input_element_type(1); + const auto& shape_pattern_et = get_input_element_type(1); // check data types NODE_VALIDATION_CHECK(this, shape_pattern_et.is_integral_number(), "PartialShape pattern must be an integral number."); - // check shapes - const ov::PartialShape& input_pshape = get_input_partial_shape(0); - const ov::PartialShape& shape_pattern_shape = get_input_partial_shape(1); - NODE_VALIDATION_CHECK(this, - shape_pattern_shape.rank().compatible(1) || - (shape_pattern_shape.rank().is_static() && shape_pattern_shape.rank().get_length() == 0), - "Pattern shape must have rank 1 or be empty, got ", - shape_pattern_shape.rank(), - "."); - Rank output_rank = shape_pattern_shape.rank().is_dynamic() - ? Rank::dynamic() - : shape_pattern_shape.rank().get_length() == 0 ? 
0 : shape_pattern_shape[0]; - set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic(output_rank)); - set_input_is_relevant_to_shape(1); - - std::vector reshape_pattern; - bool shape_can_be_calculated = false; - int64_t minus_one_idx = -1; - - ov::Tensor lb, ub; - std::tie(lb, ub) = evaluate_both_bounds(get_input_source_output(1)); - if (lb && ub) { - const auto lower_bound = std::make_shared(lb.get_element_type(), lb.get_shape(), lb.data()) - ->cast_vector(); - auto upper_bound = std::make_shared(ub.get_element_type(), ub.get_shape(), ub.data()) - ->cast_vector(); - shape_can_be_calculated = true; - OPENVINO_ASSERT(lower_bound.size() == upper_bound.size()); - const TensorLabel& labels = get_input_source_output(1).get_tensor().get_value_label(); - OPENVINO_ASSERT(labels.empty() || lower_bound.size() == labels.size()); - - for (size_t i = 0; i < lower_bound.size(); ++i) { - NODE_VALIDATION_CHECK(this, - lower_bound[i] >= -1 && upper_bound[i] >= -1, - "Dim size cannot be less than -1"); - - if (lower_bound[i] == -1 && - upper_bound[i] == -1) { // ctor of Dimension(-1) would turn input Dimension(0, max_int) - NODE_VALIDATION_CHECK(this, minus_one_idx == -1, "More than one dimension has size of -1"); - minus_one_idx = static_cast(i); - } - - // We must handle i32 fully dynamic dimension in a special way - if (get_input_element_type(1) == element::i32 && - upper_bound[i] == std::numeric_limits::max()) { - upper_bound[i] = std::numeric_limits::max(); - } - auto d = Dimension(lower_bound[i], upper_bound[i]); - if (!labels.empty() && labels[i]) - ov::DimensionTracker::set_label(d, labels[i]); - reshape_pattern.emplace_back(d); - } - // For scalar case reshape_patter should be empty but scalar reshape pattern should be empty - // or equal to 1 - if (output_rank.is_static() && output_rank.get_length() == 0 && !lower_bound.empty()) { - reshape_pattern.clear(); - OPENVINO_ASSERT(lower_bound.size() == 1); - NODE_VALIDATION_CHECK(this, - lower_bound[0] == 1 && 
upper_bound[0] == 1, - "The value of scalar shape pattern should be equal to 1!"); - } - } - - if (shape_can_be_calculated) { - std::vector output_shape(output_rank.get_length()); - calculate_output_shape(reshape_pattern, minus_one_idx, input_pshape, output_shape); - set_output_type(0, get_input_element_type(0), output_shape); - } + OPENVINO_SUPPRESS_DEPRECATED_START + auto input_shapes = ov::get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + const auto output_shapes = shape_infer(this, input_shapes); + set_output_type(0, get_input_element_type(0), output_shapes.front()); } -shared_ptr op::v1::Reshape::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Reshape::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Reshape_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), m_special_zero); + return std::make_shared(new_args.at(0), new_args.at(1), m_special_zero); } -#define COMPUTE_OUT_SHAPE_CASE(a, ...) 
\ - case element::Type_t::a: { \ - OV_OP_SCOPE(OV_PP_CAT3(compute_reshape_out_shape, _, a)); \ - reshapeop::compute_output_shape(__VA_ARGS__); \ - } break; - -bool op::v1::Reshape::evaluate_reshape(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { - // infer and set output shape if the output shape contain -1 - // and zero value dimension - std::vector out_shape_val; - - switch (inputs[1].get_element_type()) { - COMPUTE_OUT_SHAPE_CASE(i8, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(i16, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(i32, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(i64, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(u8, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(u16, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(u32, inputs[1], out_shape_val); - COMPUTE_OUT_SHAPE_CASE(u64, inputs[1], out_shape_val); - default: - OPENVINO_THROW("shape_pattern element type is not integral data type"); +bool Reshape::evaluate_reshape(TensorVector& outputs, const TensorVector& inputs) const { + std::vector input_shapes; + input_shapes.reserve(inputs.size()); + for (const auto& in : inputs) { + input_shapes.push_back(in.get_shape()); } - std::vector reshape_pattern; - int64_t minus_one_idx = -1; - for (size_t i = 0; i < out_shape_val.size(); ++i) { - NODE_VALIDATION_CHECK(this, out_shape_val[i] >= -1, "Dim size cannot be less than -1"); - if (out_shape_val[i] == -1) { // ctor of Dimension(-1) would turn input Dimension(0, max_int) - NODE_VALIDATION_CHECK(this, minus_one_idx == -1, "More than one dimension has size of -1"); - minus_one_idx = static_cast(i); - } - reshape_pattern.emplace_back(out_shape_val[i]); + const auto output_shape = shape_infer(this, input_shapes, make_tensor_accessor(inputs)).front().to_shape(); + if (outputs.empty()) { + outputs.emplace_back(inputs[0].get_element_type(), output_shape); + } else { + OPENVINO_ASSERT(outputs.size() == 1); + outputs[0].set_shape(output_shape); } - std::vector 
output_shape(out_shape_val.size()); - calculate_output_shape(reshape_pattern, minus_one_idx, inputs[0].get_shape(), output_shape); - OPENVINO_ASSERT(ov::PartialShape(output_shape).is_static()); - outputs[0].set_shape(ov::PartialShape(output_shape).to_shape()); - - ov::reference::reshape(static_cast(inputs[0].data()), + ov::reference::reshape(static_cast(inputs[0].data()), static_cast(outputs[0].data()), inputs[0].get_shape(), inputs[0].get_element_type().size()); return true; } -bool op::v1::Reshape::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { +bool Reshape::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Reshape_evaluate); - OPENVINO_ASSERT(inputs.size() == 2); - if (outputs.empty()) - outputs.emplace_back(ov::Tensor(inputs[0].get_element_type(), {0})); - else - OPENVINO_ASSERT(outputs.size() == 1); return evaluate_reshape(outputs, inputs); } -bool op::v1::Reshape::has_evaluate() const { +bool Reshape::has_evaluate() const { OV_OP_SCOPE(v1_Reshape_has_evaluate); - switch (get_input_element_type(1)) { - case ov::element::i8: - case ov::element::i16: - case ov::element::i32: - case ov::element::i64: - case ov::element::u8: - case ov::element::u16: - case ov::element::u32: - case ov::element::u64: - return true; - default: - break; - } - return false; + const auto& shape_pattern_et = get_input_element_type(1); + return shape_pattern_et.is_integral_number() && (shape_pattern_et.bitwidth() >= 8); } -bool op::v1::Reshape::evaluate_lower(ov::TensorVector& output_values) const { +bool Reshape::evaluate_lower(ov::TensorVector& output_values) const { return get_input_tensor(1).has_and_set_bound() && default_lower_bound_evaluator(this, output_values); } -bool op::v1::Reshape::evaluate_upper(ov::TensorVector& output_values) const { +bool Reshape::evaluate_upper(ov::TensorVector& output_values) const { return get_input_tensor(1).has_and_set_bound() && default_upper_bound_evaluator(this, output_values); } -bool 
op::v1::Reshape::evaluate_label(TensorLabelVector& output_labels) const { +bool Reshape::evaluate_label(TensorLabelVector& output_labels) const { if (!get_input_tensor(1).has_and_set_bound()) return false; OPENVINO_SUPPRESS_DEPRECATED_START @@ -227,7 +101,7 @@ bool op::v1::Reshape::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_END } -bool op::v1::Reshape::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Reshape::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { return false; } @@ -240,189 +114,6 @@ bool op::v1::Reshape::constant_fold(OutputVector& output_values, const OutputVec } return false; } - -namespace { -bool fully_eq(const Dimension& rhs, const Dimension& lhs) { - return rhs == lhs && ov::DimensionTracker::get_label(rhs) == ov::DimensionTracker::get_label(lhs) && - (ov::DimensionTracker::get_label(rhs) || rhs.is_static()); -} - -Dimension resolve_minus_one(const Node* reshape_node, - vector& input_product, - vector& output_product) { - std::vector to_delete_from_output, to_delete_from_input; - Dimension input_const_part(1), output_const_part(1); - - for (const auto& dim : output_product) - if (dim.is_static()) { - output_const_part *= dim; - to_delete_from_output.push_back(dim); - } - - for (const auto& dim : input_product) - if (dim.is_static()) { - input_const_part *= dim; - to_delete_from_input.push_back(dim); - } - - for (const auto& dim : to_delete_from_input) { - input_product.erase(std::remove_if(input_product.begin(), - input_product.end(), - [=](const Dimension& d) { - return fully_eq(dim, d); - }), - input_product.end()); - } - for (const auto& dim : to_delete_from_output) { - output_product.erase(std::remove_if(output_product.begin(), - output_product.end(), - [=](const Dimension& d) { - return fully_eq(dim, d); - }), - output_product.end()); - } - - 
to_delete_from_input.clear(); - to_delete_from_output.clear(); - - if (input_const_part != output_const_part) { - input_product.push_back(input_const_part); - output_product.push_back(output_const_part); - } - - for (const auto& out_dim : output_product) { - const auto& it = std::find_if(input_product.begin(), input_product.end(), [out_dim](const Dimension& in_dim) { - return fully_eq(out_dim, in_dim); - }); - if (it != input_product.end()) { - to_delete_from_output.push_back(out_dim); - to_delete_from_input.push_back(out_dim); - } - } - for (const auto& dim : to_delete_from_input) { - input_product.erase(std::remove_if(input_product.begin(), - input_product.end(), - [=](const Dimension& d) { - return fully_eq(dim, d); - }), - input_product.end()); - } - for (const auto& dim : to_delete_from_output) { - output_product.erase(std::remove_if(output_product.begin(), - output_product.end(), - [=](const Dimension& d) { - return fully_eq(dim, d); - }), - output_product.end()); - } - - if (output_product.empty() && input_product.size() == 1) - return input_product[0]; - - Dimension input_dim(1), output_dim(1); - for (const auto& i : input_product) { - input_dim *= i; - } - for (const auto& i : output_product) { - output_dim *= i; - } - - if (output_dim == 0) { - NODE_VALIDATION_CHECK(reshape_node, - input_dim == 0, - "Cannot infer '-1' dimension with zero-size output " - "dimension unless at least one input dimension is " - "also zero-size"); - return Dimension(0); - } else { - if (input_dim.is_static() && output_dim.is_static()) { - NODE_VALIDATION_CHECK(reshape_node, - input_dim.get_length() % output_dim.get_length() == 0, - "Non-'-1' output dimensions do not evenly divide the input dimensions"); - } - - if (output_dim == Dimension() || input_dim == Dimension()) { - return Dimension::dynamic(); - } else { - auto in_min = input_dim.get_min_length(), in_max = input_dim.get_max_length(); - auto out_min = output_dim.get_min_length(), out_max = output_dim.get_max_length(); - 
- Dimension::value_type lower; - if (in_min == -1 || out_max == -1) - lower = -1; // dynamic - else - lower = static_cast(ceil(static_cast(in_min) / (out_max ? out_max : 1))); - - Dimension::value_type upper; - if (in_max == -1 || out_min == -1) - upper = -1; // dynamic - else - upper = - static_cast(floor(static_cast(in_max) / (out_min ? out_min : 1))); - - if (lower == -1 || (lower > upper && upper > -1)) - return Dimension::dynamic(); - else - return {lower, upper}; - } - } -} -} // namespace - -void op::v1::Reshape::calculate_output_shape(vector& reshape_pattern, - const int64_t& minus_one_idx, - const ov::PartialShape& input_pshape, - vector& output_shape) const { - std::vector output_product; - for (int64_t i = 0; i < static_cast(reshape_pattern.size()); ++i) { - if (i == minus_one_idx) // resolving everything except -1 - continue; - - auto pattern_dim = reshape_pattern[i]; - if (pattern_dim == 0 && get_special_zero()) { - if (input_pshape.rank().is_dynamic()) { - output_shape[i] = Dimension::dynamic(); - output_product.push_back(Dimension::dynamic()); - } else { - NODE_VALIDATION_CHECK(this, i < input_pshape.rank().get_length(), "'0' dimension is out of range"); - output_shape[i] = input_pshape[i]; - // we do not include dimension to output product here and won't include in input - // product later because we will divide output_product by input_product. 
This - // dimension contributes to both products equally, but in case this dimension - // is dynamic and others are not we could fully define output dimension that - // is masked by -1 - } - } else { - output_shape[i] = pattern_dim; - output_product.push_back(pattern_dim); - } - } - std::vector input_product; - if (input_pshape.rank().is_static()) - for (int64_t i = 0; i < input_pshape.rank().get_length(); ++i) { - if (i < static_cast(reshape_pattern.size()) && reshape_pattern[i].get_min_length() == 0 && - reshape_pattern[i].get_max_length() == 0) - continue; - input_product.push_back(input_pshape[i]); - } - else - input_product.push_back(Dimension::dynamic()); - - if (minus_one_idx != -1) // resolving -1 masked dimension - output_shape[minus_one_idx] = resolve_minus_one(this, input_product, output_product); - - ov::PartialShape output_pshape(output_shape); - if (input_pshape.is_static() && output_pshape.is_static()) { - size_t zero_dims = std::count_if(reshape_pattern.begin(), reshape_pattern.end(), cmp::Equal(0)); - - bool backward_compatible_check = (zero_dims && get_special_zero()) || minus_one_idx != -1; - bool in_out_elements_equal = shape_size(input_pshape.get_shape()) == shape_size(output_pshape.to_shape()); - - NODE_VALIDATION_CHECK(this, - backward_compatible_check || in_out_elements_equal, - "Requested output shape ", - output_shape, - " is incompatible with input shape ", - input_pshape); - } -} +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index fabf47f0f2f248..1edce9028bba01 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -758,7 +758,7 @@ TEST(eval, evaluate_reshape_v1_not_backward_compatible_and_in_out_size_not_eq) { OV_EXPECT_THROW(model->evaluate(out_vector, in_vector), NodeValidationFailure, - HasSubstr("Requested output shape [2,1,1,1,1] is incompatible with input shape [2,2,2]")); + HasSubstr("Requested output shape [2,1,1,1,1] is incompatible with 
input shape")); } TEST(eval, evaluate_convert) { diff --git a/src/core/tests/type_prop/reshape.cpp b/src/core/tests/type_prop/reshape.cpp index 97ee3ab76edced..ea70f5c9f646af 100644 --- a/src/core/tests/type_prop/reshape.cpp +++ b/src/core/tests/type_prop/reshape.cpp @@ -4,6 +4,7 @@ #include "openvino/op/reshape.hpp" +#include "common_test_utils/test_assertions.hpp" #include "common_test_utils/type_prop.hpp" #include "openvino/core/dimension_tracker.hpp" #include "openvino/op/broadcast.hpp" @@ -18,8 +19,12 @@ #include "openvino/op/squeeze.hpp" #include "openvino/op/unsqueeze.hpp" -using namespace std; using namespace ov; +using std::ignore; +using std::make_shared; +using testing::Each; +using testing::ElementsAre; +using testing::HasSubstr; TEST(type_prop, static_value_propagation) { auto param = make_shared(element::f32, Shape{1, 2, 3}); @@ -263,10 +268,10 @@ TEST(type_prop, interval_value_propagation_reshape_zero_special_value) { PartialShape({Dimension(1, 8), 3, Dimension(16, 64), Dimension(200, 400)})); } -TEST(type_prop, interval_value_propagation_reshape_zero_minus_one_special_values) { +TEST(type_prop, reshape_interval_value_propagation_reshape_zero_minus_one_special_values) { auto param = - make_shared(element::f32, - PartialShape{Dimension(1, 8), Dimension(16, 64), 6, Dimension(200, 400)}); + make_shared(element::f32, + PartialShape{Dimension(1, 8), Dimension(16, 64), 6, Dimension(200, 400)}); auto shape_of = make_shared(param); auto dim_0 = make_shared(shape_of, @@ -325,20 +330,6 @@ TEST(type_prop, reshape_deduce_zero_special) { ASSERT_EQ(r->get_shape(), (Shape{6, 2, 5})); } -TEST(type_prop, reshape_deduce_wrong_output_shape) { - auto param = make_shared(element::f32, Shape{3, 4, 5}); - try { - auto r = - make_shared(param, ov::op::v0::Constant::create(element::u64, {3}, Shape{3, 3, 3}), false); - // Should have thrown, so fail if it didn't - FAIL() << "No exception was thrown"; - } catch (const NodeValidationFailure& error) { - 
EXPECT_HAS_SUBSTRING(error.what(), std::string("is incompatible with input shape")); - } catch (...) { - FAIL() << "Deduced type check failed for unexpected reason"; - } -} - // // Input shape rank dynamic, so we should set the desired output shape // @@ -615,12 +606,14 @@ TEST(type_prop, reshape_to_zero_shape_dynamic) { } TEST(type_prop, reshape_to_zero_shape_incorrect) { - auto param = make_shared(element::f32, Shape{2, 1}); - ASSERT_THROW(const auto unused = make_shared( - param, - ov::op::v0::Constant::create(element::i64, {1}, std::vector{0}), - false), - std::exception); + auto param = make_shared(element::f32, Shape{2, 1}); + + OV_EXPECT_THROW( + ignore = make_shared(param, + op::v0::Constant::create(element::i64, {1}, std::vector{0}), + false), + NodeValidationFailure, + HasSubstr("Requested output shape [0] is incompatible with input shape")); } TEST(type_prop, reshape_to_zero) { @@ -651,23 +644,36 @@ TEST(type_prop, reshape_to_scalar_2) { } TEST(type_prop, reshape_to_scalar_3) { - auto param = make_shared(element::f32, Shape{1, 2, 3}); - ASSERT_THROW(const auto unused = make_shared( - param, - ov::op::v0::Constant::create(element::i64, {}, std::vector{100}), - false), - std::exception); + auto param = make_shared(element::f32, Shape{1, 2, 3}); + + OV_EXPECT_THROW( + ignore = make_shared(param, + op::v0::Constant::create(element::i64, {}, std::vector{100}), + false), + NodeValidationFailure, + HasSubstr("The value of scalar shape pattern should be equal to 1")); } -TEST(type_prop, dynamic_shape_propagation_with_i32_precision) { - auto param = make_shared(element::f32, PartialShape{1, -1, -1}); - auto shape_of = std::make_shared(param, element::i32); +TEST(type_prop, reshape_to_scalar_4) { + auto param = make_shared(element::f32, Shape{1, 2, 3}); - auto indices = ov::op::v0::Constant::create(element::i32, {3}, {1, 2, 0}); - auto axis = ov::op::v0::Constant::create(element::i32, {1}, {0}); - auto gather = std::make_shared(shape_of, indices, axis); + 
OV_EXPECT_THROW( + ignore = make_shared(param, + op::v0::Constant::create(element::i64, {}, std::vector{1}), + false), + NodeValidationFailure, + HasSubstr("Requested output shape [] is incompatible with input shape")); +} + +TEST(type_prop, reshape_dynamic_shape_propagation_with_i32_precision) { + auto param = make_shared(element::f32, PartialShape{1, -1, -1}); + auto shape_of = make_shared(param, element::i32); + + auto indices = op::v0::Constant::create(element::i32, {3}, {1, 2, 0}); + auto axis = op::v0::Constant::create(element::i32, {1}, {0}); + auto gather = make_shared(shape_of, indices, axis); - auto reshape = std::make_shared(param, gather, true); + auto reshape = make_shared(param, gather, true); ASSERT_EQ(reshape->get_element_type(), element::f32); ASSERT_EQ(reshape->get_output_partial_shape(0), (PartialShape{-1, -1, 1})); @@ -684,33 +690,573 @@ TEST(type_prop, reshape_dynamic_value_and_label_propagation) { const auto& et = element::i64; std::vector zero{0}; - const auto indices = std::make_shared(et, Shape{}, zero); - const auto axis = std::make_shared(et, Shape{}, zero); - const auto gather = std::make_shared(shape_0, indices, axis); + const auto indices = make_shared(et, Shape{}, zero); + const auto axis = make_shared(et, Shape{}, zero); + const auto gather = make_shared(shape_0, indices, axis); - const auto output_pattern = std::make_shared(et, Shape{1}, std::vector{-1}); - const auto unsqueeze = std::make_shared(gather, output_pattern, false); + const auto output_pattern = make_shared(et, Shape{1}, std::vector{-1}); + const auto unsqueeze = make_shared(gather, output_pattern, false); - auto bc = std::make_shared(param, unsqueeze); + auto bc = make_shared(param, unsqueeze); ASSERT_EQ(bc->get_shape(), (Shape{3})); const auto& output_shape = bc->get_output_partial_shape(0); - ASSERT_EQ(ov::DimensionTracker::get_label(output_shape[0]), 10); + EXPECT_EQ(output_shape, PartialShape({3})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10)); } 
-TEST(type_prop, reshape_label_shape_propagation_minus_one) { - Dimension marked_0 = Dimension(-1); - ov::DimensionTracker::set_label(marked_0, 10); +TEST(type_prop, reshape_when_pattern_has_static_shape_only) { + auto param = make_shared(element::f32, Shape{3, 4}); + auto shape_pattern = make_shared(element::u64, PartialShape{3}); + auto r = make_shared(param, shape_pattern, false); + + EXPECT_EQ(r->get_element_type(), element::f32); + EXPECT_EQ(r->get_output_partial_shape(0), PartialShape::dynamic(3)); +} + +TEST(type_prop, reshape_when_pattern_has_interval_shape_only) { + auto param = make_shared(element::f32, Shape{3, 4}); + auto shape_pattern = make_shared(element::u64, PartialShape{{1, 3}}); + auto r = make_shared(param, shape_pattern, false); + + EXPECT_EQ(r->get_element_type(), element::f32); + EXPECT_EQ(r->get_output_partial_shape(0), PartialShape::dynamic()); +} + +TEST(type_prop, reshape_when_pattern_has_scalar_shape_only) { + auto param = make_shared(element::f32, Shape{3, 4}); + auto shape_pattern = make_shared(element::u64, PartialShape{}); + + OV_EXPECT_THROW(ignore = make_shared(param, shape_pattern, false), + NodeValidationFailure, + HasSubstr("Input must be scalar as pattern is scalar!")); +} + +TEST(type_prop, reshape_label_propagation) { + auto param_shape = PartialShape{{1, 2}, {2, 4}, 6, {2, 4}, 8}; + auto out_shape = PartialShape{{3, 5}, 0, 1, 0}; + set_shape_labels(param_shape, 10); + set_shape_labels(out_shape, 20); + + const auto data = make_shared(element::f32, param_shape); + const auto out = make_shared(element::f32, out_shape); + const auto shape_of = make_shared(out); + const auto special_volume = op::v0::Constant::create(element::i64, {1}, {-1}); + const auto shape = make_shared(OutputVector{shape_of, special_volume}, 0); + + const auto op = make_shared(data, shape, true); + + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({{3, 5}, {2, 4}, 1, {2, 4}, {10, 32}})); + 
EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, 22, 13, ov::no_label)); +} + +TEST(type_prop, reshape_label_propagation_dynamic_pattern_got_same_label_as_input) { + auto param_shape = PartialShape{{1, 2}, {2, 4}, {3, 5}, {2, 4}, 8}; + auto out_shape = PartialShape{{3, 5}, 0, 1, 0, 8}; + set_shape_labels(param_shape, 10); + set_shape_labels(out_shape, {12, 21, 22, 23, 24}); + + const auto data = make_shared(element::f32, param_shape); + const auto out = make_shared(element::f32, out_shape); + const auto shape_of = make_shared(out); + const auto special_volume = op::v0::Constant::create(element::i64, {1}, {-1}); + const auto shape = make_shared(OutputVector{shape_of, special_volume}, 0); + + const auto op = make_shared(data, shape, true); + + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({{3, 5}, {2, 4}, 1, {2, 4}, 8, {1, 2}})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(12, 11, 22, 13, 24, 10)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_corner_case_zero_div_by_inf) { + auto param_shape = PartialShape{0, 0}; + auto out_shape = PartialShape{-1, 2}; + set_shape_labels(param_shape, 10); + set_shape_labels(out_shape, 20); + + const auto data = make_shared(element::f32, param_shape); + const auto out = make_shared(element::f32, out_shape); + const auto shape_of = make_shared(out); + const auto special_volume = op::v0::Constant::create(element::i64, {1}, {-1}); + const auto shape = make_shared(OutputVector{special_volume, shape_of}, 0); + + const auto op = make_shared(data, shape, true); + + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({-1, -1, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(ov::no_label, 20, 21)); +} + +TEST(type_prop, reshape_default_ctor) { + auto param_shape = PartialShape{{1, 2}, {2, 4}, 6, {2, 4}, 8}; + auto out_shape = PartialShape{{3, 5}, 0, 1, 0}; + set_shape_labels(param_shape, 10); + 
set_shape_labels(out_shape, 20); + + const auto data = make_shared(element::f32, param_shape); + const auto out = make_shared(element::f32, out_shape); + const auto shape_of = make_shared(out); + const auto special_volume = op::v0::Constant::create(element::i64, {1}, {-1}); + const auto shape = make_shared(OutputVector{shape_of, special_volume}, 0); + + const auto op = make_shared(); + op->set_arguments(OutputVector{data, shape}); + op->set_special_zero(true); + op->validate_and_infer_types(); + + EXPECT_EQ(op->get_output_partial_shape(0), PartialShape({{3, 5}, {2, 4}, 1, {2, 4}, {10, 32}})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, 22, 13, ov::no_label)); +} + +TEST(type_prop, reshape_deduce_wrong_output_shape) { + auto param = make_shared(element::f32, Shape{3, 4, 5}); + + OV_EXPECT_THROW( + ignore = make_shared(param, op::v0::Constant::create(element::u64, {3}, {3, 3, 3}), false), + NodeValidationFailure, + HasSubstr("is incompatible with input shape")); +} + +TEST(type_prop, reshape_pattern_shape_not_1d) { + auto param = make_shared(element::f32, Shape{3, 4, 5}); + + OV_EXPECT_THROW( + ignore = + make_shared(param, op::v0::Constant::create(element::u64, {3, 1}, Shape{3, 5, 4}), false), + NodeValidationFailure, + HasSubstr("Pattern shape must have rank 1 or be empty")); +} + +TEST(type_prop, reshape_multiple_minus_one_no_special_zero) { + const auto data = make_shared(element::f32, PartialShape{{1, 2}, {2, 4}, 6, {2, 4}, 8}); + + OV_EXPECT_THROW( + ignore = make_shared(data, op::v0::Constant::create(element::i64, {3}, {-1, 5, -1}), false), + NodeValidationFailure, + HasSubstr("More than one dimension has size of -1")); +} + +TEST(type_prop, reshape_multiple_minus_one_special_zero_set) { + const auto data = make_shared(element::f32, PartialShape{{1, 2}, {2, 4}, 6, {2, 4}, 8}); + + OV_EXPECT_THROW( + ignore = make_shared(data, op::v0::Constant::create(element::i64, {3}, {-1, 5, -1}), true), + NodeValidationFailure, + 
HasSubstr("More than one dimension has size of -1")); +} + +TEST(type_prop, reshape_special_zero_out_of_data_rank) { + const auto data = make_shared(element::f32, PartialShape{{1, 2}, {2, 4}, 8}); + + OV_EXPECT_THROW( + ignore = make_shared(data, op::v0::Constant::create(element::i64, {4}, {5, 1, 1, 0}), true), + NodeValidationFailure, + HasSubstr("'0' dimension is out of range")); +} + +TEST(type_prop, reshape_special_zero_cannot_div) { + const auto data = make_shared(element::f32, PartialShape{2, 5, 4}); + + OV_EXPECT_THROW( + ignore = make_shared(data, op::v0::Constant::create(element::i64, {3}, {10, -1, 3}), false), + NodeValidationFailure, + HasSubstr("Non-'-1' output dimensions do not evenly divide the input dimensions")); +} + +TEST(type_prop, reshape_zero_dim_in_output_pattern_but_not_in_data_shape) { + const auto data = make_shared(element::f32, PartialShape{2, 5, 4}); + + OV_EXPECT_THROW( + ignore = make_shared(data, op::v0::Constant::create(element::i64, {3}, {5, 0, -1}), false), + NodeValidationFailure, + HasSubstr("Cannot infer '-1' dimension with zero-size output dimension unless at least one input dimension is " + "also zero-size")); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_input_has_zero) { + auto data_shape = PartialShape{4, 0, 2, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{3}, std::vector{12, 0, 1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({12, 0, 1})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 22)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_1) { + auto data_shape = PartialShape{4, -1, 2, 1, 3}; + set_shape_labels(data_shape, 10); + + 
auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{3}, std::vector{-1, 12, 2}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({-1, 12, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(11, 21, 22)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_2) { + auto data_shape = PartialShape{4, 2, {2, 6}, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{3}, std::vector{-1, 12, 2}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{2, 6}, 12, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(12, 21, 22)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_3) { + auto data_shape = PartialShape{{2, 4}, 2, {2, 6}, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{3}, std::vector{-1, 12, 2}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{1, 6}, 12, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, 21, 22)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_4) { + PartialShape data_shape = PartialShape{2, {2, 4}, 2, 3}; + set_shape_labels(data_shape, 10); + + auto input = 
make_shared(element::f32, data_shape); + auto output_pattern = + make_shared(element::i64, Shape{6}, std::vector{1, 4, 3, 1, 1, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24, 25}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 4, 3, 1, 1, {2, 4}})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 22, 23, 24, 11)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_5) { + PartialShape data_shape = PartialShape{2, 4, 2, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = + make_shared(element::i64, Shape{6}, std::vector{1, 4, 3, 1, 1, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24, 25}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 4, 3, 1, 1, 4})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 22, 23, 24, no_label)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_6) { + PartialShape data_shape = PartialShape{2, 3, 2, 1, 4}; + DimensionTracker::set_label(data_shape[1], 11); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{5}, std::vector{4, 1, -1, 1, 4}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({4, 1, 3, 1, 4})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, no_label, 23, 24)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_7) { + 
PartialShape data_shape = PartialShape{{1, 2}, 4, 2, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{4}, std::vector{4, 2, 3, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23}); - PartialShape initial_shape = PartialShape{marked_0, 4, 3, 1}; + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({4, 2, 3, {1, 2}})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 22, 10)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_8) { + PartialShape data_shape = PartialShape{{1, 2}, 4, 2, 3}; + DimensionTracker::set_label(data_shape[0], 121); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{4}, std::vector{4, 2, 3, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({4, 2, 3, {1, 2}})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 22, 121)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_9) { + PartialShape data_shape = PartialShape{2, 4, 2, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{4}, std::vector{4, 2, 3, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({4, 2, 3, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 21, 
22, no_label)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_no_special_zero_case_10) { + PartialShape data_shape = PartialShape{1, {1, -1}, {1, -1}, 512}; + set_shape_labels(data_shape, 10); + constexpr int64_t squeeze_dim = 7 * 7 * 512; + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{2}, std::vector{-1, squeeze_dim}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{1, -1}, squeeze_dim})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, 21)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_1) { + auto data_shape = PartialShape{4, -1, 2, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{3}, std::vector{-1, 12, 0}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({-1, 12, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(11, 21, 12)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_2) { + auto data_shape = PartialShape{{2, 4}, 8, {2, 6}, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{4}, std::vector{0, -1, 12, 2}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{2, 4}, {2, 6}, 12, 2})); + 
EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, 12, 22, 23)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_3) { + auto data_shape = PartialShape{{2, 4}, 8, 6, 1, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{4}, std::vector{0, -1, 12, 2}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{2, 4}, 6, 12, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, no_label, 22, 23)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_4) { + PartialShape data_shape = PartialShape{2, 10, 4, {1, 5}, {1, 2}, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = + make_shared(element::i64, Shape{7}, std::vector{1, 0, 4, 0, 6, 1, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24, 25, 26}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 10, 4, {1, 5}, 6, 1, {1, 2}})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 11, 22, 13, 24, 25, 14)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_5) { + PartialShape data_shape = PartialShape{2, 10, 4, {1, 5}, 2, 3}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = + make_shared(element::i64, Shape{7}, std::vector{1, 0, 4, 0, 6, 1, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24, 25, 26}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto 
output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 10, 4, {1, 5}, 6, 1, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, 11, 22, 13, 24, 25, no_label)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_6) { + PartialShape data_shape = PartialShape{2, 3, 2, 1, 4}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{5}, std::vector{0, 0, -1, 0, 0}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({2, 3, 2, 1, 4})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, 11, 12, 13, 14)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_7) { + auto data_shape = PartialShape{{2, 4}, 12, -1, 1, 2}; + DimensionTracker::set_label(data_shape[2], 121); + DimensionTracker::set_label(data_shape[0], 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{5}, std::vector{0, -1, 3, 4, 3}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, no_label}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{2, 4}, -1, 3, 4, 3})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, no_label, 22, 23, no_label)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_8) { + auto data_shape = PartialShape{{2, 4}, 4, -1, 1, 3, 3}; + DimensionTracker::set_label(data_shape[2], 121); + DimensionTracker::set_label(data_shape[0], 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = 
make_shared(element::i64, Shape{5}, std::vector{0, -1, 3, 4, 3}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, no_label}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({{2, 4}, -1, 3, 4, 3})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, 121, 22, 23, no_label)); +} + +TEST(type_prop, reshape_label_propagation_minus_one_special_zero_case_9) { + PartialShape data_shape = PartialShape{2, 3, {2, 4}, 1, 4}; + set_shape_labels(data_shape, 10); + + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{5}, std::vector{0, 0, -1, 1, 0}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21, 22, 23, 24}); + + const auto reshape = make_shared(input, output_pattern, true); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({2, 3, {2, 4}, 1, 4})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(10, 11, 12, 23, 14)); +} + +TEST(type_prop, reshape_tricky_label_propagation_for_auto_batch_case_1) { + auto shape = PartialShape({1, 1280, 1, 1}); + DimensionTracker::set_label(shape[0], 1); + auto param = make_shared(element::f32, shape); + auto pattern = op::v0::Constant::create(element::i64, {2}, {-1, 1280}); + auto r = make_shared(param, pattern, false); + + auto output_shape = r->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 1280})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, no_label)); +} + +TEST(type_prop, reshape_tricky_label_propagation_for_auto_batch_case_2) { + auto shape = ov::PartialShape({1, 1280, 1, 1}); + DimensionTracker::set_label(shape[2], 2); + auto param = make_shared(element::f32, shape); + auto pattern = op::v0::Constant::create(element::i64, {2}, {-1, 1280}); + auto r = make_shared(param, pattern, 
false); + + auto output_shape = r->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 1280})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, no_label)); +} + +TEST(type_prop, reshape_tricky_label_propagation_for_auto_batch_case_3) { + auto shape = PartialShape({1, 1280, 1, 1}); + DimensionTracker::set_label(shape[0], 1); + DimensionTracker::set_label(shape[2], 2); + auto param = make_shared(element::f32, shape); + auto pattern = op::v0::Constant::create(element::i64, {2}, {-1, 1280}); + auto r = make_shared(param, pattern, false); + + auto output_shape = r->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 1280})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, no_label)); +} + +TEST(type_prop, reshape_tricky_label_propagation_for_auto_batch_case_4) { + auto shape = PartialShape({1, 1280}); + DimensionTracker::set_label(shape[0], 1); + auto param = make_shared(element::f32, shape); + auto pattern = op::v0::Constant::create(element::i64, {2}, {-1, 1280}); + auto r = make_shared(param, pattern, false); + + auto output_shape = r->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({1, 1280})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(1, no_label)); +} + +TEST(type_prop, reshape_resolve_minus_one_when_static_product_same_value) { + auto data_shape = PartialShape{2, 3, 4, 5}; + set_shape_labels(data_shape, 10); + auto input = make_shared(element::f32, data_shape); + auto output_pattern = make_shared(element::i64, Shape{2}, std::vector{120, -1}); + output_pattern->get_default_output().get_tensor().set_value_label({20, 21}); + + const auto reshape = make_shared(input, output_pattern, false); + + auto output_shape = reshape->get_output_partial_shape(0); + EXPECT_EQ(output_shape, PartialShape({120, 1})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(20, no_label)); +} - auto input = std::make_shared(element::f32, initial_shape); - auto 
output_pattern = std::make_shared(element::i64, Shape{2}, std::vector{-1, 12}); +TEST(type_prop, reshape_label_not_propagated_on_minus_one_dim_as_not_same_dynamic_dim) { + auto data_shape = PartialShape{-1, 2}; + auto pattern_shape = PartialShape{-1, -1, 2}; + set_shape_labels(data_shape, {90, no_label}); + set_shape_labels(pattern_shape, {37, 87, 98}); - const auto reshape = std::make_shared(input, output_pattern, false); + auto pattern = make_shared(element::i32, pattern_shape); + auto pattern_shape_of = make_shared(pattern, element::i32); + auto dim_minus_one = ov::op::v0::Constant::create(element::i32, {1}, {-1}); + dim_minus_one->get_default_output().get_tensor().set_value_label({93}); + auto output_pattern = make_shared(OutputVector{dim_minus_one, pattern_shape_of}, 0); + auto input = make_shared(element::f32, data_shape); + const auto reshape = make_shared(input, output_pattern, false); auto output_shape = reshape->get_output_partial_shape(0); - ASSERT_EQ(output_shape, PartialShape({-1, 12})); - ASSERT_EQ(ov::DimensionTracker::get_label(output_shape[0]), 10); - ASSERT_EQ(ov::DimensionTracker::get_label(output_shape[1]), 0); + EXPECT_EQ(output_shape, PartialShape({-1, -1, -1, 2})); + EXPECT_THAT(get_shape_labels(output_shape), ElementsAre(no_label, 37, 87, 98)); } diff --git a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp index cec8be37896516..fb7d9008b551d3 100644 --- a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp @@ -82,6 +82,7 @@ #include "reduce_shape_inference.hpp" #include "region_yolo_shape_inference.hpp" #include "reorg_yolo_shape_inference.hpp" +#include "reshape_shape_inference.hpp" #include "reverse_sequence_shape_inference.hpp" #include "reverse_shape_inference.hpp" #include "rnn_cell_shape_inference.hpp" diff --git 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reshape_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reshape_shape_inference_test.cpp new file mode 100644 index 00000000000000..43644979b4cf79 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reshape_shape_inference_test.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "openvino/op/reshape.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using namespace testing; + +class ReshapeV1StaticShapeInferenceTest : public OpStaticShapeInferenceTest {}; + +TEST_F(ReshapeV1StaticShapeInferenceTest, default_ctor_no_args) { + op = make_op(); + op->set_special_zero(true); + + int64_t shape_pattern[] = {2, 4, 0, 1, -1}; + auto const_data = std::unordered_map{{1, Tensor(element::i64, ov::Shape{5}, shape_pattern)}}; + input_shapes = ShapeVector{{2, 9, 12, 8}, {5}}; + + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({2, 4, 12, 1, 18})); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, all_inputs_are_dynamic_rank) { + int64_t shape_pattern[] = {2, 4, 0, 1, -1}; + auto const_data = std::unordered_map{{1, Tensor(element::i64, ov::Shape{5}, shape_pattern)}}; + + const auto data = std::make_shared(element::i16, PartialShape::dynamic()); + const auto pattern = std::make_shared(element::i64, PartialShape::dynamic()); + op = make_op(data, pattern, true); + + input_shapes = ShapeVector{{9, 24, 8}, {5}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({2, 4, 8, 1, 27})); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, all_inputs_are_static_rank) { + int64_t shape_pattern[] = {2, 4, 1, -1}; + 
auto const_data = std::unordered_map{{1, Tensor(element::i64, ov::Shape{4}, shape_pattern)}}; + + const auto data = std::make_shared(element::i16, PartialShape::dynamic(5)); + const auto pattern = std::make_shared(element::i64, PartialShape::dynamic(1)); + op = make_op(data, pattern, false); + + input_shapes = ShapeVector{{9, 24, 8}, {4}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({2, 4, 1, 216})); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, pattern_with_special_values) { + const auto data = std::make_shared(element::f32, PartialShape::dynamic(4)); + const auto pattern = op::v0::Constant::create(element::i32, ov::Shape{2}, {0, -1}); + + op = make_op(data, pattern, true); + + input_shapes = ShapeVector{{3, 6, 5, 5}, {2}}; + output_shapes = shape_inference(op.get(), input_shapes); + + EXPECT_EQ(output_shapes.front(), StaticShape({3, 150})); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, reshape_to_empty_volume) { + const auto data = std::make_shared(element::f32, PartialShape{-1, 2, 2}); + const auto pattern = op::v0::Constant::create(element::i32, ov::Shape{2}, {0, 4}); + + op = make_op(data, pattern, false); + + input_shapes = ShapeVector{{0, 2, 2}, {2}}; + output_shapes = shape_inference(op.get(), input_shapes); + + EXPECT_EQ(output_shapes.front(), StaticShape({0, 4})); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, reshape_pattern_not_defined) { + const auto data = std::make_shared(element::i16, PartialShape::dynamic()); + const auto pattern = std::make_shared(element::i64, PartialShape::dynamic()); + op = make_op(data, pattern, true); + + input_shapes = ShapeVector{{9, 24, 8}, {5}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + HasSubstr("Static shape inference lacks constant data on port 1")); +} + +TEST_F(ReshapeV1StaticShapeInferenceTest, shape_pattern_as_constant) { + const auto data = 
std::make_shared(element::i16, PartialShape::dynamic(5)); + const auto pattern = op::v0::Constant::create(element::i32, ov::Shape{3}, {2, 4, 1}); + op = make_op(data, pattern, false); + + input_shapes = ShapeVector{{9, 24, 8}, {4}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + HasSubstr("is incompatible with input shape")); +} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp index 51379da0339e1a..40f08467847a8a 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp @@ -9,30 +9,6 @@ using namespace ov; using namespace ov::intel_cpu; -TEST(StaticShapeInferenceTest, ReshapeTest) { - auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); - auto pattern = std::make_shared(element::i32, Shape{2}, std::vector{0, -1}); - - auto reduce = std::make_shared(data, pattern, true); - - std::vector static_input_shapes = {StaticShape{3, 6, 5, 5}, StaticShape{2}}; - const auto static_output_shapes = shape_inference(reduce.get(), static_input_shapes); - - ASSERT_EQ(static_output_shapes[0], StaticShape({3, 150})); -} - -TEST(StaticShapeInferenceTest, ReshapeEmptyTest) { - auto data = std::make_shared(element::f32, PartialShape{-1, 2, 2}); - auto pattern = std::make_shared(element::i32, Shape{2}, std::vector{0, 4}); - - auto reduce = std::make_shared(data, pattern, false); - - std::vector static_input_shapes = {StaticShape{0, 2, 2}, StaticShape{2}}; - const auto static_output_shapes = shape_inference(reduce.get(), static_input_shapes); - - ASSERT_EQ(static_output_shapes[0], StaticShape({0, 4})); -} - TEST(StaticShapeInferenceTest, ShapeOf5DTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp index 0cbe8685af77b0..6dcb4e658e244d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp @@ -11,6 +11,7 @@ #include "shape_inference/static_shape.hpp" namespace ov { + namespace intel_cpu { using ShapeVector = std::vector; diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index c377ef45384b44..ecfd4a471194e6 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -8,7 +8,7 @@ #include "json_object.h" #include "primitive_type_base.h" #include "reshape_inst.h" -#include "shape_nodes.hpp" +#include "reshape_shape_inference.hpp" #include "squeeze_shape_inference.hpp" #include "unsqueeze_shape_inference.hpp" @@ -94,7 +94,7 @@ std::vector reshape_inst::calc_output_layouts(reshape_node const& /*node ov::op::v1::Reshape op; op.set_special_zero(prim->special_zero); op.set_friendly_name(prim->id.c_str()); - output_shapes = shape_infer(&op, input_shapes, ta); + output_shapes = ov::op::v1::shape_infer(&op, input_shapes, ta); break; } case reshape::reshape_mode::squeeze: { From 79f90839b99cf8ef1263e36ba78f28bec58ed82f Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 27 Oct 2023 20:14:08 +0400 Subject: [PATCH 106/275] [GPU] Int4 utils fix (#20726) --- .../include/batch_headers/int4_utils.cl | 72 ++++++++++--------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl index db786b30c15f4a..66bb5faa12ac06 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl +++ 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/int4_utils.cl @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "common.cl" + typedef struct __attribute__ ((packed)) int4x2_t { char s0; } int4x2_t; typedef struct __attribute__ ((packed)) int4x4_t { int4x2_t s0; int4x2_t s1; } int4x4_t; typedef struct __attribute__ ((packed)) int4x8_t { int4x2_t s0; int4x2_t s1; int4x2_t s2; int4x2_t s3; } int4x8_t; @@ -26,55 +28,35 @@ inline char2 cvt_int4x2_to_int8x2(int4x2_t v) __attribute__((overloadable)) { return (char2)(v0, v1); } -inline half2 unpack_to_half(uint4x2_t v) __attribute__((overloadable)) { - return convert_half2(cvt_uint4x2_to_uint8x2(v)); -} - inline float2 unpack_to_float(uint4x2_t v) __attribute__((overloadable)) { return convert_float2(cvt_uint4x2_to_uint8x2(v)); } -inline half2 unpack_to_half(int4x2_t v) __attribute__((overloadable)) { - return convert_half2(cvt_int4x2_to_int8x2(v)); -} - inline float2 unpack_to_float(int4x2_t v) __attribute__((overloadable)) { return convert_float2(cvt_int4x2_to_int8x2(v)); } -inline half4 unpack_to_half(uint4x4_t v) __attribute__((overloadable)) { - half2 f0 = unpack_to_half(v.s0); - half2 f1 = unpack_to_half(v.s1); - return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); -} - inline float4 unpack_to_float(uint4x4_t v) __attribute__((overloadable)) { float2 f0 = unpack_to_float(v.s0); float2 f1 = unpack_to_float(v.s1); return (float4)(f0.s0, f0.s1, f1.s0, f1.s1); } -inline half4 unpack_to_half(int4x4_t v) __attribute__((overloadable)) { - half2 f0 = unpack_to_half(v.s0); - half2 f1 = unpack_to_half(v.s1); - return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); -} - inline float4 unpack_to_float(int4x4_t v) __attribute__((overloadable)) { float2 f0 = unpack_to_float(v.s0); float2 f1 = unpack_to_float(v.s1); return (float4)(f0.s0, f0.s1, f1.s0, f1.s1); } -inline half8 unpack_to_half(uint4x8_t v) __attribute__((overloadable)) { - half2 f0 = unpack_to_half(v.s0); - half2 f1 = unpack_to_half(v.s1); - 
half2 f2 = unpack_to_half(v.s2); - half2 f3 = unpack_to_half(v.s3); - return (half8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +inline float8 unpack_to_float(uint4x8_t v) __attribute__((overloadable)) { + float2 f0 = unpack_to_float(v.s0); + float2 f1 = unpack_to_float(v.s1); + float2 f2 = unpack_to_float(v.s2); + float2 f3 = unpack_to_float(v.s3); + return (float8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); } -inline float8 unpack_to_float(uint4x8_t v) __attribute__((overloadable)) { +inline float8 unpack_to_float(int4x8_t v) __attribute__((overloadable)) { float2 f0 = unpack_to_float(v.s0); float2 f1 = unpack_to_float(v.s1); float2 f2 = unpack_to_float(v.s2); @@ -82,7 +64,28 @@ inline float8 unpack_to_float(uint4x8_t v) __attribute__((overloadable)) { return (float8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); } -inline half8 unpack_to_half(int4x8_t v) __attribute__((overloadable)) { +#if defined(cl_khr_fp16) +inline half2 unpack_to_half(uint4x2_t v) __attribute__((overloadable)) { + return convert_half2(cvt_uint4x2_to_uint8x2(v)); +} + +inline half2 unpack_to_half(int4x2_t v) __attribute__((overloadable)) { + return convert_half2(cvt_int4x2_to_int8x2(v)); +} + +inline half4 unpack_to_half(uint4x4_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline half4 unpack_to_half(int4x4_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + return (half4)(f0.s0, f0.s1, f1.s0, f1.s1); +} + +inline half8 unpack_to_half(uint4x8_t v) __attribute__((overloadable)) { half2 f0 = unpack_to_half(v.s0); half2 f1 = unpack_to_half(v.s1); half2 f2 = unpack_to_half(v.s2); @@ -90,12 +93,13 @@ inline half8 unpack_to_half(int4x8_t v) __attribute__((overloadable)) { return (half8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); } -inline float8 unpack_to_float(int4x8_t v) 
__attribute__((overloadable)) { - float2 f0 = unpack_to_float(v.s0); - float2 f1 = unpack_to_float(v.s1); - float2 f2 = unpack_to_float(v.s2); - float2 f3 = unpack_to_float(v.s3); - return (float8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); +inline half8 unpack_to_half(int4x8_t v) __attribute__((overloadable)) { + half2 f0 = unpack_to_half(v.s0); + half2 f1 = unpack_to_half(v.s1); + half2 f2 = unpack_to_half(v.s2); + half2 f3 = unpack_to_half(v.s3); + return (half8)(f0.s0, f0.s1, f1.s0, f1.s1, f2.s0, f2.s1, f3.s0, f3.s1); } +#endif // defined(cl_khr_fp16) #define UNPACK_INT4x2(target_type, value) CAT(unpack_to_, target_type)(value) From cde757d66a26cdd46deca0321e9d92aa4c15b1bd Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 27 Oct 2023 20:54:57 +0400 Subject: [PATCH 107/275] [GPU] Adjust in/out layouts in codition prim to handle prealloc (#20740) --- src/plugins/intel_gpu/src/graph/condition.cpp | 8 ++ .../src/graph/impls/common/condition.cpp | 8 ++ .../unit/test_cases/condition_gpu_test.cpp | 104 +++++++++++++++++- 3 files changed, 118 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/condition.cpp b/src/plugins/intel_gpu/src/graph/condition.cpp index 737725ebcea8f6..c564623fcd0529 100644 --- a/src/plugins/intel_gpu/src/graph/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/condition.cpp @@ -258,6 +258,14 @@ void condition_inst::postprocess_output_memory(network::ptr executed_net, cldnn: auto out_mem_idx = out_mem_map.first; auto inner_out_id = out_mem_map.second; auto mem_ptr = executed_net->get_output(inner_out_id).get_memory(); + if (mem_ptr) { + auto layout = _impl_params->get_output_layout(out_mem_idx); + GPU_DEBUG_LOG << "Reshape output from " << mem_ptr->get_layout().to_short_string() + << " to " << layout.to_short_string() << std::endl; + // Preallocation logic may allocate more memory than actually produced on current iteration, so we need to adjust output buffers layout + mem_ptr = 
get_network().get_engine().reinterpret_buffer(*mem_ptr, layout); + } + _outputs[out_mem_idx] = mem_ptr; GPU_DEBUG_LOG << "Inner net - Outputs[" << out_mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl; } diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index 0c9b7d843beffa..02c94ebf31e881 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -48,6 +48,14 @@ struct condition_impl : typed_primitive_impl { if (iter != branch.input_map.end()) { const primitive_id& input_internal_id = iter->second; auto mem_ptr = instance.input_memory_ptr(mem_idx); + if (mem_ptr) { + auto dep = instance.dependencies()[mem_idx]; + auto layout = dep.first->get_impl_params()->get_output_layout(dep.second); + GPU_DEBUG_LOG << "Reshape input from " << mem_ptr->get_layout().to_short_string() + << " to " << layout.to_short_string() << std::endl; + // Preallocation logic may allocate more memory than actually produced on current iteration, so we need to adjust input buffers layout + mem_ptr = instance.get_network().get_engine().reinterpret_buffer(*mem_ptr, layout); + } executed_net->set_input_data(input_internal_id, mem_ptr); GPU_DEBUG_LOG << "Inner net - Inputs[" << mem_idx << "]" << mem_ptr->get_layout().to_short_string() << std::endl; } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp index fda7c1c41e5c12..35a53c89d953c5 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp @@ -2,6 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/primitives/permute.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "random_generator.hpp" #include "test_utils.h" #include @@ -19,8 +22,9 
@@ using namespace ::tests; namespace { template -bool is_output_equal(const cldnn::memory::ptr mem, const std::vector& ref) -{ +bool is_output_equal(const cldnn::memory::ptr mem, const std::vector& ref) { + if (mem->count() != ref.size()) + return false; cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t i = 0; i < mem->get_layout().count(); i++) { if (!are_equal(ptr[i], ref[i])) return false; @@ -239,6 +243,102 @@ TEST(condition_gpu, basic_range_equal_comp) { ASSERT_TRUE(is_output_equal(out_data_false, pooling_when_false_data)); } +TEST(condition_gpu, dynamic_shapes) { + auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + const int64_t d1 = 2; + const int64_t d2 = 4; + layout input_lay = {{-1, d1, -1, d2}, data_types::f32, format::bfyx}; + + auto predicate = engine.allocate_memory({{ 1 }, data_types::u8, format::bfyx }); + + const primitive_id condition_id = "condition"; + const primitive_id condition_id_true = condition_id + "_when_true"; + const primitive_id condition_id_false = condition_id + "_when_false"; + const primitive_id branch_input_id = "branch_input"; + const primitive_id model_input = "input"; + const primitive_id predicate_input = "predicate"; + const primitive_id tranpose = "transpose"; + + cldnn::topology topology; + topology.add(input_layout(model_input, input_lay)); + topology.add(input_layout(predicate_input, predicate->get_layout())); + topology.add(permute(tranpose, model_input, {1, 0, 2, 3})); + const float shift = 4.f; + + auto generate_simple_branch = [&](bool branch_true_false, const primitive_id& input_id, const data_types dt) { + auto mem = engine.allocate_memory(layout{{d1, 1, 1, d2}, dt, format::bfyx}); + { + cldnn::mem_lock l(mem, get_test_stream()); + for (size_t i = 0; i < mem->count(); i++) { + l.data()[i] = shift; + } + } + + primitive_id const_id = 
"const_input"; + eltwise_mode mode = branch_true_false ? eltwise_mode::sum : eltwise_mode::sub; + auto id = branch_true_false ? condition_id_true : condition_id_false; + cldnn::topology branch_topology(input_layout(input_id, { {d1, -1, -1, d2}, dt, format::bfyx }), + data(const_id, mem), + eltwise(id, {input_id, const_id}, mode) + ); + condition::branch branch; + branch.inner_program = program::build_program(engine, branch_topology, config, false, false, true); + branch.input_map.insert({tranpose, branch_input_id}); + branch.output_map.insert({0, id}); + + return branch; + }; + + condition::branch branch_true = generate_simple_branch(true, branch_input_id, data_types::f32); + condition::branch branch_false = generate_simple_branch(false, branch_input_id, data_types::f32); + + topology.add(condition(condition_id, { input_info(predicate_input), tranpose }, branch_true, branch_false)); + + tests::random_generator rg(GET_SUITE_NAME); + std::vector predicate_data_true = { 1 }; + std::vector predicate_data_false = { 0 }; + + network net(engine, topology, config); + + auto check_output = [](const cldnn::memory::ptr mem, const std::vector& ref, ov::Shape expected_shape) { + ASSERT_EQ(mem->get_layout().get_shape(), expected_shape); + ASSERT_EQ(mem->count(), ref.size()); + cldnn::mem_lock ptr(mem, get_test_stream()); + for (size_t i = 0; i < mem->get_layout().count(); i++) { + ASSERT_EQ(ptr[i], ref[i]) << "i = " << i; + } + }; + + for (size_t i = 0; i < 10; i++) { + layout l = {{1, d1, 1 + static_cast(i), d2}, data_types::f32, format::bfyx}; + std::vector input_data = rg.generate_random_1d(l.count(), -10, 10); + auto mem = engine.allocate_memory(l); + std::vector expected_result_when_true = input_data; + std::vector expected_result_when_false = input_data; + + for (size_t i = 0; i < input_data.size(); i++) { + expected_result_when_true[i] += shift; + expected_result_when_false[i] -= shift; + } + + set_values(mem, input_data); + set_values(predicate, predicate_data_true); + 
net.set_input_data(model_input, mem); + net.set_input_data(predicate_input, predicate); + auto outputs = net.execute(); + check_output(outputs.at(condition_id).get_memory(), expected_result_when_true, {d1, 1, 1+i, d2}); + + set_values(predicate, predicate_data_false); + net.set_input_data(model_input, mem); + net.set_input_data(predicate_input, predicate); + outputs = net.execute(); + check_output(outputs.at(condition_id).get_memory(), expected_result_when_false, {d1, 1, 1+i, d2}); + } +} + TEST(condition_gpu, basic_stacked_ifs) { /* From f029ebb8e240690209e4b97360463b30350216e6 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 27 Oct 2023 19:04:30 +0200 Subject: [PATCH 108/275] [PT FE] Fix mask2former model marks in tests (#20717) * [PT FE] Fix mask2former model marks in tests * Use better machine * Add more models * Update .github/workflows/linux.yml --- tests/model_hub_tests/torch_tests/hf_transformers_models | 7 +++++-- tests/model_hub_tests/torch_tests/test_hf_transformers.py | 7 ++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models index 31a24b681eb4c5..dd41a18235b688 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -10,6 +10,7 @@ albert-base-v2,albert AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem +allenai/longformer-base-4096,longformer ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm anton-l/emformer-base-librispeech,emformer,skip,Load problem @@ -71,7 +72,7 @@ facebook/esm2_t6_8M_UR50D,esm facebook/flava-full,flava,xfail,Tracing problem facebook/flava-image-codebook,flava_image_codebook,skip,Load 
problem facebook/m2m100_418M,m2m_100 -facebook/mask2former-swin-base-coco-panoptic,mask2former,xfail,Accuracy validation failed +facebook/mask2former-swin-base-coco-panoptic,mask2former facebook/maskformer-swin-base-coco,maskformer facebook/mms-tts-eng,vits,skip,Load problem facebook/musicgen-small,musicgen,skip,Load problem @@ -92,6 +93,7 @@ Geor111y/flair-ner-addresses-extractor,flair,skip,Load problem gia-project/gia,gia,skip,Load problem gokuls/bert_12_layer_model_v1,hybridbert,skip,Load problem google/bigbird-roberta-base,big_bird +google/bigbird-pegasus-large-arxiv,bigbird-pegasus google/bit-50,bit google/canine-s,canine,xfail,aten::slice: Parameter axis 3 out of the tensor rank range google/efficientnet-b2,efficientnet,xfail,Compile error: AvgPool: Kernel after dilation has size (dim: 1408) larger than the data shape after padding (dim: 9) at axis 0. @@ -105,7 +107,7 @@ google/owlvit-base-patch32,owlvit google/pix2struct-docvqa-base,pix2struct,skip,Load problem google/realm-orqa-nq-openqa,realm,skip,Load problem google/reformer-crime-and-punishment,reformer,xfail,Tracing problem -google/tapas-large-finetuned-wtq,tapas,skip,Load problem +google/tapas-large-finetuned-wtq,tapas google/vit-hybrid-base-bit-384,vit-hybrid,skip,Load problem google/vivit-b-16x2-kinetics400,vivit Goutham-Vignesh/ContributionSentClassification-scibert,scibert,skip,Load problem @@ -300,6 +302,7 @@ pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load probl pleisto/yuren-baichuan-7b,multimodal_llama,skip,Load problem predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem +pszemraj/led-large-book-summary,led qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py 
b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 8b595e5425668a..8e3ea5ecfdd2c9 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -276,7 +276,9 @@ def prepare_inputs(self, inputs_info): return [i.numpy() for i in self.example] def convert_model(self, model_obj): - ov_model = convert_model(model_obj, example_input=self.example) + ov_model = convert_model(model_obj, + example_input=self.example, + verbose=True) return ov_model def infer_fw_model(self, model_obj, inputs): @@ -297,8 +299,7 @@ def teardown_method(self): ("google/flan-t5-base", "t5"), ("google/tapas-large-finetuned-wtq", "tapas"), ("gpt2", "gpt2"), - ("openai/clip-vit-large-patch14", "clip"), - ("facebook/xmod-base","xmod") + ("openai/clip-vit-large-patch14", "clip") ]) @pytest.mark.precommit def test_convert_model_precommit(self, name, type, ie_device): From 1d4520e60d249f8e95a1dbf864270c1ba57e7417 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 27 Oct 2023 21:05:06 +0400 Subject: [PATCH 109/275] [Snippets][CPU] Disabled SplitDimensionM CPU Func tests (#20741) --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 927c06ed13400d..21483175aed169 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -195,6 +195,8 @@ std::vector disabledTestPatterns() { R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)", // Issue: 123427 R"(.*RDFTLayerTest.*SignalSize=().*)", + // Issue: 123815 (Tests are sensintive to available thread count on testing machines) + 
R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)", }; #if defined(OPENVINO_ARCH_X86) From 539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Sat, 28 Oct 2023 00:41:37 +0200 Subject: [PATCH 110/275] keep disable_fp16_compression rt_info (#20625) * keep disable_fp16_compression rt_info * style fix * style fix 2 * cleanup init_node_info.cpp; redefining a class for rt_info in Serialize * move rt_info refreshing inside serialize.cpp * rename rt_info name in IR * add rt_info serialize test * add ticket number * updated comments * code style fix --------- Co-authored-by: Andrei Kochin --- .../rt_info/disable_fp16_compression.hpp | 9 ++++-- src/core/src/pass/serialize.cpp | 8 ++++++ .../serialization/rt_info_serialization.cpp | 28 +++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp b/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp index 0ecbd3641adc78..065e2ad29f7a21 100644 --- a/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp +++ b/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp @@ -25,14 +25,19 @@ TRANSFORMATIONS_API void do_not_postpone_fp16_compression(RTMap& rt_info); /** * @ingroup ie_runtime_attr_api * @brief DisableFP16Compression class represents runtime info attribute that marks operation - * as prohibitted to convert to FP16 as part of Compressed Only format. + * as prohibited to convert to lower precision (e.g. to FP16) and they should be inferred precisely in the original + * precision. 
*/ class TRANSFORMATIONS_API DisableFP16Compression : public RuntimeAttribute { public: - OPENVINO_RTTI("disable_fp16_compression", "0"); + OPENVINO_RTTI("precise", "0"); DisableFP16Compression() = default; + bool visit_attributes(AttributeVisitor& visitor) override { + return true; + } + bool is_copyable() const override { return false; } diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index c879e8780f1370..beb3910291ae16 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -1208,6 +1208,14 @@ void serializeFunc(std::ostream& xml_file, namespace ov { bool pass::Serialize::run_on_model(const std::shared_ptr& model) { RUN_ON_FUNCTION_SCOPE(Serialize); + + // TODO xxx-105807: if rt_info is set in python api as a string ['precise_0'] = '', + // we need to convert value to a class in order to have rt_info in the IR. The code below will convert + // ['precise_0'] = '' into => rt_info['precise_0'] = DisableFP16Compression{} + for (auto& node : model->get_ops()) + if (fp16_compression_is_disabled(node)) + disable_fp16_compression(node); + if (m_xmlFile && m_binFile) { serializeFunc(*m_xmlFile, *m_binFile, model, m_version, m_custom_opsets); } else { diff --git a/src/core/tests/pass/serialization/rt_info_serialization.cpp b/src/core/tests/pass/serialization/rt_info_serialization.cpp index 78c25323a8c110..581b9a84457df4 100644 --- a/src/core/tests/pass/serialization/rt_info_serialization.cpp +++ b/src/core/tests/pass/serialization/rt_info_serialization.cpp @@ -112,6 +112,34 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { check_info(add->output(0).get_rt_info()); } +TEST_F(RTInfoSerializationTest, rt_info_precise_test) { + auto init_info = [](ov::RTMap& info) { + info[ov::DisableFP16Compression::get_type_info_static()] = ov::DisableFP16Compression{}; + }; + auto check_info = [](const ov::RTMap& info) { + const std::string& key = ov::DisableFP16Compression::get_type_info_static(); + 
ASSERT_TRUE(info.count(key)); + }; + + std::shared_ptr function; + { + auto data_1 = std::make_shared(ov::element::Type_t::f32, ov::Shape{1, 10}); + auto data_2 = std::make_shared(ov::element::Type_t::f32, ov::Shape{10, 1}); + auto matmul_1 = std::make_shared(data_1, data_2); + init_info(matmul_1->get_rt_info()); + auto result = std::make_shared(matmul_1); + function = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{data_1, data_2}); + } + ov::pass::Manager m; + m.register_pass(m_out_xml_path, m_out_bin_path); + m.run_passes(function); + auto f = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, f); + + auto matmul = f->get_results()[0]->get_input_node_ptr(0); + check_info(matmul->get_rt_info()); +} + TEST_F(RTInfoSerializationTest, all_attributes_v10) { auto init_info = [](ov::RTMap& info) { info[ov::FusedNames::get_type_info_static()] = ov::FusedNames("add"); From 53c9a0f3d4d7c62cc3afd010fd045ef2143e90c2 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Mon, 30 Oct 2023 08:30:01 +0400 Subject: [PATCH 111/275] update pytorch layer tests for torch 2.1 compatibility (#20264) * update pytorch layer tests for torch 2.1 compatibility --- tests/constraints.txt | 2 +- .../pytorch_tests/test_masked_fill.py | 18 ++++++++++++- .../pytorch_tests/test_masked_scatter.py | 27 +++++++++++++++++-- .../torch_tests/requirements.txt | 2 +- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/tests/constraints.txt b/tests/constraints.txt index fca15ab7cbe228..b877cd951f5a27 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -23,4 +23,4 @@ pytest-html==3.2.0 pytest-timeout==2.1.0 jax<=0.4.14 jaxlib<=0.4.14 -torch<2.1.0,>=1.13 \ No newline at end of file +torch>=1.13 diff --git a/tests/layer_tests/pytorch_tests/test_masked_fill.py b/tests/layer_tests/pytorch_tests/test_masked_fill.py index f86b6c4bbda16e..0f934843b077e8 100644 --- a/tests/layer_tests/pytorch_tests/test_masked_fill.py +++ 
b/tests/layer_tests/pytorch_tests/test_masked_fill.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np +import torch +from packaging.version import parse as parse_version import pytest from pytorch_layer_test_class import PytorchLayerTest @@ -48,7 +50,7 @@ def forward(self, x, mask): @pytest.mark.parametrize( "mask_fill", ['zeros', 'ones', 'random']) @pytest.mark.parametrize("input_dtype", [np.float32, np.float64, int, np.int32]) - @pytest.mark.parametrize("mask_dtype", [np.uint8, np.int32, bool]) # np.float32 incorrectly casted to bool + @pytest.mark.parametrize("mask_dtype", [bool]) # np.float32 incorrectly casted to bool @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @@ -56,3 +58,17 @@ def test_masked_fill(self, value, mask_fill, mask_dtype, input_dtype, inplace, i self._test(*self.create_model(value, inplace), ie_device, precision, ir_version, kwargs_to_prepare_input={'mask_fill': mask_fill, 'mask_dtype': mask_dtype, "input_dtype": input_dtype}) + + @pytest.mark.skipif(parse_version(torch.__version__) >= parse_version("2.1.0"), reason="pytorch 2.1 and above does not support nonboolean mask") + @pytest.mark.parametrize("value", [0.0, 1.0, -1.0, 2]) + @pytest.mark.parametrize( + "mask_fill", ['zeros', 'ones', 'random']) + @pytest.mark.parametrize("input_dtype", [np.float32, np.float64, int, np.int32]) + @pytest.mark.parametrize("mask_dtype", [np.uint8, np.int32]) # np.float32 incorrectly casted to bool + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_masked_fill_non_bool_mask(self, value, mask_fill, mask_dtype, input_dtype, inplace, ie_device, precision, ir_version): + self._test(*self.create_model(value, inplace), + ie_device, precision, ir_version, + kwargs_to_prepare_input={'mask_fill': mask_fill, 'mask_dtype': mask_dtype, "input_dtype": input_dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_masked_scatter.py 
b/tests/layer_tests/pytorch_tests/test_masked_scatter.py index 81aab9774d7b58..30c41b5e7f942a 100644 --- a/tests/layer_tests/pytorch_tests/test_masked_scatter.py +++ b/tests/layer_tests/pytorch_tests/test_masked_scatter.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import pytest +import torch +from packaging.version import parse as parse_version from pytorch_layer_test_class import PytorchLayerTest @@ -45,7 +47,7 @@ def forward_inplace(self, x, mask, source): @pytest.mark.precommit @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], [2, 6, 4, 1]]) @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) - @pytest.mark.parametrize("mask_dtype", ["bool", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["bool"]) @pytest.mark.parametrize("out", [True, False]) def test_masked_scatter(self, shape, input_dtype, mask_dtype, out, ie_device, precision, ir_version): self._test(*self.create_model(out), ie_device, precision, ir_version, @@ -55,7 +57,28 @@ def test_masked_scatter(self, shape, input_dtype, mask_dtype, out, ie_device, pr @pytest.mark.precommit @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], [2, 6, 4, 1]]) @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) - @pytest.mark.parametrize("mask_dtype", ["bool", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["bool"]) def test_masked_scatter_inplace(self, shape, input_dtype, mask_dtype, ie_device, precision, ir_version): + self._test(*self.create_model(inplace=True), ie_device, precision, ir_version, + kwargs_to_prepare_input={"shape": shape, "x_dtype": input_dtype, "mask_dtype": mask_dtype}) + + @pytest.mark.skipif(parse_version(torch.__version__) >= parse_version("2.1.0"), reason="pytorch 2.1 and above does not support nonboolean mask") + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], [2, 6, 4, 1]]) 
+ @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["uint8"]) + @pytest.mark.parametrize("out", [True, False]) + def test_masked_scatter_u8(self, shape, input_dtype, mask_dtype, out, ie_device, precision, ir_version): + self._test(*self.create_model(out), ie_device, precision, ir_version, + kwargs_to_prepare_input={"shape": shape, "x_dtype": input_dtype, "mask_dtype": mask_dtype, "out": out}) + + @pytest.mark.skipif(parse_version(torch.__version__) >= parse_version("2.1.0"), reason="pytorch 2.1 and above does not support nonboolean mask") + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("shape", [[2, 5], [10, 10], [2, 3, 4], [10, 5, 10, 3], [2, 6, 4, 1]]) + @pytest.mark.parametrize("input_dtype", ["float32", "int32", "float", "int", "uint8"]) + @pytest.mark.parametrize("mask_dtype", ["uint8"]) + def test_masked_scatter_inplace_u8(self, shape, input_dtype, mask_dtype, ie_device, precision, ir_version): self._test(*self.create_model(inplace=True), ie_device, precision, ir_version, kwargs_to_prepare_input={"shape": shape, "x_dtype": input_dtype, "mask_dtype": mask_dtype}) \ No newline at end of file diff --git a/tests/model_hub_tests/torch_tests/requirements.txt b/tests/model_hub_tests/torch_tests/requirements.txt index 406607b69d2d89..707fa63716ade2 100644 --- a/tests/model_hub_tests/torch_tests/requirements.txt +++ b/tests/model_hub_tests/torch_tests/requirements.txt @@ -2,7 +2,7 @@ numpy pytest pytest-html -torch +torch<2.1 torchvision av transformers From 5c6b7a5ed45bafd9a7409e0aed4361394593b621 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Mon, 30 Oct 2023 09:11:32 +0400 Subject: [PATCH 112/275] [GPU] Allow softmax_bf kernel for axis=X 4d case (#20699) --- .../kernels/softmax/softmax_kernel_base.cpp | 8 ++++++-- .../shared_tests_instances/single_layer_tests/softmax.cpp | 7 ++++++- .../tests/unit/dynamic_execution/memory_realloc_test.cpp | 2 ++ 3 files 
changed, 14 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp index 2287562a11c7f5..87361fab0052c4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_base.cpp @@ -92,9 +92,9 @@ bool SoftmaxKernelBaseBF::Validate(const Params& p, const optional_params& o) co switch (params.dim) { case SoftmaxDim::X: - return !input.Y().is_dynamic && input.Y().v == 1 && + return ((!input.Y().is_dynamic && input.Y().v == 1) || input.GetLayout() == DataLayout::bfyx) && !input.Z().is_dynamic && input.Z().v == 1 && - !input.Feature().is_dynamic && input.Feature().v == 1; + ((!input.Feature().is_dynamic && input.Feature().v == 1) || input.GetLayout() == DataLayout::bfyx); case SoftmaxDim::Y: return !input.X().is_dynamic && input.X().v == 1 && !input.Z().is_dynamic && input.Z().v == 1 && @@ -122,6 +122,10 @@ SoftmaxKernelBase::DispatchData SoftmaxKernelBaseBF::SetDefault(const softmax_pa OPENVINO_ASSERT(input.X().v == 1, "[GPU] SoftmaxKernelBaseBF: input.X() is expected to be 1 while actual value is ", input.X().v); dispatchData.dataSetSize = input.Y().v; dispatchData.dataSetsCount = input.Batch().v * input.Feature().v; + } else if (params.dim == SoftmaxDim::X && (input.Feature().v > 1 || input.Y().v > 1) && input.GetLayout() == DataLayout::bfyx) { + // Flatten BFY for such case + dispatchData.dataSetSize = input.X().v; + dispatchData.dataSetsCount = input.Batch().v * input.Feature().v * input.Y().v; } else { auto flatten_input = input.FlattenFeatureAndSpatials(); dispatchData.dataSetSize = flatten_input.Feature().v; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp index e0757e28927316..92da7f1b0b44c8 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp @@ -88,13 +88,18 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(ov::AnyMap())), SoftMax8LayerTest::getTestCaseName); +const std::vector stableDiffusionShapes = { + {16, 4096, 4096}, + {2, 8, 4096, 4096} +}; + INSTANTIATE_TEST_SUITE_P( smoke_SoftMaxStableDiffusion, SoftMax8LayerTest, testing::Combine(testing::ValuesIn(netPrecisions), ::testing::Values(ov::element::undefined), ::testing::Values(ov::element::undefined), - ::testing::ValuesIn(ov::test::static_shapes_to_test_representation({{16, 4096, 4096}})), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(stableDiffusionShapes)), testing::Values(-1), testing::Values(ov::test::utils::DEVICE_GPU), testing::Values(ov::AnyMap())), diff --git a/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp b/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp index b81a87650a436c..1febfc4cd135ab 100644 --- a/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/dynamic_execution/memory_realloc_test.cpp @@ -151,6 +151,8 @@ TEST(softmax_gpu_dynamic_f32_test_upper_bound, input_same_values) { format::bfyx); auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + ov::intel_gpu::ImplementationDesc softmax_impl = { format::bfyx, "softmax_gpu_ref" }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "softmax", softmax_impl } })); network network(engine, topology(input_layout("input", in_layout), reorder("reorder", input_info("input"), format::bfyx, data_types::f16), softmax("softmax", 
input_info("reorder"), 3), From cec6535eaa4b90218f08ae24e14841bb69791684 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 30 Oct 2023 07:05:17 +0100 Subject: [PATCH 113/275] [core]Migrate Transpose operator to new API (#20647) * Migrate Transpose to new API * Move shape validation to shape_infer * Remove visit_attributes is same as base * Correct transpose order shape check for static shapes - correct creation of order shape for static shape tests --------- Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/transpose.hpp | 6 +- .../openvino/reference/interpolate.hpp | 4 +- .../include/openvino/reference/transpose.hpp | 15 ++- src/core/reference/src/op/einsum.cpp | 2 +- src/core/reference/src/op/transpose.cpp | 8 +- .../include/transpose_shape_inference.hpp | 15 ++- src/core/src/op/transpose.cpp | 95 ++++++++----------- .../transpose_shape_infernece_test.cpp | 6 +- 8 files changed, 80 insertions(+), 71 deletions(-) diff --git a/src/core/include/openvino/op/transpose.hpp b/src/core/include/openvino/op/transpose.hpp index 2b4af853893270..133b19074ae013 100644 --- a/src/core/include/openvino/op/transpose.hpp +++ b/src/core/include/openvino/op/transpose.hpp @@ -27,15 +27,11 @@ class OPENVINO_API Transpose : public Op { /// Transpose(const Output& arg, const Output& input_order); - bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END - + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_upper(TensorVector& output_values) const override; bool evaluate_lower(TensorVector& output_values) const override; bool has_evaluate() const override; diff --git a/src/core/reference/include/openvino/reference/interpolate.hpp 
b/src/core/reference/include/openvino/reference/interpolate.hpp index 13fb11c16206fc..17b42e0af7ecb9 100644 --- a/src/core/reference/include/openvino/reference/interpolate.hpp +++ b/src/core/reference/include/openvino/reference/interpolate.hpp @@ -636,7 +636,7 @@ void InterpolateEval::multidim_pil_func(const T* input_data, T* out, const in reinterpret_cast(transposed_in.data()), m_input_data_shape, sizeof(T), - in_transp_axes_order.data(), + in_transp_axes_order, transp_input_shape); std::vector transposed_out(shape_size(m_out_shape)); @@ -667,7 +667,7 @@ void InterpolateEval::multidim_pil_func(const T* input_data, T* out, const in reinterpret_cast(out), transp_output_shape, sizeof(T), - out_transp_axes_order.data(), + out_transp_axes_order, m_out_shape); } } diff --git a/src/core/reference/include/openvino/reference/transpose.hpp b/src/core/reference/include/openvino/reference/transpose.hpp index 6d91676dab9aa6..a1a160307f9d17 100644 --- a/src/core/reference/include/openvino/reference/transpose.hpp +++ b/src/core/reference/include/openvino/reference/transpose.hpp @@ -13,11 +13,22 @@ namespace ov { namespace reference { + +/** + * @brief Reference implementation of Transpose operator. + * + * @param data Pointer to input data. + * @param out Pointer to output data. + * @param data_shape Input data shape. + * @param element_size Element size in bytes for input and output. + * @param axes_order Transpose order. + * @param out_shape Output data shape. 
+ */ void transpose(const char* data, char* out, const Shape& data_shape, size_t element_size, - const int64_t* axes_order, - Shape out_shape); + const std::vector& axes_order, + const Shape& out_shape); } // namespace reference } // namespace ov diff --git a/src/core/reference/src/op/einsum.cpp b/src/core/reference/src/op/einsum.cpp index 271982c3986e7f..25bebdf1dd81bc 100644 --- a/src/core/reference/src/op/einsum.cpp +++ b/src/core/reference/src/op/einsum.cpp @@ -404,7 +404,7 @@ void transpose_input(ov::TensorVector& inputs, reinterpret_cast(output_ptr.data()), input_shape, element_type.size(), - permutation.data(), + permutation, output_shape); // update a vector of inputs and input subscripts diff --git a/src/core/reference/src/op/transpose.cpp b/src/core/reference/src/op/transpose.cpp index fbc38ebde38012..c20ff0e0f8dcab 100644 --- a/src/core/reference/src/op/transpose.cpp +++ b/src/core/reference/src/op/transpose.cpp @@ -18,12 +18,12 @@ void transpose(const char* data, char* out, const Shape& data_shape, size_t element_size, - const int64_t* axes_order, - Shape out_shape) { + const std::vector& axes_order, + const Shape& out_shape) { // To reuse reference::reshape axes order vector has to be converted to AxisVector // Negative axes are not supported, it is validated by transpose evaluate method - std::vector axis_vector(axes_order, axes_order + data_shape.size()); - reshape(data, out, data_shape, axis_vector, out_shape, element_size); + const AxisVector axes_vector(axes_order.begin(), axes_order.end()); + reshape(data, out, data_shape, axes_vector, out_shape, element_size); } } // namespace reference } // namespace ov diff --git a/src/core/shape_inference/include/transpose_shape_inference.hpp b/src/core/shape_inference/include/transpose_shape_inference.hpp index 46e41896023c94..05b6ab2b402d61 100644 --- a/src/core/shape_inference/include/transpose_shape_inference.hpp +++ b/src/core/shape_inference/include/transpose_shape_inference.hpp @@ -62,12 +62,25 @@ 
template > std::vector shape_infer(const Transpose* op, const std::vector& input_shapes, const ITensorAccessor& tensor_accessor = make_tensor_accessor()) { + OPENVINO_ASSERT(input_shapes.size() == 2); + const auto& input_shape = input_shapes[Transpose::ARG]; + const auto& input_order_shape = input_shapes[Transpose::ORDER]; + const auto input_rank = input_shape.rank(); + + if (input_order_shape.rank().is_static()) { + NODE_SHAPE_INFER_CHECK(op, input_shapes, input_order_shape.size() == 1, "Input order must be a vector."); + NODE_SHAPE_INFER_CHECK( + op, + input_shapes, + input_order_shape[0].compatible(input_rank.get_max_length()) || input_order_shape[0] == 0, + "Input order must have shape [n], where n is the rank of arg."); + } const auto axes = get_input_const_data_as(op, Transpose::ORDER, tensor_accessor); auto output_shapes = std::vector(); - if (axes && input_shape.rank().is_static()) { + if (axes && input_rank.is_static()) { output_shapes.push_back(calc_output_shape(op, input_shape, *axes)); } else if (axes) { output_shapes.push_back(ov::PartialShape::dynamic(axes->size())); diff --git a/src/core/src/op/transpose.cpp b/src/core/src/op/transpose.cpp index c0a5a4dd141097..c91c1a6cadc6f7 100644 --- a/src/core/src/op/transpose.cpp +++ b/src/core/src/op/transpose.cpp @@ -2,99 +2,88 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/transpose.hpp" +#include "openvino/op/transpose.hpp" #include "bound_evaluate.hpp" #include "itt.hpp" -#include "ngraph/validation_util.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/transpose.hpp" #include "transpose_shape_inference.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v1 { -op::v1::Transpose::Transpose(const Output& arg, const Output& input_order) : Op({arg, input_order}) { +Transpose::Transpose(const Output& arg, const Output& input_order) : Op({arg, input_order}) { constructor_validate_and_infer_types(); } -bool 
op::v1::Transpose::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_Transpose_visit_attributes); - return true; -} - -void op::v1::Transpose::validate_and_infer_types() { +void Transpose::validate_and_infer_types() { OV_OP_SCOPE(v1_Transpose_validate_and_infer_types); const auto& input_order_et = get_input_element_type(ORDER); NODE_VALIDATION_CHECK(this, input_order_et.is_dynamic() || input_order_et.is_integral_number(), "Input order must have an integral number element type."); - const auto& input_order_shape = get_input_partial_shape(ORDER); - NODE_VALIDATION_CHECK(this, input_order_shape.rank().compatible(1), "Input order must be a vector."); - - const auto& arg_shape = get_input_partial_shape(ARG); - NODE_VALIDATION_CHECK( - this, - input_order_shape.compatible(ov::PartialShape{arg_shape.rank()}) || - (input_order_shape.is_static() && input_order_shape.rank() == 1 && input_order_shape[0] == 0), - "Input order must have shape [n], where n is the rank of arg."); - set_input_is_relevant_to_shape(ORDER); - std::vector input_shapes{arg_shape, input_order_shape}; - std::vector output_shapes = shape_infer(this, input_shapes); + OPENVINO_SUPPRESS_DEPRECATED_START + const auto input_shapes = get_node_input_partial_shapes(*this); + OPENVINO_SUPPRESS_DEPRECATED_END + const auto output_shapes = shape_infer(this, input_shapes); set_output_type(ARG, get_input_element_type(ARG), output_shapes[ARG_T]); } -shared_ptr op::v1::Transpose::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Transpose::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Transpose_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args[ARG], new_args[ORDER]); + return std::make_shared(new_args[ARG], new_args[ORDER]); } -OPENVINO_SUPPRESS_DEPRECATED_START -bool op::v1::Transpose::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const { +bool 
Transpose::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Transpose_evaluate); - - const auto& order = input_values[ORDER]; - OPENVINO_ASSERT(order->get_element_type().is_integral_number(), - "Transpose axis element type has to be integral data type."); - - const auto& arg = input_values[ARG]; - OPENVINO_SUPPRESS_DEPRECATED_START - std::vector axes_order = host_tensor_2_vector(order); - OPENVINO_SUPPRESS_DEPRECATED_END - auto out_shape = calc_output_shape(this, arg->get_shape(), axes_order); - - auto& out = output_values[ARG_T]; - out->set_shape(out_shape); - out->set_element_type(arg->get_element_type()); - ov::reference::transpose(arg->get_data_ptr(), - out->get_data_ptr(), - arg->get_shape(), - arg->get_element_type().size(), - axes_order.data(), + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 2); + + const auto& order = inputs[ORDER]; + if (order.get_element_type().is_integral()) { + const auto& arg = inputs[ARG]; + auto axes_order = ov::get_tensor_data_as(order); + const auto out_shape = calc_output_shape(this, arg.get_shape(), axes_order); + + auto& out = outputs[ARG_T]; + out.set_shape(out_shape); + reference::transpose(static_cast(arg.data()), + static_cast(out.data()), + arg.get_shape(), + arg.get_element_type().size(), + axes_order, out_shape); - return true; + return true; + } else { + return false; + } } -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::v1::Transpose::has_evaluate() const { +bool Transpose::has_evaluate() const { OV_OP_SCOPE(v1_Transpose_has_evaluate); - return get_input_element_type(1).is_integral_number(); + return get_input_element_type(ORDER).is_integral_number(); } -bool op::v1::Transpose::evaluate_lower(ov::TensorVector& output_values) const { +bool Transpose::evaluate_lower(ov::TensorVector& output_values) const { return get_input_tensor(ORDER).has_and_set_bound() && default_lower_bound_evaluator(this, output_values); } -bool 
op::v1::Transpose::evaluate_upper(ov::TensorVector& output_values) const { +bool Transpose::evaluate_upper(ov::TensorVector& output_values) const { return get_input_tensor(ORDER).has_and_set_bound() && default_upper_bound_evaluator(this, output_values); } -bool op::v1::Transpose::evaluate_label(TensorLabelVector& output_labels) const { +bool Transpose::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_START return get_input_tensor(ORDER).has_and_set_bound() && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp index 4461ab3e7b80ec..f01a9b45075fc5 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp @@ -68,7 +68,7 @@ INSTANTIATE_TEST_SUITE_P( /** \brief Check shape_infer for transpose on static shapes. 
*/ TEST_P(StaticShapeInferenceTest, transpose_static) { - auto output_shapes = shape_inference(transpose.get(), {input_shape, transpose_order}); + auto output_shapes = shape_inference(transpose.get(), {input_shape, StaticShape{transpose_order.size()}}); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], exp_shape); } @@ -79,7 +79,7 @@ TEST(StaticShapeInferenceTest, transpose_input_shape_dim_dynamic) { const auto order = std::vector{1, 2, 0}; const auto transpose = make_transpose(input_shape, order); - auto output_shapes = shape_inference(transpose.get(), {StaticShape{2, 6, 3}, order}); + auto output_shapes = shape_inference(transpose.get(), {StaticShape{2, 6, 3}, StaticShape{order.size()}}); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], StaticShape({6, 3, 2})); } @@ -96,7 +96,7 @@ TEST(StaticShapeInferenceTest, transpose_order_in_constant_map) { const std::unordered_map const_map = {{1, const_tensor}}; auto output_shapes = std::vector{StaticShape{}}; - output_shapes = shape_inference(transpose.get(), {StaticShape({2, 4, 6, 8}), StaticShape()}, const_map); + output_shapes = shape_inference(transpose.get(), {StaticShape({2, 4, 6, 8}), StaticShape({0})}, const_map); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], StaticShape({4, 6, 2, 8})); } From 82f191b0e779e5468ac4594921027d30068335a3 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Mon, 30 Oct 2023 16:24:36 +0800 Subject: [PATCH 114/275] choose Pcore to compile model for GPU plugin (#20472) * choose Pcore to compile model for GPU plugin * provide function to update executor config * set callback executor to nullptr for GPU plugin * fix code style * fix warning * optimize duplicate code * set callback executor to nullptr for another gpu compile_model * add description for new function * add smoke test * fix code style * modify function definition --------- Co-authored-by: Wanglei Shen --- .../runtime/threading/istreams_executor.hpp | 13 ++ .../src/dev/threading/istreams_executor.cpp | 102 +++++++++++ 
.../unit/update_executor_config_test.cpp | 165 ++++++++++++++++++ src/plugins/intel_gpu/src/graph/program.cpp | 26 +-- .../intel_gpu/src/plugin/compiled_model.cpp | 6 +- 5 files changed, 289 insertions(+), 23 deletions(-) create mode 100644 src/inference/tests/unit/update_executor_config_test.cpp diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp index 4167da60da00de..738377ddce4d1e 100644 --- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp +++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp @@ -150,6 +150,19 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { _threadPreferredCoreType(threadPreferredCoreType), _streams_info_table{streamsInfoTable}, _cpu_reservation{cpuReservation} {} + + /** + * @brief Modify _streams_info_table and related configuration according to user-specified parameters, bind + * threads to cpu cores if cpu_pinning is true. 
+ * @param stream_nums Number of streams specified by user + * @param threads_per_stream Number of threads per stream specified by user + * @param core_type Cpu type (Big/Little/Any) specified by user + * @param cpu_pinning Whether to bind the threads to cpu cores + */ + void update_executor_config(int stream_nums, + int threads_per_stream, + PreferredCoreType core_type, + bool cpu_pinning); }; /** diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp index 92d297a62ecb30..518cdd08c69c7f 100644 --- a/src/inference/src/dev/threading/istreams_executor.cpp +++ b/src/inference/src/dev/threading/istreams_executor.cpp @@ -553,5 +553,107 @@ IStreamsExecutor::Config IStreamsExecutor::Config::reserve_cpu_threads(const ISt return config; } +void IStreamsExecutor::Config::update_executor_config(int stream_nums, + int threads_per_stream, + IStreamsExecutor::Config::PreferredCoreType core_type, + bool cpu_pinning) { + const auto proc_type_table = ov::get_proc_type_table(); + + if (proc_type_table.empty()) { + return; + } + + // IStreamsExecutor::Config config = initial; + const auto total_num_cores = proc_type_table[0][ALL_PROC]; + const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC]; + const auto total_num_little_cores = proc_type_table[0][EFFICIENT_CORE_PROC]; + + int num_cores = total_num_cores; + if (core_type == ov::threading::IStreamsExecutor::Config::BIG) { + num_cores = total_num_big_cores; + } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) { + num_cores = total_num_little_cores; + } + + int streams = std::min(stream_nums, num_cores); + + if (streams == 0) { + return; + } + + _streams = streams; + _threadPreferredCoreType = core_type; + _threadsPerStream = threads_per_stream; + + // create stream_info_table based on core type + std::vector stream_info(ov::CPU_STREAMS_TABLE_SIZE, 0); + stream_info[ov::THREADS_PER_STREAM] = 
_threadsPerStream; + stream_info[ov::STREAM_NUMA_NODE_ID] = 0; + stream_info[ov::STREAM_SOCKET_ID] = 0; + if (core_type == ov::threading::IStreamsExecutor::Config::BIG) { + if (proc_type_table[0][ov::MAIN_CORE_PROC] < _streams) { + stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::MAIN_CORE_PROC]; + stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC; + _streams_info_table.push_back(stream_info); + stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::HYPER_THREADING_PROC]; + stream_info[ov::PROC_TYPE] = ov::HYPER_THREADING_PROC; + _streams_info_table.push_back(stream_info); + } else { + stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC; + stream_info[ov::NUMBER_OF_STREAMS] = _streams; + _streams_info_table.push_back(stream_info); + } + } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) { + stream_info[ov::PROC_TYPE] = ov::EFFICIENT_CORE_PROC; + stream_info[ov::NUMBER_OF_STREAMS] = _streams; + _streams_info_table.push_back(stream_info); + } else { + int total_streams = 0; + if (proc_type_table.size() == 1) { + for (int i = ov::MAIN_CORE_PROC; i <= ov::HYPER_THREADING_PROC; i++) { + if (proc_type_table[0][i] > 0) { + stream_info[ov::NUMBER_OF_STREAMS] = + (total_streams + proc_type_table[0][i] > _streams ? _streams - total_streams + : proc_type_table[0][i]); + stream_info[ov::PROC_TYPE] = i; + stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[0][PROC_NUMA_NODE_ID]; + stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[0][PROC_SOCKET_ID]; + _streams_info_table.push_back(stream_info); + total_streams += stream_info[ov::NUMBER_OF_STREAMS]; + } + if (total_streams >= _streams) + break; + } + } else { + for (size_t i = 1; i < proc_type_table.size(); i++) { + for (int j = ov::MAIN_CORE_PROC; j < ov::HYPER_THREADING_PROC; j++) { + if (proc_type_table[i][j] > 0) { + stream_info[ov::NUMBER_OF_STREAMS] = + (total_streams + proc_type_table[i][j] > _streams ? 
_streams - total_streams + : proc_type_table[i][j]); + stream_info[ov::PROC_TYPE] = j; + stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[i][PROC_NUMA_NODE_ID]; + stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[i][PROC_SOCKET_ID]; + _streams_info_table.push_back(stream_info); + total_streams += stream_info[ov::NUMBER_OF_STREAMS]; + } + if (total_streams >= _streams) + break; + } + if (total_streams >= _streams) + break; + } + } + } + + if (cpu_pinning) { + _cpu_reservation = cpu_pinning; + auto new_config = reserve_cpu_threads(*this); + _stream_processor_ids = new_config._stream_processor_ids; + _streams = new_config._streams; + _threads = new_config._threads; + } +} + } // namespace threading } // namespace ov diff --git a/src/inference/tests/unit/update_executor_config_test.cpp b/src/inference/tests/unit/update_executor_config_test.cpp new file mode 100644 index 00000000000000..a660dfff0597ae --- /dev/null +++ b/src/inference/tests/unit/update_executor_config_test.cpp @@ -0,0 +1,165 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include + +// #include "ie_system_conf.h" +#include "openvino/runtime/threading/istreams_executor.hpp" +#include "os/cpu_map_info.hpp" + +using namespace testing; +using namespace ov; +using namespace threading; + +namespace { + +#if defined(__linux__) || defined(_WIN32) + +struct UpdateExecutorConfigTestCase { + ov::threading::IStreamsExecutor::Config _config; + std::vector> _proc_type_table; + std::vector> _cpu_mapping_table; + int _num_streams; + int _threads_per_stream; + ov::threading::IStreamsExecutor::Config::PreferredCoreType _core_type; + bool _cpu_pinning; + std::vector> _streams_info_table; + std::vector> _stream_processors; +}; + +class UpdateExecutorConfigTest : public ov::test::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + auto test_data = std::get<0>(GetParam()); + + CPU& cpu = cpu_info(); + 
cpu._org_proc_type_table = test_data._proc_type_table; + cpu._proc_type_table = test_data._proc_type_table; + cpu._cpu_mapping_table = test_data._cpu_mapping_table; + cpu._numa_nodes = 1; + + test_data._config.update_executor_config(test_data._num_streams, + test_data._threads_per_stream, + test_data._core_type, + test_data._cpu_pinning); + + ASSERT_EQ(test_data._num_streams, test_data._config._streams); + ASSERT_EQ(test_data._threads_per_stream, test_data._config._threadsPerStream); + ASSERT_EQ(test_data._core_type, test_data._config._threadPreferredCoreType); + ASSERT_EQ(test_data._cpu_pinning, test_data._config._cpu_reservation); + ASSERT_EQ(test_data._num_streams, test_data._config._streams); + ASSERT_EQ(test_data._streams_info_table, test_data._config._streams_info_table); + ASSERT_EQ(test_data._stream_processors, test_data._config._stream_processor_ids); + } +}; + +UpdateExecutorConfigTestCase _update_num_streams = { + ov::threading::IStreamsExecutor::Config{"update num streams test"}, // param[in]: initial configuration + // param[in]: proc_type_table, {total processors, number of physical processors, number of Efficient processors, + // number of hyper threading processors} + { + {12, 6, 0, 6, 0, 0}, + }, + // param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used} + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1}, + {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, + {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1}, + {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1}, + {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1}, + {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1}, + }, + 4, // param[in]: the number of streams + 1, // param[in]: the number of threads per stream + ov::threading::IStreamsExecutor::Config::ANY, // 
param[in]: specified cpu core type + false, // param[in]: specified cpu pinning + // param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID, + // STREAM_SOCKET_ID} + { + {4, MAIN_CORE_PROC, 1, 0, 0}, + }, + // param[out]: stream_processors, the list of processor ids on each stream. + {}, +}; + +UpdateExecutorConfigTestCase _update_core_type = { + ov::threading::IStreamsExecutor::Config{"update core type test"}, + { + {24, 8, 8, 8, 0, 0}, + }, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1}, + {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1}, + {12, 0, 0, 6, MAIN_CORE_PROC, 12, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 13, -1}, + {14, 0, 0, 7, MAIN_CORE_PROC, 14, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 15, -1}, + {16, 0, 0, 8, EFFICIENT_CORE_PROC, 16, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 17, -1}, + {18, 0, 0, 10, EFFICIENT_CORE_PROC, 18, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 19, -1}, + {20, 0, 0, 12, EFFICIENT_CORE_PROC, 20, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 21, -1}, + {22, 0, 0, 14, EFFICIENT_CORE_PROC, 22, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 23, -1}, + }, + 8, + 1, + ov::threading::IStreamsExecutor::Config::LITTLE, + false, + { + {8, EFFICIENT_CORE_PROC, 1, 0, 0}, + }, + {}, +}; + +UpdateExecutorConfigTestCase _update_cpu_pinning = { + ov::threading::IStreamsExecutor::Config{"update cpu pinning test"}, + { + {8, 4, 0, 4, 0, 0}, + }, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1}, + {4, 
0, 0, 2, MAIN_CORE_PROC, 4, -1}, + {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, + {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1}, + }, + 8, + 1, + ov::threading::IStreamsExecutor::Config::ANY, + true, + { + {4, MAIN_CORE_PROC, 1, 0, 0}, + {4, HYPER_THREADING_PROC, 1, 0, 0}, + }, + { + {0}, + {2}, + {4}, + {6}, + {1}, + {3}, + {5}, + {7}, + }, +}; + +TEST_P(UpdateExecutorConfigTest, UpdateExecutorConfig) {} + +INSTANTIATE_TEST_SUITE_P(smoke_UpdateExecutorConfig, + UpdateExecutorConfigTest, + testing::Values(_update_num_streams, _update_core_type, _update_cpu_pinning)); +#endif +} // namespace diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index dde29dc1e32504..9bd5d57090c7ef 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -3,6 +3,7 @@ // #include "openvino/runtime/system_conf.hpp" +#include "openvino/runtime/threading/cpu_streams_info.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" @@ -104,26 +105,6 @@ using namespace cldnn; using namespace ov::intel_gpu; -static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) { - if (ov::get_available_cores_types().size() == 1) { - return; - } - - const auto total_num_cores = ov::get_number_of_logical_cpu_cores(); - const auto total_num_big_cores = ov::get_number_of_logical_cpu_cores(true); - const auto total_num_little_cores = total_num_cores - total_num_big_cores; - auto core_type = config._threadPreferredCoreType; - - int num_cores = total_num_cores; - if (core_type == ov::threading::IStreamsExecutor::Config::BIG) { - num_cores = total_num_big_cores; - } else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) { - num_cores = total_num_little_cores; - } - - config._streams = std::min(config._streams, num_cores); -} - static ov::threading::IStreamsExecutor::Config make_task_executor_config(const 
ExecutionConfig& config, std::string tags, int num_streams = 0) { ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1); task_executor_config._streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads); @@ -135,7 +116,10 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority); } - adjust_num_cores(task_executor_config); + task_executor_config.update_executor_config(task_executor_config._streams, + 1, + task_executor_config._threadPreferredCoreType, + false); return task_executor_config; } diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 60d9a66bca3122..48d75b4640cf6b 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -63,7 +63,8 @@ CompiledModel::CompiledModel(std::shared_ptr model, : ov::ICompiledModel(model, plugin, wrap_if_old_api(context, plugin->is_new_api()), - create_task_executor(plugin, config)) + create_task_executor(plugin, config), + nullptr) , m_context(context) , m_config(config) , m_wait_executor(std::make_shared(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})) @@ -86,7 +87,8 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib, : ov::ICompiledModel(nullptr, plugin, wrap_if_old_api(context, plugin->is_new_api()), - create_task_executor(plugin, config)) + create_task_executor(plugin, config), + nullptr) , m_context(context) , m_config(config) , m_wait_executor(std::make_shared(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})) From 7b9db3d81b4bed45b3c2a9166428f849b44cff09 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 30 Oct 2023 10:12:55 +0100 Subject: [PATCH 115/275] [PT FE] Add torch.int16 dtype support (#20735) * Add torch.int16 dtype support * 
Add test --- .../src/openvino/frontend/pytorch/utils.py | 1 + .../py_frontend_tests/test_torch_decoder.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index a3ac46e701119b..0901fea81c6c58 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -118,6 +118,7 @@ def get_value_from_getattr(getattr_node, self_module): "torch.float64": OVType.f64, "torch.uint8": OVType.u8, "torch.int8": OVType.i8, + "torch.int16": OVType.i16, "torch.int32": OVType.i32, "torch.int64": OVType.i64, "torch.bool": OVType.boolean, diff --git a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py index 7e1758bd84d4fe..dfacb8ecb85eac 100644 --- a/tests/layer_tests/py_frontend_tests/test_torch_decoder.py +++ b/tests/layer_tests/py_frontend_tests/test_torch_decoder.py @@ -249,6 +249,28 @@ def forward(self): assert ov_const[0].get_partial_shape() == PartialShape([2]) +@pytest.mark.precommit +def test_pytorch_decoder_can_convert_i16_tensor(): + from openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder + from openvino.runtime import PartialShape, Type + + class SomeTensor(torch.nn.Module): + def forward(self): + return torch.tensor([1, 2], dtype=torch.int16) + + model = get_scripted_model(SomeTensor()) + consts = [n for n in model.inlined_graph.nodes() if n.kind() == + "prim::Constant"] + assert len(consts) > 0 + some_const = consts[0] + nc_decoder = TorchScriptPythonDecoder(model, some_const) + ov_const = nc_decoder.as_constant() + assert ov_const is not None + assert len(ov_const) == 1 + assert ov_const[0].get_element_type() == Type.i16 + assert ov_const[0].get_partial_shape() == PartialShape([2]) + + @pytest.mark.precommit def test_pytorch_decoder_can_convert_i32_tensor(): from 
openvino.frontend.pytorch.ts_decoder import TorchScriptPythonDecoder From 81dbe4aedd44522bc6b7009df13f221bbbe5bb20 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Mon, 30 Oct 2023 10:46:46 +0100 Subject: [PATCH 116/275] [DOCS] Fixing formatting in Model Preparation (#20752) * Update model_preparation.md * Update installing-openvino-from-archive-windows.md --- ...nstalling-openvino-from-archive-windows.md | 2 +- .../openvino_workflow/model_preparation.md | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md index d24670343d770a..f993cd874bc255 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-windows-header/installing-openvino-from-archive-windows.md @@ -98,7 +98,7 @@ Step 1: Download and Install OpenVINO Core Components .. code-block:: sh tar -xf openvino_2023.1.0.zip - ren w_openvino_toolkit_windows_2023.1.0.10926.b4452d56304_x86_64 openvino_2023.1.0 + ren w_openvino_toolkit_windows_2023.1.0.12185.47b736f63ed_x86_64 openvino_2023.1.0 move openvino_2023.1.0 "C:\Program Files (x86)\Intel" diff --git a/docs/articles_en/openvino_workflow/model_preparation.md b/docs/articles_en/openvino_workflow/model_preparation.md index dbc34640a992cc..0dabb7c5cbd2bf 100644 --- a/docs/articles_en/openvino_workflow/model_preparation.md +++ b/docs/articles_en/openvino_workflow/model_preparation.md @@ -36,16 +36,16 @@ Although in most cases it can be done automatically, under the hood, explicit conversion may enable more optimization options and better performance. 
It is done in one of two ways: - * the Python API functions (``openvino.convert_model`` and ``openvino.save_model``) - * the ``ovc`` command line tool. - - .. note:: - - Model conversion API prior to OpenVINO 2023.1 is considered deprecated. - Existing and new projects are recommended to transition to the new - solutions, keeping in mind that they are not fully backwards compatible - with ``openvino.tools.mo.convert_model`` or the ``mo`` CLI tool. - For more details, see the :doc:`Model Conversion API Transition Guide `. +* the Python API functions (``openvino.convert_model`` and ``openvino.save_model``) +* the ``ovc`` command line tool. + +.. note:: + + Model conversion API prior to OpenVINO 2023.1 is considered deprecated. + Existing and new projects are recommended to transition to the new + solutions, keeping in mind that they are not fully backwards compatible + with ``openvino.tools.mo.convert_model`` or the ``mo`` CLI tool. + For more details, see the :doc:`Model Conversion API Transition Guide `. 
Convert a Model in Python: ``convert_model`` From 45121411118715e69002b749bfa83d30b16453c3 Mon Sep 17 00:00:00 2001 From: Sungeun Kim Date: Mon, 30 Oct 2023 19:06:29 +0900 Subject: [PATCH 117/275] [GPU] fix axis for pad/stride/dilation when 1d conv (#20746) * fix axis for pad/stride/dilation when 1d conv * add test-cases for 1d conv with explicit pad --- .../intel_gpu/src/graph/impls/ocl/convolution.cpp | 8 +++++++- .../single_layer_tests/dynamic/convolution.cpp | 11 +++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index d62cd34bf5dcca..2c8904847e21c9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -42,7 +42,7 @@ struct convolution_impl : typed_primitive_impl_ocl { const auto& primitive = impl_param.typed_desc(); auto stride = primitive->stride; - const auto& dilation = primitive->dilation; + auto dilation = primitive->dilation; const auto& groups = primitive->groups; const auto& deformable_groups = primitive->deformable_groups; const auto transposed = primitive->transposed; @@ -113,6 +113,12 @@ struct convolution_impl : typed_primitive_impl_ocl { uint32_t kz = weights_layout.spatial(2); conv_params.filterSize = { kx, ky, kz }; + // WA: If 1d conv and dynamic shape, 1d pad should be applied to y axis. + if (pads_begin.size() == 1) pads_begin.push_back(0); + if (pads_end.size() == 1) pads_end.push_back(0); + if (stride.size() == 1) stride.push_back(1); + if (dilation.size() == 1) dilation.push_back(1); + uint32_t pad_begin_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); uint32_t pad_begin_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); uint32_t pad_begin_x = std::max(pads_begin.size() >= 1 ? 
pads_begin[pads_begin.size() - 1] : 0, 0); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp index 44101b10c8e172..a3c84e5cd517ab 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp @@ -88,6 +88,13 @@ class ConvolutionLayerGPUTestDynamic : public testing::WithParamInterface(inType, shape)); @@ -149,8 +156,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1DSymPad, Convolut const std::vector kernels1D = { {3}, {1} }; const std::vector strides1D = { {1} }; -const std::vector> padBegins1D = { {0} }; -const std::vector> padEnds1D = { {0} }; +const std::vector> padBegins1D = { {0}, {1} }; +const std::vector> padEnds1D = { {0}, {1} }; const std::vector dilations1D = { {1} }; const SizeVector numOutChannels = { 64, 63 }; const std::vector inputShapes1D = { From 7cfeb413d403017f14c050af288ecd6f40406326 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 30 Oct 2023 12:07:36 +0100 Subject: [PATCH 118/275] [core]Migrate Concat operator to new API (#20600) * Migrate Concat op to new API * Move shape validation to shape_infer * Fix getting concat axis in shape inference --------- Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/concat.hpp | 3 - .../include/concat_shape_inference.hpp | 25 ++- src/core/src/op/concat.cpp | 142 ++++++------------ src/core/tests/type_prop/concat.cpp | 26 +--- 4 files changed, 70 insertions(+), 126 deletions(-) diff --git a/src/core/include/openvino/op/concat.hpp b/src/core/include/openvino/op/concat.hpp index 4b83f5ce6f6879..263409486a8e70 100644 --- a/src/core/include/openvino/op/concat.hpp +++ b/src/core/include/openvino/op/concat.hpp @@ -50,9 +50,6 @@ class OPENVINO_API Concat : public Op { void set_axis(int64_t axis) { m_axis = axis; } - 
OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END bool has_evaluate() const override; bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_lower(TensorVector& output_values) const override; diff --git a/src/core/shape_inference/include/concat_shape_inference.hpp b/src/core/shape_inference/include/concat_shape_inference.hpp index e6a72f9b44170d..cc35390164bc6a 100644 --- a/src/core/shape_inference/include/concat_shape_inference.hpp +++ b/src/core/shape_inference/include/concat_shape_inference.hpp @@ -14,9 +14,10 @@ namespace v0 { template > std::vector shape_infer(const Concat* op, const std::vector& input_shapes) { + NODE_VALIDATION_CHECK(op, !input_shapes.empty()); using DimType = typename T::value_type; - const auto concat_axis = op->get_concatenation_axis(); + auto concat_axis = op->get_concatenation_axis() < 0 ? op->get_axis() : op->get_concatenation_axis(); const auto empty_dim = DimType{}; auto concat_dim = DimType{0}; @@ -27,21 +28,29 @@ std::vector shape_infer(const Concat* op, const std::vector& input_s output_shape = PartialShape::dynamic(); } else { output_shape = input_shapes.front(); + OPENVINO_SUPPRESS_DEPRECATED_START + concat_axis = ov::normalize_axis(op, concat_axis, output_shape.rank()); + OPENVINO_SUPPRESS_DEPRECATED_END output_shape[concat_axis] = empty_dim; } for (auto& input : input_shapes) { - if (input.rank().is_static()) { + const auto& input_rank = input.rank(); + if (input_rank.is_static()) { + OPENVINO_SUPPRESS_DEPRECATED_START + concat_axis = ov::normalize_axis(op, concat_axis, input_rank); + OPENVINO_SUPPRESS_DEPRECATED_END auto in_copy = TRShape(input); concat_dim += in_copy[concat_axis]; in_copy[concat_axis] = empty_dim; - NODE_VALIDATION_CHECK(op, - TRShape::merge_into(output_shape, in_copy), - "Argument shapes are inconsistent; they must have the same rank, and must " - "have 
equal dimension everywhere except on the concatenation axis (axis ", - concat_axis, - ")."); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + TRShape::merge_into(output_shape, in_copy), + "Argument shapes are inconsistent; they must have the same rank, and must " + "have equal dimension everywhere except on the concatenation axis (axis ", + concat_axis, + ")."); } else { concat_dim += empty_dim; } diff --git a/src/core/src/op/concat.cpp b/src/core/src/op/concat.cpp index 05d868b1096acb..809fcdba7e96d6 100644 --- a/src/core/src/op/concat.cpp +++ b/src/core/src/op/concat.cpp @@ -2,38 +2,33 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/concat.hpp" - -#include +#include "openvino/op/concat.hpp" #include "bound_evaluate.hpp" #include "concat_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/core/dimension_tracker.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/concat.hpp" -#include "validation_util.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v0 { -op::Concat::Concat(const OutputVector& args, int64_t axis) : Op(args), m_axis(axis) { +Concat::Concat(const OutputVector& args, int64_t axis) : Op(args), m_axis(axis) { constructor_validate_and_infer_types(); } -op::Concat::Concat(const NodeVector& args, int64_t axis) : Concat(as_output_vector(args), axis) {} +Concat::Concat(const NodeVector& args, int64_t axis) : Concat(as_output_vector(args), axis) {} -bool op::Concat::visit_attributes(AttributeVisitor& visitor) { +bool Concat::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_Concat_visit_attributes); visitor.on_attribute("axis", m_axis); return true; } -void op::Concat::validate_and_infer_types() { +void Concat::validate_and_infer_types() { OV_OP_SCOPE(v0_Concat_validate_and_infer_types); - NODE_VALIDATION_CHECK(this, get_input_size() >= 1, "At least one argument required."); 
- element::Type inputs_et{element::dynamic}; auto input_shapes = std::vector(); @@ -41,118 +36,68 @@ void op::Concat::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, element::Type::merge(inputs_et, inputs_et, get_input_element_type(i)), "Argument element types are inconsistent."); - const auto& input_shape = get_input_partial_shape(i); - const auto& input_rank = input_shape.rank(); - - if (input_rank.is_static() && (get_concatenation_axis() < 0)) { - set_concatenation_axis(get_axis() < 0 ? get_axis() + input_rank.get_length() : get_axis()); - } - - const auto concat_axis = get_concatenation_axis(); - - NODE_VALIDATION_CHECK(this, - input_shape.is_dynamic() || (0 <= concat_axis && concat_axis < input_rank.get_length()), - "Concatenation axis (", - concat_axis, - ") is out of bounds [", - -input_rank.get_length(), - ", ", - input_rank.get_length() - 1, - "] for ", - "argument ", - i, - ", which has shape ", - input_shape, - "."); - - input_shapes.push_back(input_shape); + input_shapes.push_back(get_input_partial_shape(i)); } - const auto output_shapes = shape_infer(this, input_shapes); - set_output_type(0, inputs_et, output_shapes.front()); -} - -shared_ptr op::Concat::clone_with_new_inputs(const OutputVector& new_args) const { - OV_OP_SCOPE(v0_Concat_clone_with_new_inputs); - return make_shared(new_args, m_axis); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace { -bool evaluate_concat(const HostTensorVector& args, const HostTensorPtr& out, int64_t concatenation_axis) { - std::vector arg_bufs; - std::vector arg_shapes; - ov::Shape out_shape(args[0]->get_shape()); - out_shape[concatenation_axis] = 0; - for (auto& input : args) { - arg_bufs.push_back(input->get_data_ptr()); - arg_shapes.push_back(input->get_shape()); - out_shape[concatenation_axis] += arg_shapes.back()[concatenation_axis]; + const auto output_shape = shape_infer(this, input_shapes).front(); + if (output_shape.rank().is_static() && (get_concatenation_axis() < 0)) { + 
set_concatenation_axis(ov::util::normalize(get_axis(), output_shape.size())); } - out->set_shape(out_shape); - ov::reference::concat(arg_bufs, - out->get_data_ptr(), - arg_shapes, - out_shape, - concatenation_axis, - out->get_element_type().size()); - return true; + set_output_type(0, inputs_et, output_shape); } -} // namespace -bool op::Concat::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Concat_evaluate); - OPENVINO_ASSERT(!inputs.empty()); - OPENVINO_ASSERT(validate_host_tensor_vector(inputs, inputs.size())); - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1)); - auto concat_axis = get_axis() < 0 ? get_axis() + inputs[0]->get_shape().size() : get_axis(); - return evaluate_concat(inputs, outputs[0], concat_axis); +std::shared_ptr Concat::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v0_Concat_clone_with_new_inputs); + return std::make_shared(new_args, m_axis); } -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::Concat::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { +bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Concat_evaluate); - OPENVINO_ASSERT(!inputs.empty()); OPENVINO_ASSERT(outputs.size() == 1); - auto concat_axis = ov::util::normalize(get_axis(), inputs.front().get_shape().size()); + const auto inputs_count = inputs.size(); + std::vector arg_bufs(inputs_count); + std::vector arg_shapes; + std::vector input_shapes; + arg_shapes.reserve(inputs_count); + input_shapes.reserve(inputs_count); - std::vector arg_bufs; - std::vector arg_shapes; - - ov::Shape out_shape(inputs.front().get_shape()); - out_shape[concat_axis] = 0; + auto arg_buf = arg_bufs.begin(); for (auto& input : inputs) { - arg_bufs.push_back(static_cast(input.data())); - arg_shapes.push_back(input.get_shape()); - out_shape[concat_axis] += arg_shapes.back()[concat_axis]; + *arg_buf = static_cast(input.data()); + ++arg_buf; + const auto& 
input_shape = input.get_shape(); + arg_shapes.emplace_back(input_shape); + input_shapes.emplace_back(input_shape); } + + const auto& out_shape = shape_infer(this, input_shapes).front().to_shape(); outputs.front().set_shape(out_shape); - ov::reference::concat(arg_bufs, - static_cast(outputs.front().data()), - arg_shapes, - out_shape, - concat_axis, - outputs.front().get_element_type().size()); + reference::concat(arg_bufs, + static_cast(outputs.front().data()), + arg_shapes, + out_shape, + ov::util::normalize(get_axis(), out_shape.size()), + outputs.front().get_element_type().size()); return true; } -bool op::Concat::has_evaluate() const { +bool Concat::has_evaluate() const { OV_OP_SCOPE(v0_Concat_has_evaluate); return true; } -bool op::Concat::evaluate_lower(ov::TensorVector& output_values) const { +bool Concat::evaluate_lower(TensorVector& output_values) const { return default_lower_bound_evaluator(this, output_values); } -bool op::Concat::evaluate_upper(ov::TensorVector& output_values) const { +bool Concat::evaluate_upper(TensorVector& output_values) const { return default_upper_bound_evaluator(this, output_values); } -bool op::Concat::evaluate_label(TensorLabelVector& output_labels) const { +bool Concat::evaluate_label(TensorLabelVector& output_labels) const { const auto& inputs = input_values(); if (std::all_of(inputs.cbegin(), inputs.cend(), [](const Output& out) { const auto& labels = out.get_tensor().get_value_label(); @@ -187,3 +132,6 @@ bool op::Concat::evaluate_label(TensorLabelVector& output_labels) const { return false; } } +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/tests/type_prop/concat.cpp b/src/core/tests/type_prop/concat.cpp index 22bdb3bf26a04c..0114f8192be62f 100644 --- a/src/core/tests/type_prop/concat.cpp +++ b/src/core/tests/type_prop/concat.cpp @@ -6,6 +6,7 @@ #include +#include "common_test_utils/test_assertions.hpp" #include "common_test_utils/type_prop.hpp" #include "openvino/core/dimension_tracker.hpp" 
#include "openvino/op/broadcast.hpp" @@ -68,15 +69,10 @@ TEST(type_prop, concat_deduce_axis_oob) { auto param0 = make_shared(ov::element::f32, ov::Shape{2, 3, 4}); auto param1 = make_shared(ov::element::f32, ov::Shape{2, 7, 4}); auto param2 = make_shared(ov::element::f32, ov::Shape{2, 2, 5}); - try { - auto c = make_shared(ov::NodeVector{param0, param1, param2}, 3); - // Should have thrown, so fail if it didn't - FAIL() << "Deduced type should disagree with specified type"; - } catch (const ov::NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Concatenation axis (3) is out of bounds")); - } catch (...) { - FAIL() << "Deduced type check failed for unexpected reason"; - } + + OV_EXPECT_THROW(ignore = make_shared(ov::NodeVector{param0, param1, param2}, 3), + ov::AssertFailure, + HasSubstr("Concat Parameter axis 3 out of the tensor rank range")); } TEST(type_prop, concat_deduce_axis_barely_in_bounds) { @@ -259,15 +255,9 @@ TEST(type_prop, concat_partial_negative_axis_incorrect) { auto param1 = make_shared(ov::element::f32, ov::Shape{2, 7, 4}); auto param2 = make_shared(ov::element::f32, ov::Shape{2, 2, 4}); - try { - auto c = make_shared(ov::NodeVector{param0, param1, param2}, -4); - // Should have thrown, so fail if it didn't - FAIL() << "Incorrect negative axis value not detected (out of bounds)"; - } catch (const ov::NodeValidationFailure& error) { - EXPECT_HAS_SUBSTRING(error.what(), std::string("Concatenation axis (-1) is out of bounds")); - } catch (...) { - FAIL() << "Deduced type check failed for unexpected reason"; - } + OV_EXPECT_THROW(ignore = make_shared(ov::NodeVector{param0, param1, param2}, -4), + ov::AssertFailure, + HasSubstr("Concat Parameter axis -4 out of the tensor rank range")); } /** \brief Test uses evaluate lower/upper and label of concat op. 
*/ From e2ea571926c78166fe6fb3471855e8f65b5efcbf Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 30 Oct 2023 12:08:26 +0100 Subject: [PATCH 119/275] Migrate Maximum operator to new API (#20602) Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/maximum.hpp | 4 +- .../include/openvino/reference/maximum.hpp | 25 +++- src/core/src/op/maximum.cpp | 109 ++++++++---------- 3 files changed, 66 insertions(+), 72 deletions(-) diff --git a/src/core/include/openvino/op/maximum.hpp b/src/core/include/openvino/op/maximum.hpp index 742878b09c4eba..69652a958dba82 100644 --- a/src/core/include/openvino/op/maximum.hpp +++ b/src/core/include/openvino/op/maximum.hpp @@ -29,9 +29,7 @@ class OPENVINO_API Maximum : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/maximum.hpp b/src/core/reference/include/openvino/reference/maximum.hpp index 12388a1026c685..18711cdd29c967 100644 --- a/src/core/reference/include/openvino/reference/maximum.hpp +++ b/src/core/reference/include/openvino/reference/maximum.hpp @@ -12,13 +12,28 @@ namespace ov { namespace reference { +namespace func { +template +T max(const T a, const T b) { + return std::max(a, b); +} +} // namespace func + template void maximum(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] > arg1[i] ? arg0[i] : arg1[i]; - } + std::transform(arg0, std::next(arg0, count), arg1, out, func::max); } +/** + * @brief Reference implementation of binary elementwise Maximum operator. + * + * @param arg0 Pointer to input 0 data. 
+ * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Input 0 shape. + * @param arg1_shape Input 1 shape. + * @param broadcast_spec Broadcast specification mode. + */ template void maximum(const T* arg0, const T* arg1, @@ -26,9 +41,7 @@ void maximum(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return x > y ? x : y; - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::max); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/maximum.cpp b/src/core/src/op/maximum.cpp index 8a9a2a6569b336..90a038d0b540cb 100644 --- a/src/core/src/op/maximum.cpp +++ b/src/core/src/op/maximum.cpp @@ -2,92 +2,75 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/maximum.hpp" - -#include +#include "openvino/op/maximum.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/convert.hpp" -#include "ngraph/op/greater.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/type/element_type.hpp" #include "openvino/reference/maximum.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; - -// ------------------------------------ v0 ------------------------------------- +namespace ov { +namespace op { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace maximumop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::maximum(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +namespace maximum { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_maximum(const 
HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_maximum, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_maximum, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_maximum, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_maximum, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_maximum, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_maximum, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::maximum(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace maximumop +}; +} // namespace maximum // ------------------------------------ v1 ------------------------------------- - -op::v1::Maximum::Maximum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Maximum::Maximum(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Maximum::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Maximum::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Maximum_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Maximum::evaluate(const HostTensorVector& outputs, 
const HostTensorVector& inputs) const { +bool Maximum::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Maximum_evaluate); - return maximumop::evaluate_maximum(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Maximum::has_evaluate() const { +bool Maximum::has_evaluate() const { OV_OP_SCOPE(v1_Maximum_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov From c3a90f8f7042ab7ad4ab3479745c8a401796d26d Mon Sep 17 00:00:00 2001 From: Przemyslaw Wysocki Date: Mon, 30 Oct 2023 12:24:37 +0100 Subject: [PATCH 120/275] [PyOV] Add lower bound to `datasets` (#20674) --- samples/python/benchmark/bert_benchmark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/python/benchmark/bert_benchmark/requirements.txt b/samples/python/benchmark/bert_benchmark/requirements.txt index e4caa4c6886653..abd0c7273fd6db 100644 --- a/samples/python/benchmark/bert_benchmark/requirements.txt +++ b/samples/python/benchmark/bert_benchmark/requirements.txt @@ -1,4 +1,4 @@ -datasets +datasets>=2.13 transformers[onnx]; python_version < "3.11" --extra-index-url https://download.pytorch.org/whl/cpu torch From 6dce6cc599c9078f8405631c015454e8f0b2b0f9 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Mon, 30 Oct 2023 
21:27:50 +0900 Subject: [PATCH 121/275] [GPU] Fix va surface sharing issue (#20730) --- src/plugins/intel_gpu/src/plugin/remote_tensor.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index 9bf19f8df50535..0c4851b2f68c3a 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -109,6 +109,10 @@ void RemoteTensorImpl::allocate() { auto context = std::dynamic_pointer_cast(m_context); auto enable_caching = supports_caching(); + if (is_surface()) { + m_layout.format = cldnn::format::nv12; // Other formats are not supported + } + if (enable_caching) { m_memory_object = context->try_get_cached_memory(m_hash); if (m_memory_object) { @@ -156,7 +160,6 @@ void RemoteTensorImpl::allocate() { } #ifdef _WIN32 case TensorType::BT_SURF_SHARED: { - m_layout.format = cldnn::format::nv12; // Other formats are not supported m_memory_object = engine.share_surface(m_layout, m_mem, m_plane); break; } @@ -166,13 +169,11 @@ void RemoteTensorImpl::allocate() { } #else case TensorType::BT_SURF_SHARED: { - m_layout.format = cldnn::format::nv12; // Other formats are not supported m_memory_object = engine.share_surface(m_layout, m_surf, m_plane); break; } #endif case TensorType::BT_IMG_SHARED: { - m_layout.format = cldnn::format::nv12; // Other formats are not supported m_memory_object = engine.share_image(m_layout, m_mem); break; } @@ -218,8 +219,7 @@ void RemoteTensorImpl::update_hash() { bool RemoteTensorImpl::is_surface() const noexcept { return m_mem_type == TensorType::BT_SURF_SHARED || - m_mem_type == TensorType::BT_IMG_SHARED || - m_mem_type == TensorType::BT_DX_BUF_SHARED; + m_mem_type == TensorType::BT_IMG_SHARED; } cldnn::memory::ptr RemoteTensorImpl::get_memory() const { From 142a72d0f08845dbe35223f99fe198d41c693d03 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Mon, 30 Oct 
2023 13:52:56 +0100 Subject: [PATCH 122/275] [core] Migrate SpaceToBatch operator to new API (#20510) * Drop HostTensor and move to ov namespace * Style * Optimize vector assignment * Optimize vector assignment --------- Co-authored-by: Michal Lukaszewski --- .../include/openvino/op/space_to_batch.hpp | 7 +- src/core/src/op/space_to_batch.cpp | 167 ++++++++---------- 2 files changed, 79 insertions(+), 95 deletions(-) diff --git a/src/core/include/openvino/op/space_to_batch.hpp b/src/core/include/openvino/op/space_to_batch.hpp index 83d47b96ba4c45..bb68e258bd47e6 100644 --- a/src/core/include/openvino/op/space_to_batch.hpp +++ b/src/core/include/openvino/op/space_to_batch.hpp @@ -44,13 +44,8 @@ class OPENVINO_API SpaceToBatch : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool visit_attributes(AttributeVisitor& visitor) override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - -private: - bool evaluate_space_to_batch(const HostTensorVector& outputs, const HostTensorVector& inputs) const; }; } // namespace v1 } // namespace op diff --git a/src/core/src/op/space_to_batch.cpp b/src/core/src/op/space_to_batch.cpp index 1747b6b615648a..d53eda550169a5 100644 --- a/src/core/src/op/space_to_batch.cpp +++ b/src/core/src/op/space_to_batch.cpp @@ -2,39 +2,37 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/space_to_batch.hpp" +#include "openvino/op/space_to_batch.hpp" #include #include #include -#include #include -#include #include "itt.hpp" -#include "ngraph/builder/make_constant.hpp" -#include "ngraph/node.hpp" -#include "ngraph/ops.hpp" -#include "ngraph/shape.hpp" +#include "openvino/core/shape.hpp" +#include "openvino/core/validation_util.hpp" +#include 
"openvino/op/util/attr_types.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" #include "openvino/reference/pad.hpp" #include "openvino/reference/reshape.hpp" - -using namespace std; -using namespace ngraph; - -ngraph::op::v1::SpaceToBatch::SpaceToBatch(const ngraph::Output& data, - const ngraph::Output& block_shape, - const ngraph::Output& pads_begin, - const ngraph::Output& pads_end) +#include "space_to_batch_shape_inference.hpp" + +namespace ov { +namespace op { +namespace v1 { +SpaceToBatch::SpaceToBatch(const Output& data, + const Output& block_shape, + const Output& pads_begin, + const Output& pads_end) : Op({data, block_shape, pads_begin, pads_end}) { - ov::mark_as_precision_sensitive(input(1)); - ov::mark_as_precision_sensitive(input(2)); - ov::mark_as_precision_sensitive(input(3)); + mark_as_precision_sensitive(input(1)); + mark_as_precision_sensitive(input(2)); + mark_as_precision_sensitive(input(3)); constructor_validate_and_infer_types(); } -void op::v1::SpaceToBatch::validate_and_infer_types() { +void SpaceToBatch::validate_and_infer_types() { OV_OP_SCOPE(v1_SpaceToBatch_validate_and_infer_types); const auto& data_type = get_input_element_type(0); const auto& block_shape_type = get_input_element_type(1); @@ -64,84 +62,65 @@ void op::v1::SpaceToBatch::validate_and_infer_types() { set_output_type(0, data_type, output_shape); } -std::shared_ptr ngraph::op::v1::SpaceToBatch::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr SpaceToBatch::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_SpaceToBatch_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); } -bool ngraph::op::v1::SpaceToBatch::visit_attributes(ngraph::AttributeVisitor& visitor) { +bool SpaceToBatch::visit_attributes(AttributeVisitor& visitor) { 
OV_OP_SCOPE(v1_SpaceToBatch_visit_attributes); return true; } -OPENVINO_SUPPRESS_DEPRECATED_START -bool ngraph::op::v1::SpaceToBatch::evaluate_space_to_batch(const HostTensorVector& outputs, - const HostTensorVector& inputs) const { - if (outputs[0]->get_partial_shape().is_dynamic()) { - std::vector input_shapes; - input_shapes.reserve(inputs.size()); - - for (size_t i = 0; i < inputs.size(); ++i) { - input_shapes.push_back(inputs[i]->get_partial_shape()); - if (input_shapes.back().is_dynamic()) { - return false; - } - } - - const auto output_shape = shape_infer(this, input_shapes, make_tensor_accessor(inputs)).front().to_shape(); - - outputs[0]->set_element_type(inputs[0]->get_element_type()); - outputs[0]->set_shape(output_shape); - } - +namespace space_to_batch { +namespace { +bool evaluate(TensorVector& outputs, const TensorVector& inputs) { const auto& data = inputs[0]; const auto& out = outputs[0]; - size_t elem_size = data->get_element_type().size(); + const auto elem_size = data.get_element_type().size(); - auto data_shape = data->get_shape(); + auto data_shape = data.get_shape(); - if (!(data->get_shape().size() == 3 || data->get_shape().size() == 4 || data->get_shape().size() == 5)) { + if (!(data.get_shape().size() == 3 || data.get_shape().size() == 4 || data.get_shape().size() == 5)) { return false; } - size_t block_values_size = shape_size(inputs[1]->get_shape()); - const auto* block_values = inputs[1]->get_data_ptr(); - const auto* pads_begin = inputs[2]->get_data_ptr(); - const auto* pads_end = inputs[3]->get_data_ptr(); + const auto block_values_size = shape_size(inputs[1].get_shape()); + const auto block_values = static_cast(inputs[1].data()); + const auto pads_begin = static_cast(inputs[2].data()); + const auto pads_end = static_cast(inputs[3].data()); const char* pad_value = nullptr; const std::vector pad_zero_value(elem_size, 0); if (inputs.size() == 4) { - pad_value = inputs[3]->get_data_ptr(); + pad_value = static_cast(inputs[3].data()); } 
else { pad_value = pad_zero_value.data(); } - CoordinateDiff pads_begin_vec(shape_size(inputs[2]->get_shape())); - pads_begin_vec.assign(pads_begin, pads_begin + shape_size(inputs[2]->get_shape())); - CoordinateDiff pads_end_vec(shape_size(inputs[2]->get_shape())); - pads_end_vec.assign(pads_end, pads_end + shape_size(inputs[2]->get_shape())); + CoordinateDiff pads_begin_vec(pads_begin, pads_begin + shape_size(inputs[2].get_shape())); + CoordinateDiff pads_end_vec(pads_end, pads_end + shape_size(inputs[2].get_shape())); - ov::Shape padded_shape(data_shape.size()); + Shape padded_shape(data_shape.size()); for (size_t i = 0; i < data_shape.size(); ++i) { padded_shape[i] = data_shape[i] + pads_begin_vec[i] + pads_end_vec[i]; } std::vector padded_data(shape_size(padded_shape) * elem_size); - ov::reference::pad(data->get_data_ptr(), - pad_value, - padded_data.data(), - elem_size, - data_shape, - padded_shape, - pads_begin_vec, - pads_end_vec, - ngraph::op::PadMode::CONSTANT); + reference::pad(static_cast(data.data()), + pad_value, + padded_data.data(), + elem_size, + data_shape, + padded_shape, + pads_begin_vec, + pads_end_vec, + op::PadMode::CONSTANT); data_shape = padded_shape; - ov::Shape dispersed_shape(block_values_size + 1); + Shape dispersed_shape(block_values_size + 1); std::vector axes_order(block_values_size + 1); - ov::Shape squeezed_shape(data_shape.begin(), data_shape.end()); + Shape squeezed_shape(data_shape.begin(), data_shape.end()); std::vector plain_axes_order(block_values_size + 1); std::iota(plain_axes_order.begin(), plain_axes_order.end(), 0); @@ -169,48 +148,58 @@ bool ngraph::op::v1::SpaceToBatch::evaluate_space_to_batch(const HostTensorVecto } } - ov::reference::reshape(flat_data.data(), - dispersed_data.data(), - data_shape, - plain_axes_order, - dispersed_shape, - elem_size); - ov::Shape post_transpose_shape(axes_order.size()); + reference::reshape(flat_data.data(), + dispersed_data.data(), + data_shape, + plain_axes_order, + dispersed_shape, + 
elem_size); + Shape post_transpose_shape(axes_order.size()); for (size_t i = 0; i < axes_order.size(); ++i) { post_transpose_shape[i] = dispersed_shape[axes_order[i]]; } - ov::reference::reshape(dispersed_data.data(), - post_transpose_data.data(), - dispersed_shape, - axes_order, - post_transpose_shape, - elem_size); + reference::reshape(dispersed_data.data(), + post_transpose_data.data(), + dispersed_shape, + axes_order, + post_transpose_shape, + elem_size); squeezed_shape[0] *= block_values[block_idx]; squeezed_shape[block_idx] /= block_values[block_idx]; - ov::reference::reshape(post_transpose_data.data(), - flat_data.data(), - post_transpose_shape, - plain_axes_order, - squeezed_shape, - elem_size); + reference::reshape(post_transpose_data.data(), + flat_data.data(), + post_transpose_shape, + plain_axes_order, + squeezed_shape, + elem_size); data_shape = squeezed_shape; } - out->write(flat_data.data(), elem_size * shape_size(out->get_shape())); + std::memcpy(out.data(out.get_element_type()), flat_data.data(), elem_size * shape_size(out.get_shape())); return true; } +} // namespace +} // namespace space_to_batch -bool ngraph::op::v1::SpaceToBatch::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool SpaceToBatch::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_SpaceToBatch_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + + const auto input_shapes = ov::util::get_tensors_partial_shapes(inputs); + const auto output_shape = shape_infer(this, input_shapes, make_tensor_accessor(inputs)).front().to_shape(); + outputs[0].set_shape(output_shape); - return evaluate_space_to_batch(outputs, inputs); + return space_to_batch::evaluate(outputs, inputs); } -bool ngraph::op::v1::SpaceToBatch::has_evaluate() const { +bool SpaceToBatch::has_evaluate() const { OV_OP_SCOPE(v1_SpaceToBatch_has_evaluate); return !get_input_partial_shape(0).is_dynamic() && (get_input_shape(0).size() == 4 || 
get_input_shape(0).size() == 5); +} // namespace v1 +} // namespace op +} // namespace ov From fdb22c861045f2b69cd6f9cdedb125d82088b27c Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Mon, 30 Oct 2023 14:11:14 +0100 Subject: [PATCH 123/275] [Opset13][PT FE] Update torch bitwise operators (#20339) * Add opset-13 bitwise implementation * Improvements in test * Add transformation BitwiseOps->LogicalOps for bool * Improve existing tests to better test dtypes * Disable transformations for supported bitwise ops * Improve bitwise test inputs * Update src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp Co-authored-by: Katarzyna Mitrus * Update src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp Co-authored-by: Katarzyna Mitrus * Update src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp Co-authored-by: Katarzyna Mitrus * Update src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp Co-authored-by: Katarzyna Mitrus * Update to REGISTER_PASS --------- Co-authored-by: Katarzyna Mitrus --- .../convert_bitwise_to_logical_bool.hpp | 52 +++++++ .../common_optimizations.cpp | 6 + .../convert_bitwise_to_logical_bool.cpp | 116 +++++++++++++++ .../convert_bitwise_to_logical_bool_test.cpp | 124 ++++++++++++++++ src/frontends/pytorch/src/op/bitwise.cpp | 42 +++--- src/frontends/pytorch/src/op_table.cpp | 8 +- .../transformation_pipeline.cpp | 6 + tests/layer_tests/pytorch_tests/test_and.py | 46 ++++-- .../pytorch_tests/test_bitwise_not.py | 29 ---- .../pytorch_tests/test_bitwise_ops.py | 132 ++++++++++++++++++ tests/layer_tests/pytorch_tests/test_or.py | 79 +++++++++-- tests/layer_tests/pytorch_tests/test_xor.py | 35 ++--- 12 files changed, 576 insertions(+), 99 deletions(-) create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp 
create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp create mode 100644 src/common/transformations/tests/op_conversions/convert_bitwise_to_logical_bool_test.cpp delete mode 100644 tests/layer_tests/pytorch_tests/test_bitwise_not.py create mode 100644 tests/layer_tests/pytorch_tests/test_bitwise_ops.py diff --git a/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp b/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp new file mode 100644 index 00000000000000..91e8d430fd1fba --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_bitwise_to_logical_bool.hpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +class TRANSFORMATIONS_API ConvertBitwiseAndToLogicalAnd; +class TRANSFORMATIONS_API ConvertBitwiseNotToLogicalNot; +class TRANSFORMATIONS_API ConvertBitwiseOrToLogicalOr; +class TRANSFORMATIONS_API ConvertBitwiseXorToLogicalXor; +} // namespace pass +} // namespace ov + +class ov::pass::ConvertBitwiseAndToLogicalAnd : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertBitwiseAndToLogicalAnd", "0"); + ConvertBitwiseAndToLogicalAnd(); +}; +class ov::pass::ConvertBitwiseNotToLogicalNot : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertBitwiseNotToLogicalNot", "0"); + ConvertBitwiseNotToLogicalNot(); +}; +class ov::pass::ConvertBitwiseOrToLogicalOr : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertBitwiseOrToLogicalOr", "0"); + ConvertBitwiseOrToLogicalOr(); +}; +class ov::pass::ConvertBitwiseXorToLogicalXor : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertBitwiseXorToLogicalXor", "0"); + 
ConvertBitwiseXorToLogicalXor(); +}; +/** + * @ingroup ie_transformation_common_api + * @brief Converts Bitwise operators to Logical for boolean datatype for plugins that don't support opset13 Bitwise + */ +class ConvertBitwiseToLogical : public ov::pass::GraphRewrite { +public: + OPENVINO_RTTI("ConvertBitwiseToLogical", "0"); + ConvertBitwiseToLogical() { + add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); + } +}; diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 4d4cebe5c62222..4357fdc2607d35 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -65,6 +65,7 @@ #include "transformations/init_node_info.hpp" #include "transformations/op_conversions/batch_norm_decomposition.hpp" #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_bitwise_to_logical_bool.hpp" #include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" #include "transformations/op_conversions/convert_convertlike.hpp" #include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" @@ -226,6 +227,11 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptrset_name("ov::pass::FakeQuantizeFusions"); + // Temporary transformation to allow for PyTorch frontend to + // partially support bitwise operators with boolean inputs for plugins + // that didn't enabled BitwiseOps from opset13 + REGISTER_PASS(manager, ConvertBitwiseToLogical) + // StridesOptimization should be at the very end // because we cannot insert any MaxPools since they may prevent // other optimizations diff --git 
a/src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp b/src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp new file mode 100644 index 00000000000000..130c5ddbab059b --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_bitwise_to_logical_bool.cpp @@ -0,0 +1,116 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_bitwise_to_logical_bool.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/bitwise_and.hpp" +#include "openvino/op/bitwise_not.hpp" +#include "openvino/op/bitwise_or.hpp" +#include "openvino/op/bitwise_xor.hpp" +#include "openvino/op/logical_and.hpp" +#include "openvino/op/logical_not.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/logical_xor.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +ov::pass::ConvertBitwiseAndToLogicalAnd::ConvertBitwiseAndToLogicalAnd() { + MATCHER_SCOPE(ConvertBitwiseAndToLogicalAnd); + auto pattern = + pattern::wrap_type({pattern::any_input(pattern::type_matches(element::boolean)), + pattern::any_input(pattern::type_matches(element::boolean))}); + + const matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto bitwise = std::dynamic_pointer_cast(m.get_match_root()); + if (!bitwise || transformation_callback(bitwise)) { + return false; + } + + const auto logical = std::make_shared(bitwise->input_value(0), + bitwise->input_value(1), + bitwise->get_autob()); + + logical->set_friendly_name(bitwise->get_friendly_name()); + copy_runtime_info(bitwise, logical); + replace_node(bitwise, logical); + + return true; + }; + auto m = std::make_shared(pattern, matcher_name); + register_matcher(m, callback); +} +ov::pass::ConvertBitwiseNotToLogicalNot::ConvertBitwiseNotToLogicalNot() { + MATCHER_SCOPE(ConvertBitwiseNotToLogicalNot); + auto pattern 
= + pattern::wrap_type({pattern::any_input(pattern::type_matches(element::boolean))}); + + const matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto bitwise = std::dynamic_pointer_cast(m.get_match_root()); + if (!bitwise || transformation_callback(bitwise)) { + return false; + } + + const auto logical = std::make_shared(bitwise->input_value(0)); + + logical->set_friendly_name(bitwise->get_friendly_name()); + copy_runtime_info(bitwise, logical); + replace_node(bitwise, logical); + + return true; + }; + auto m = std::make_shared(pattern, matcher_name); + register_matcher(m, callback); +} + +ov::pass::ConvertBitwiseOrToLogicalOr::ConvertBitwiseOrToLogicalOr() { + MATCHER_SCOPE(ConvertBitwiseOrToLogicalOr); + auto pattern = + pattern::wrap_type({pattern::any_input(pattern::type_matches(element::boolean)), + pattern::any_input(pattern::type_matches(element::boolean))}); + + const matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto bitwise = std::dynamic_pointer_cast(m.get_match_root()); + if (!bitwise || transformation_callback(bitwise)) { + return false; + } + + const auto logical = std::make_shared(bitwise->input_value(0), + bitwise->input_value(1), + bitwise->get_autob()); + + logical->set_friendly_name(bitwise->get_friendly_name()); + copy_runtime_info(bitwise, logical); + replace_node(bitwise, logical); + + return true; + }; + auto m = std::make_shared(pattern, matcher_name); + register_matcher(m, callback); +} + +ov::pass::ConvertBitwiseXorToLogicalXor::ConvertBitwiseXorToLogicalXor() { + MATCHER_SCOPE(ConvertBitwiseXorToLogicalXor); + auto pattern = + pattern::wrap_type({pattern::any_input(pattern::type_matches(element::boolean)), + pattern::any_input(pattern::type_matches(element::boolean))}); + + const matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto bitwise = std::dynamic_pointer_cast(m.get_match_root()); + if (!bitwise || transformation_callback(bitwise)) { + return false; + } + + const auto 
logical = std::make_shared(bitwise->input_value(0), + bitwise->input_value(1), + bitwise->get_autob()); + + logical->set_friendly_name(bitwise->get_friendly_name()); + copy_runtime_info(bitwise, logical); + replace_node(bitwise, logical); + + return true; + }; + auto m = std::make_shared(pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/op_conversions/convert_bitwise_to_logical_bool_test.cpp b/src/common/transformations/tests/op_conversions/convert_bitwise_to_logical_bool_test.cpp new file mode 100644 index 00000000000000..761a2fb93d0598 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_bitwise_to_logical_bool_test.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_bitwise_to_logical_bool.hpp" + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset13.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/utils/utils.hpp" +using namespace ov; +using namespace testing; + +namespace { + +std::shared_ptr create_bitwise_model(std::string op_type, const ov::element::Type input_type) { + const auto lhs = std::make_shared(input_type, ov::Shape{1, 3, 100, 100}); + const auto rhs = std::make_shared(input_type, ov::Shape{1, 3, 100, 100}); + + std::shared_ptr bitwise; + ParameterVector params{lhs, rhs}; + if (op_type == "and") { + bitwise = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } else if (op_type == "not") { + bitwise = std::make_shared(lhs); + params = {lhs}; + } else if (op_type == "or") { + bitwise = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } else if (op_type == "xor") { + bitwise = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } + + bitwise->set_friendly_name("bitwise"); + + return std::make_shared(bitwise->outputs(), params); 
+} + +std::shared_ptr create_logical_model(std::string op_type) { + const auto lhs = std::make_shared(ov::element::boolean, ov::Shape{1, 3, 100, 100}); + const auto rhs = std::make_shared(ov::element::boolean, ov::Shape{1, 3, 100, 100}); + std::shared_ptr logical; + ParameterVector params = {lhs, rhs}; + if (op_type == "and") { + logical = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } else if (op_type == "not") { + logical = std::make_shared(lhs); + params = {lhs}; + } else if (op_type == "or") { + logical = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } else if (op_type == "xor") { + logical = std::make_shared(lhs, rhs, op::AutoBroadcastType::NONE); + } + + logical->set_friendly_name("logical"); + + return std::make_shared(logical->outputs(), params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_and_i32) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("and", element::i32); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_not_i32) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("not", element::i32); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_or_i32) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("or", element::i32); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_xor_i32) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("xor", element::i32); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_and_boolean) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("and", element::boolean); + model_ref = create_logical_model("and"); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + 
+TEST_F(TransformationTestsF, ConvertBitwiseToLogical_not_boolean) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("not", element::boolean); + model_ref = create_logical_model("not"); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_or_boolean) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("or", element::boolean); + model_ref = create_logical_model("or"); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, ConvertBitwiseToLogical_xor_boolean) { + auto transform = manager.register_pass(); + transform->add_matcher(); + model = create_bitwise_model("xor", element::boolean); + model_ref = create_logical_model("xor"); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} diff --git a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp index 673ba77dda14ac..84465502969d81 100644 --- a/src/frontends/pytorch/src/op/bitwise.cpp +++ b/src/frontends/pytorch/src/op/bitwise.cpp @@ -3,10 +3,10 @@ // #include "openvino/frontend/pytorch/node_context.hpp" -#include "openvino/op/logical_and.hpp" -#include "openvino/op/logical_not.hpp" -#include "openvino/op/logical_or.hpp" -#include "openvino/op/logical_xor.hpp" +#include "openvino/op/bitwise_and.hpp" +#include "openvino/op/bitwise_not.hpp" +#include "openvino/op/bitwise_or.hpp" +#include "openvino/op/bitwise_xor.hpp" #include "utils.hpp" namespace ov { @@ -17,9 +17,7 @@ namespace op { OutputVector translate_bitwise_not(const NodeContext& context) { num_inputs_check(context, 1, 2); auto x = context.get_input(0); - 
FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not supported only for boolean input"); - auto not_x = context.mark_node(std::make_shared(x)); + auto not_x = context.mark_node(std::make_shared(x)); if (!context.input_is_none(1)) { context.mutate_input(1, not_x); } @@ -27,32 +25,38 @@ OutputVector translate_bitwise_not(const NodeContext& context) { }; OutputVector translate_bitwise_and(const NodeContext& context) { - num_inputs_check(context, 2, 2); + num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not supported only for boolean input"); - auto and_x = context.mark_node(std::make_shared(x, y)); + align_eltwise_input_types(context, x, y, false); + auto and_x = context.mark_node(std::make_shared(x, y)); + if (!context.input_is_none(2)) { + context.mutate_input(2, and_x); + } return {and_x}; }; OutputVector translate_bitwise_or(const NodeContext& context) { - num_inputs_check(context, 2, 2); + num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_not supported only for boolean input"); - auto or_x = context.mark_node(std::make_shared(x, y)); + align_eltwise_input_types(context, x, y, false); + auto or_x = context.mark_node(std::make_shared(x, y)); + if (!context.input_is_none(2)) { + context.mutate_input(2, or_x); + } return {or_x}; }; OutputVector translate_bitwise_xor(const NodeContext& context) { - num_inputs_check(context, 2, 2); + num_inputs_check(context, 2, 3); auto x = context.get_input(0); auto y = context.get_input(1); - FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean), - "aten::bitwise_xor supported only for boolean input"); - auto xor_x = context.mark_node(std::make_shared(x, y)); + 
align_eltwise_input_types(context, x, y, false); + auto xor_x = context.mark_node(std::make_shared(x, y)); + if (!context.input_is_none(2)) { + context.mutate_input(2, xor_x); + } return {xor_x}; }; diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index de030ade50d797..3f71d22e428c5f 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -43,6 +43,7 @@ OP_CONVERTER(translate_batch_norm); OP_CONVERTER(translate_bitwise_and); OP_CONVERTER(translate_bitwise_not); OP_CONVERTER(translate_bitwise_or); +OP_CONVERTER(translate_bitwise_xor); OP_CONVERTER(translate_cat); OP_CONVERTER(translate_cdist); OP_CONVERTER(translate_channel_shuffle); @@ -230,11 +231,11 @@ OP_CONVERTER(translate_transpose_fx); // Supported ops for TorchScript const std::map get_supported_ops_ts() { return { - {"aten::__and__", op::translate_and}, + {"aten::__and__", op::translate_bitwise_and}, {"aten::__derive_index", op::translate_derive_index}, {"aten::__getitem__", op::translate_getitem}, {"aten::__not__", op::translate_1to1_match_1_inputs}, - {"aten::__or__", op::translate_or}, + {"aten::__or__", op::translate_bitwise_or}, {"aten::__xor__", op::translate_bitwise_xor}, {"aten::__range_length", op::translate_range_length}, {"aten::_convolution", op::translate_convolution}, @@ -280,7 +281,10 @@ const std::map get_supported_ops_ts() { {"aten::broadcast_to", op::translate_expand}, {"aten::baddbmm", op::translate_addmm}, {"aten::batch_norm", op::translate_batch_norm}, + {"aten::bitwise_and", op::translate_bitwise_and}, {"aten::bitwise_not", op::translate_bitwise_not}, + {"aten::bitwise_or", op::translate_bitwise_or}, + {"aten::bitwise_xor", op::translate_bitwise_xor}, {"aten::bmm", op::translate_1to1_match_2_inputs}, {"aten::Bool", op::translate_bool}, {"aten::cat", op::translate_cat}, diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp 
b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index e979270fee3318..590af95e01812d 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -33,6 +33,7 @@ #include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/op_conversions/convert_batch_to_space.hpp" +#include "transformations/op_conversions/convert_bitwise_to_logical_bool.hpp" #include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" #include "transformations/op_conversions/convert_depth_to_space.hpp" #include "transformations/op_conversions/convert_gather_downgrade.hpp" @@ -444,6 +445,11 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_ENABLE_PASS_COMMON(manager, ov::pass::ConvertDetectionOutput1ToDetectionOutput8); CPU_ENABLE_PASS_COMMON(manager, ov::pass::ConvertROIAlign3To9); + CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertBitwiseAndToLogicalAnd); + CPU_ENABLE_PASS_COMMON(manager, ov::pass::ConvertBitwiseNotToLogicalNot); + CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertBitwiseOrToLogicalOr); + CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertBitwiseXorToLogicalXor); + if (useLpt) { CPU_LPT_SCOPE(LowPrecisionTransformations_Part3); CPU_SET_CALLBACK_COMMON(manager, diff --git a/tests/layer_tests/pytorch_tests/test_and.py b/tests/layer_tests/pytorch_tests/test_and.py index 815cfede7d0fa8..07e9b1d660d6a3 100644 --- a/tests/layer_tests/pytorch_tests/test_and.py +++ b/tests/layer_tests/pytorch_tests/test_and.py @@ -9,13 +9,11 @@ class TestAnd(PytorchLayerTest): - def _prepare_input(self): return self.input_data def create_model_tensor_input(self): class aten_and_tensor(torch.nn.Module): - def __init__(self) -> None: super().__init__() @@ -25,10 +23,9 @@ def forward(self, tensor_a, tensor_b): ref_net = None return 
aten_and_tensor(), ref_net, "aten::__and__" - + def create_model_bool_input(self): class aten_and_bool(torch.nn.Module): - def __init__(self) -> None: super().__init__() @@ -39,18 +36,43 @@ def forward(self, bool_a: bool, bool_b: bool): return aten_and_bool(), ref_net, "aten::__and__" + def create_model_int_input(self): + class aten_and_int(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, int_a: int, int_b: int): + return int_a & int_b + + ref_net = None + + return aten_and_int(), ref_net, "aten::__and__" + @pytest.mark.nightly @pytest.mark.precommit def test_and_tensor(self, ie_device, precision, ir_version): - self.input_data = (np.array([True, False, False], dtype=np.bool_), np.array( - [True, True, False], dtype=np.bool_)) - self._test(*self.create_model_tensor_input(), - ie_device, precision, ir_version) + self.input_data = ( + np.array([True, False, False], dtype=np.bool_), + np.array([True, True, False], dtype=np.bool_), + ) + self._test(*self.create_model_tensor_input(), ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit def test_and_bool(self, ie_device, precision, ir_version): - self.input_data = (np.array(True, dtype=np.bool_), - np.array(True, dtype=np.bool_)) - self._test(*self.create_model_bool_input(), - ie_device, precision, ir_version) + self.input_data = (np.array(True, dtype=np.bool_), np.array(True, dtype=np.bool_)) + self._test(*self.create_model_bool_input(), ie_device, precision, ir_version) + + @pytest.mark.nightly + @pytest.mark.precommit + def test_and_int(self, ie_device, precision, ir_version): + self.input_data = (np.array(3, dtype=np.int32), np.array(4, dtype=np.int32)) + self._test(*self.create_model_int_input(), ie_device, precision, ir_version) + + @pytest.mark.nightly + @pytest.mark.precommit + def test_and_tensor(self, ie_device, precision, ir_version): + self.input_data = (np.array([3, 5, 8], dtype=np.int32), np.array([7, 11, 2], dtype=np.int32)) + self._test( + 
*self.create_model_tensor_input(), ie_device, precision, ir_version, freeze_model=False, trace_model=True + ) diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_not.py b/tests/layer_tests/pytorch_tests/test_bitwise_not.py deleted file mode 100644 index c303fd1e1b132a..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_bitwise_not.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - - -class TestBitwiseNot(PytorchLayerTest): - def _prepare_input(self): - import numpy as np - return ((np.random.randn(1, 5) > 0).astype(bool),) - - def create_model(self): - import torch - - class aten_bitwise_not(torch.nn.Module): - - def forward(self, x): - return torch.bitwise_not(x) - - ref_net = None - - return aten_bitwise_not(), ref_net, "aten::bitwise_not" - - @pytest.mark.nightly - @pytest.mark.precommit - def test_bitwise_not(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py new file mode 100644 index 00000000000000..b5c8b456fabd83 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py @@ -0,0 +1,132 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import torch +from pytorch_layer_test_class import PytorchLayerTest + + +class TestBitwiseOp(PytorchLayerTest): + def _prepare_input(self, out, unary, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape): + choices = np.array([0, 1, 255, 7]) + x = np.random.choice(choices, lhs_shape).astype(lhs_dtype) + if unary: + return (x,) if not out else (x, np.zeros_like(x).astype(lhs_dtype)) + y = np.random.choice(choices, rhs_shape).astype(rhs_dtype) + if not out: + return x, y + return x, y, 
np.zeros_like(x).astype(lhs_dtype) + np.zeros_like(y).astype(rhs_dtype) + + def create_model(self, op_name, out): + ops = { + "and": torch.bitwise_and, + "or": torch.bitwise_or, + "xor": torch.bitwise_xor, + "not": torch.bitwise_not, + } + op = ops[op_name] + + class aten_bitwise(torch.nn.Module): + def __init__(self, op, out) -> None: + super().__init__() + self.op = op + if op == torch.bitwise_not: + self.forward = self.forward_not + if out: + self.forward = self.forward_out if not op == torch.bitwise_not else self.forward_not_out + + def forward(self, tensor_a, tensor_b): + return self.op(tensor_a, tensor_b) + + def forward_out(self, tensor_a, tensor_b, out): + return self.op(tensor_a, tensor_b, out=out), out + + def forward_not(self, tensor_a): + return self.op(tensor_a) + + def forward_not_out(self, tensor_a, out): + return self.op(tensor_a, out=out), out + + ref_net = None + + return aten_bitwise(op, out), ref_net, f"aten::bitwise_{op_name}" + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("op_type", ["and", "or", "not", "xor"]) + @pytest.mark.parametrize("lhs_dtype", ["bool", "int32", "uint8", "int64"]) + @pytest.mark.parametrize("rhs_dtype", ["bool", "int32", "uint8", "int64"]) + @pytest.mark.parametrize( + ("lhs_shape", "rhs_shape"), + [ + ([2, 3], [2, 3]), + ([2, 3], []), + ([], [2, 3]), + ], + ) + @pytest.mark.parametrize("out", [False, True]) + def test_bitwise_mixed_dtypes( + self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version + ): + self._test( + *self.create_model(op_type, out), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input={ + "out": out, + "unary": op_type == "not", + "lhs_dtype": lhs_dtype, + "rhs_dtype": rhs_dtype, + "lhs_shape": lhs_shape, + "rhs_shape": rhs_shape, + }, + freeze_model=False, + trace_model=True, + ) + + +class TestBitwiseOperators(PytorchLayerTest): + def _prepare_input(self, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape): + choices = 
np.array([0, 1, 255, 7]) + x = np.random.choice(choices, lhs_shape).astype(lhs_dtype) + y = np.random.choice(choices, rhs_shape).astype(rhs_dtype) + return x, y + + def create_model(self): + class aten_bitwise(torch.nn.Module): + def forward(self, lhs, rhs): + return lhs & rhs, ~lhs, lhs | rhs, lhs ^ rhs + + ref_net = None + + return aten_bitwise(), ref_net, ("aten::__and__", "aten::bitwise_not", "aten::__or__", "aten::__xor__") + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("lhs_dtype", ["bool", "int32"]) + @pytest.mark.parametrize("rhs_dtype", ["bool", "int32"]) + @pytest.mark.parametrize( + ("lhs_shape", "rhs_shape"), + [ + ([2, 3], [2, 3]), + ([2, 3], []), + ([], [2, 3]), + ], + ) + def test_bitwise_operators(self, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version): + self._test( + *self.create_model(), + ie_device, + precision, + ir_version, + kwargs_to_prepare_input={ + "lhs_dtype": lhs_dtype, + "rhs_dtype": rhs_dtype, + "lhs_shape": lhs_shape, + "rhs_shape": rhs_shape, + }, + trace_model=True, + freeze_model=False, + ) diff --git a/tests/layer_tests/pytorch_tests/test_or.py b/tests/layer_tests/pytorch_tests/test_or.py index bde1e61ecce74d..e096e125865174 100644 --- a/tests/layer_tests/pytorch_tests/test_or.py +++ b/tests/layer_tests/pytorch_tests/test_or.py @@ -1,29 +1,78 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import numpy as np import pytest +import torch + from pytorch_layer_test_class import PytorchLayerTest -class TestLog(PytorchLayerTest): +class TestOr(PytorchLayerTest): def _prepare_input(self): - import numpy as np - return (np.random.randint(0, 255, (20, 30, 40, 50)),) + return self.input_data + + def create_model_tensor_input(self): + class aten_or_tensor(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, tensor_a, tensor_b): + return tensor_a | tensor_b + + ref_net = None + + return aten_or_tensor(), 
ref_net, "aten::__or__" + + def create_model_bool_input(self): + class aten_or_bool(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, bool_a: bool, bool_b: bool): + return bool_a | bool_b + + ref_net = None + + return aten_or_bool(), ref_net, "aten::__or__" + + def create_model_int_input(self): + class aten_or_int(torch.nn.Module): + def __init__(self) -> None: + super().__init__() - def create_model(self): - import torch + def forward(self, int_a: int, int_b: int): + return int_a | int_b - class aten_or(torch.nn.Module): - def forward(self, x): - res = torch.ByteTensor(x.size()).zero_() - res[:, :, :, 1:] = res[:, :, :, 1:] | (x[:, :, :, 1:] != x[:, :, :, :-1]) - res[:, :, :, :-1] = res[:, :, :, :-1] | (x[:, :, :, 1:] != x[:, :, :, :-1]) - return res.float() + ref_net = None - return aten_or(), None, "aten::__or__" + return aten_or_int(), ref_net, "aten::__or__" + + @pytest.mark.nightly + @pytest.mark.precommit + def test_or_tensor(self, ie_device, precision, ir_version): + self.input_data = ( + np.array([True, False, False], dtype=np.bool_), + np.array([True, True, False], dtype=np.bool_), + ) + self._test(*self.create_model_tensor_input(), ie_device, precision, ir_version) + + @pytest.mark.nightly + @pytest.mark.precommit + def test_or_bool(self, ie_device, precision, ir_version): + self.input_data = (np.array(True, dtype=np.bool_), np.array(True, dtype=np.bool_)) + self._test(*self.create_model_bool_input(), ie_device, precision, ir_version) + + @pytest.mark.nightly + @pytest.mark.precommit + def test_or_int(self, ie_device, precision, ir_version): + self.input_data = (np.array(3, dtype=np.int32), np.array(4, dtype=np.int32)) + self._test(*self.create_model_int_input(), ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit - def test_or(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, - dynamic_shapes=False, trace_model=True, 
use_convert_model=True) + def test_or_tensor(self, ie_device, precision, ir_version): + self.input_data = (np.array([3, 5, 8], dtype=np.int32), np.array([7, 11, 2], dtype=np.int32)) + self._test( + *self.create_model_tensor_input(), ie_device, precision, ir_version, freeze_model=False, trace_model=True + ) diff --git a/tests/layer_tests/pytorch_tests/test_xor.py b/tests/layer_tests/pytorch_tests/test_xor.py index 6fc9b467a78bf2..c1d3730bb15066 100644 --- a/tests/layer_tests/pytorch_tests/test_xor.py +++ b/tests/layer_tests/pytorch_tests/test_xor.py @@ -9,13 +9,11 @@ class TestXor(PytorchLayerTest): - def _prepare_input(self): return self.input_data def create_model_tensor_input(self): class aten_xor_tensor(torch.nn.Module): - def __init__(self) -> None: super().__init__() @@ -28,7 +26,6 @@ def forward(self, tensor_a, tensor_b): def create_model_bool_input(self): class aten_xor_bool(torch.nn.Module): - def __init__(self) -> None: super().__init__() @@ -41,7 +38,6 @@ def forward(self, bool_a: bool, bool_b: bool): def create_model_int_input(self): class aten_xor_int(torch.nn.Module): - def __init__(self) -> None: super().__init__() @@ -55,33 +51,28 @@ def forward(self, int_a: int, int_b: int): @pytest.mark.nightly @pytest.mark.precommit def test_xor_tensor(self, ie_device, precision, ir_version): - self.input_data = (np.array([True, False, False], dtype=np.bool_), np.array( - [True, True, False], dtype=np.bool_)) - self._test(*self.create_model_tensor_input(), - ie_device, precision, ir_version) + self.input_data = ( + np.array([True, False, False], dtype=np.bool_), + np.array([True, True, False], dtype=np.bool_), + ) + self._test(*self.create_model_tensor_input(), ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit def test_xor_bool(self, ie_device, precision, ir_version): - self.input_data = (np.array(True, dtype=np.bool_), - np.array(True, dtype=np.bool_)) - self._test(*self.create_model_bool_input(), - ie_device, precision, ir_version) + 
self.input_data = (np.array(True, dtype=np.bool_), np.array(True, dtype=np.bool_)) + self._test(*self.create_model_bool_input(), ie_device, precision, ir_version) - @pytest.mark.xfail(reason="bitwise_xor is not implemented") @pytest.mark.nightly @pytest.mark.precommit def test_xor_int(self, ie_device, precision, ir_version): - self.input_data = (np.array(3, dtype=np.int), - np.array(4, dtype=np.int)) - self._test(*self.create_model_int_input(), - ie_device, precision, ir_version) + self.input_data = (np.array(3, dtype=np.int32), np.array(4, dtype=np.int32)) + self._test(*self.create_model_int_input(), ie_device, precision, ir_version) - @pytest.mark.xfail(reason="bitwise_xor is not implemented") @pytest.mark.nightly @pytest.mark.precommit def test_xor_tensor(self, ie_device, precision, ir_version): - self.input_data = (np.array([3, 5, 8], dtype=np.int), np.array( - [7, 11, 2], dtype=np.int)) - self._test(*self.create_model_tensor_input(), - ie_device, precision, ir_version) + self.input_data = (np.array([3, 5, 8], dtype=np.int32), np.array([7, 11, 2], dtype=np.int32)) + self._test( + *self.create_model_tensor_input(), ie_device, precision, ir_version, freeze_model=False, trace_model=True + ) From 56cb121e8a5aa1381c325d0b5dc093cb1a894e24 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 30 Oct 2023 14:17:02 +0100 Subject: [PATCH 124/275] Fix coverity issues (#20743) --- src/frontends/pytorch/src/input_model.hpp | 2 +- src/frontends/pytorch/src/op/avg_poolnd.cpp | 3 ++- src/frontends/pytorch/src/op/max_poolnd.cpp | 2 +- src/frontends/pytorch/src/op/pixel_shuffle.cpp | 3 ++- src/frontends/pytorch/src/op/pythonop.cpp | 2 +- src/frontends/pytorch/src/op/scatter.cpp | 12 ++++++++---- src/frontends/pytorch/src/transforms.cpp | 2 +- .../src/transforms/append_list_unpack_replacer.cpp | 2 +- .../pytorch/src/transforms/aten_cat_replacer.cpp | 4 ++-- .../src/transforms/index_loop_getitem_replacer.cpp | 2 +- .../src/transforms/prim_list_unpack_replacer.cpp | 14 
+++++++------- .../prim_tuple_unpack_parameter_replacer.cpp | 2 +- .../src/transforms/rfftn_complex_replacer.cpp | 2 +- .../src/transforms/tuple_unpack_replacer.cpp | 10 +++++----- .../pytorch/src/transforms/u4_block_repack.cpp | 3 ++- src/frontends/pytorch/src/translate_session.cpp | 6 +++--- 16 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/frontends/pytorch/src/input_model.hpp b/src/frontends/pytorch/src/input_model.hpp index 9b322e6538983c..d0295f12de12eb 100644 --- a/src/frontends/pytorch/src/input_model.hpp +++ b/src/frontends/pytorch/src/input_model.hpp @@ -17,7 +17,7 @@ class Place; class TorchDecoder; struct PlaceDesc { - PlaceDesc(std::shared_ptr value) : m_value(value) {} + PlaceDesc(const std::shared_ptr& value) : m_value(value) {} std::shared_ptr m_value; }; diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp index 9a9cf28bad4782..486d4dfb0980c6 100644 --- a/src/frontends/pytorch/src/op/avg_poolnd.cpp +++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp @@ -60,7 +60,8 @@ OutputVector translate_avg_poolnd(const NodeContext& context) { auto pads_len = context.mark_node(v0::Constant::create(element::i32, Shape{}, {pads.size()})); auto pads_diff = context.mark_node(std::make_shared(rank, pads_len)); auto pads_remaining = context.mark_node(std::make_shared(zero_i32, pads_diff)); - auto padding = context.mark_node(std::make_shared(OutputVector{pads_remaining, pad_values}, 0)); + auto padding = context.mark_node( + std::make_shared(OutputVector{std::move(pads_remaining), std::move(pad_values)}, 0)); input = context.mark_node(std::make_shared(input, padding, padding, zero, ov::op::PadMode::CONSTANT)); pads = Shape(pads.size(), 0); } diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp index 30bf1546ec87e8..917431ba726a1b 100644 --- a/src/frontends/pytorch/src/op/max_poolnd.cpp +++ b/src/frontends/pytorch/src/op/max_poolnd.cpp @@ -113,7 +113,7 @@ 
OutputVector translate_max_poolnd(const NodeContext& context) { if (context.get_output_size() == 2) { auto out1 = res->output(0); auto out2 = res->output(1); - return {out1, out2}; + return {std::move(out1), std::move(out2)}; } else { return {res}; } diff --git a/src/frontends/pytorch/src/op/pixel_shuffle.cpp b/src/frontends/pytorch/src/op/pixel_shuffle.cpp index fc35b44d9a304a..1e83f909e18a0a 100644 --- a/src/frontends/pytorch/src/op/pixel_shuffle.cpp +++ b/src/frontends/pytorch/src/op/pixel_shuffle.cpp @@ -127,7 +127,8 @@ OutputVector translate_channel_shuffle(const NodeContext& context) { auto k = context.mark_node(std::make_shared(c, groups, true)); auto g = context.mark_node(std::make_shared(groups, zero)); // 1. Reshape input [N, G, K=C/G, -1] - auto reshape_indices = context.mark_node(std::make_shared(OutputVector{n, g, k, neg_1}, 0)); + auto reshape_indices = context.mark_node( + std::make_shared(OutputVector{std::move(n), std::move(g), std::move(k), std::move(neg_1)}, 0)); x = context.mark_node(std::make_shared(x, reshape_indices, false)); // 2. 
Transpose to [N, K, G, -1] auto permute_indices = context.mark_node(v0::Constant::create(element::i32, Shape{4}, {0, 2, 1, 3})); diff --git a/src/frontends/pytorch/src/op/pythonop.cpp b/src/frontends/pytorch/src/op/pythonop.cpp index 4aa142f04b58ed..ccaac4a4909004 100644 --- a/src/frontends/pytorch/src/op/pythonop.cpp +++ b/src/frontends/pytorch/src/op/pythonop.cpp @@ -33,7 +33,7 @@ OutputVector translate_pythonop(const NodeContext& context) { } OutputVector outputs{}; - for (auto result : body->get_results()) { + for (auto& result : body->get_results()) { auto output = result->get_input_source_output(0); outputs.push_back(context.mark_output(output)); } diff --git a/src/frontends/pytorch/src/op/scatter.cpp b/src/frontends/pytorch/src/op/scatter.cpp index 4c60f12eed546b..68abc51298265b 100644 --- a/src/frontends/pytorch/src/op/scatter.cpp +++ b/src/frontends/pytorch/src/op/scatter.cpp @@ -18,7 +18,10 @@ namespace op { using namespace ov::op; namespace { -Output prepare_source(const NodeContext& context, Output src, Output index, Output input) { +Output prepare_source(const NodeContext& context, + const Output& src, + const Output& index, + const Output& input) { auto src_partial_shape = src.get_partial_shape(); auto index_shape_rank = get_shape_rank(context, index); auto index_shape = std::get<0>(index_shape_rank); @@ -28,8 +31,9 @@ Output prepare_source(const NodeContext& context, Output src, Output // into shape of indices. // TODO: Figure out way to dynamically broadcast scalar src only, without affecting Tensor src. Current // implementation will fail if Scalar source would have dynamic rank. 
+ auto _src = std::move(src); if (src_partial_shape.rank().is_static() && src_partial_shape.rank().get_length() == 0) { - src = context.mark_node(std::make_shared(src, index_shape)); + _src = context.mark_node(std::make_shared(_src, index_shape)); } auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); @@ -38,13 +42,13 @@ Output prepare_source(const NodeContext& context, Output src, Output auto ones = context.mark_node(std::make_shared(const_1, index_rank)); // In torch indices can be of different shape than source tensor. Create slice to trim source tensor to shape of // indices. - auto src_pruned = context.mark_node(std::make_shared(src, zeros, index_shape, ones)); + auto src_pruned = context.mark_node(std::make_shared(_src, zeros, index_shape, ones)); auto src_input_dtype = context.mark_node(std::make_shared(src_pruned, input)); return src_input_dtype; }; -const v12::ScatterElementsUpdate::Reduction get_reduction_mode(std::string pt_reduce_mode) { +const v12::ScatterElementsUpdate::Reduction get_reduction_mode(const std::string& pt_reduce_mode) { static const std::unordered_map TORCH_REDUCTION_TO_OV{ {"add", v12::ScatterElementsUpdate::Reduction::SUM}, {"multiply", v12::ScatterElementsUpdate::Reduction::PROD}, diff --git a/src/frontends/pytorch/src/transforms.cpp b/src/frontends/pytorch/src/transforms.cpp index 976a99fe1723a2..962c254fbb8130 100644 --- a/src/frontends/pytorch/src/transforms.cpp +++ b/src/frontends/pytorch/src/transforms.cpp @@ -346,7 +346,7 @@ class DecomposeListResults : public pass::ModelPass { // Replace a single result with 6 results, per each input of parent list_pack auto inputs = list_pack->inputs(); - for (auto input : inputs) { + for (auto& input : inputs) { model->add_results({make_shared(input.get_source_output())}); // TODO: Keep tracking between original and new Results } diff --git a/src/frontends/pytorch/src/transforms/append_list_unpack_replacer.cpp 
b/src/frontends/pytorch/src/transforms/append_list_unpack_replacer.cpp index d3dfc7467b313b..796c2125a7da8b 100644 --- a/src/frontends/pytorch/src/transforms/append_list_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/append_list_unpack_replacer.cpp @@ -76,7 +76,7 @@ AppendListUnpackReplacer::AppendListUnpackReplacer() { auto split = std::make_shared(inputs[index], axis_0, list_unpack->get_output_size()); NodeVector to_copy_rt{axis_0, split}; OutputVector res; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { auto squeeze = std::make_shared(output, axis_0); to_copy_rt.push_back(squeeze); res.push_back(squeeze); diff --git a/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp index fe0c828a33c082..38352babc7c96c 100644 --- a/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_cat_replacer.cpp @@ -73,7 +73,7 @@ AtenCatToConcat::AtenCatToConcat() { auto body = loop->get_function(); auto output_index = cat->input(0).get_source_output().get_index(); int64_t body_result_index = -1; - for (auto out_desc : loop->get_output_descriptions()) { + for (auto& out_desc : loop->get_output_descriptions()) { if (out_desc->m_output_index == output_index) { body_result_index = static_cast(out_desc->m_body_value_index); break; @@ -99,7 +99,7 @@ AtenCatToConcat::AtenCatToConcat() { auto body_param_index = body->get_parameter_index(param); FRONT_END_GENERAL_CHECK(body_param_index >= 0, "Couldn't find parameter in body parameters."); int64_t input_index = -1; - for (auto in_desc : loop->get_input_descriptions()) { + for (auto& in_desc : loop->get_input_descriptions()) { if (in_desc->m_body_parameter_index == static_cast(body_param_index)) { input_index = static_cast(in_desc->m_input_index); break; diff --git a/src/frontends/pytorch/src/transforms/index_loop_getitem_replacer.cpp 
b/src/frontends/pytorch/src/transforms/index_loop_getitem_replacer.cpp index 34154fd9cc43df..3ad535ba412561 100644 --- a/src/frontends/pytorch/src/transforms/index_loop_getitem_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/index_loop_getitem_replacer.cpp @@ -62,7 +62,7 @@ IndexLoopGetitemReplacer::IndexLoopGetitemReplacer() { auto body = loop_op->get_function(); std::shared_ptr chunk_param; - for (auto input_desc : loop_op->get_input_descriptions()) { + for (auto& input_desc : loop_op->get_input_descriptions()) { if (input_desc->m_input_index == chunk_idx) { chunk_param = body->get_parameters().at(input_desc->m_body_parameter_index); break; diff --git a/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp b/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp index e5fa463af31d00..2a59a27e01fe12 100644 --- a/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/prim_list_unpack_replacer.cpp @@ -181,14 +181,14 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { return false; } Output final_shape_t = opset10::Constant::create(element::i32, Shape{}, {0}); - for (auto input : tensors->inputs()) { + for (auto& input : tensors->inputs()) { auto tensor_shape = rg.make(input.get_source_output(), element::i32); final_shape_t = rg.make(final_shape_t, tensor_shape, ov::op::BroadcastType::BIDIRECTIONAL); } auto final_shape = rg.make(final_shape_t, element::i32); OutputVector outputs; - for (auto input : tensors->inputs()) { + for (auto& input : tensors->inputs()) { outputs.push_back(rg.make(input.get_source_output(), final_shape)); } copy_runtime_info_and_name(list_unpack, rg.get(), {input_node}); @@ -202,7 +202,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { const auto num_splits = list_unpack->get_output_size(); auto split = rg.make(input, axis, num_splits); OutputVector outputs; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { const auto 
squeeze = rg.make(output, axis); outputs.push_back(squeeze); } @@ -218,7 +218,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { const auto num_splits = list_unpack->get_output_size(); auto split = rg.make(non_zero, axis, num_splits); OutputVector outputs; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { const auto squeeze = rg.make(output, axis); outputs.push_back(squeeze); } @@ -234,7 +234,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { const auto num_splits = list_unpack->get_output_size(); auto split = rg.make(non_zero, axis, num_splits); OutputVector outputs; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { const auto squeeze = rg.make(output, axis); outputs.push_back(squeeze); } @@ -310,7 +310,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { auto split = rg.make(shape_of, axis_0, list_unpack->get_output_size()); OutputVector res; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { auto squeeze = rg.make(output, axis_0); res.push_back(squeeze); } @@ -328,7 +328,7 @@ PrimListUnpackReplacer::PrimListUnpackReplacer() { auto split = rg.make(slice, axis_0, list_unpack->get_output_size()); OutputVector res; - for (auto output : split->outputs()) { + for (auto& output : split->outputs()) { auto squeeze = rg.make(output, axis_0); res.push_back(squeeze); } diff --git a/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp index 12577daa6f2456..f18befab77927f 100644 --- a/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/prim_tuple_unpack_parameter_replacer.cpp @@ -94,7 +94,7 @@ bool DecomposeTupleParameters::run_on_model(const std::shared_ptr& model) auto new_parameter = std::make_shared(et, ps); - for (auto input : inputs) { + for (auto& input : inputs) { auto names = 
input.get_tensor().get_names(); input.replace_source_output(new_parameter->output(0)); new_parameter->output(0).add_names(names); diff --git a/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp b/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp index 82fd18f44f2029..01e99609a0df56 100644 --- a/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/rfftn_complex_replacer.cpp @@ -130,7 +130,7 @@ RFFTNComplexReplacer::RFFTNComplexReplacer() { auto normalized_rfftn_splitted = std::make_shared(normalized_rfftn, const_neg_1, 2); auto rfftn_outs = rfftn_op->get_users(); bool rval = false; - for (auto out : rfftn_outs) { + for (auto& out : rfftn_outs) { if (auto real_op = cast_fw_node(out, "aten::real")) { auto squeezed = std::make_shared(normalized_rfftn_splitted->output(0), const_neg_1); copy_runtime_info({rfftn_op, real_op}, squeezed); diff --git a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp index 5884ae6d590eaf..5f9d72dcf7081b 100644 --- a/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/tuple_unpack_replacer.cpp @@ -47,7 +47,7 @@ PrimTupleUnpackReplacer::PrimTupleUnpackReplacer() { bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model) { bool result = false; - for (auto op : model->get_ordered_ops()) { + for (auto& op : model->get_ordered_ops()) { const auto if_op = as_type_ptr(op); if (if_op) { for (size_t i = 1; i < if_op->get_input_size(); i++) { @@ -61,7 +61,7 @@ bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model int else_body_idx = -1; auto then_descs = if_op->get_input_descriptions(v8::If::THEN_BODY_INDEX); auto else_descs = if_op->get_input_descriptions(v8::If::ELSE_BODY_INDEX); - for (auto inp_desc : then_descs) { + for (auto& inp_desc : then_descs) { if (inp_desc->m_input_index == i) { if 
(then_body_idx != -1) { add_exception_to_fw_node( @@ -72,7 +72,7 @@ bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model } } } - for (auto inp_desc : else_descs) { + for (auto& inp_desc : else_descs) { if (inp_desc->m_input_index == i) { if (else_body_idx != -1) { add_exception_to_fw_node( @@ -130,10 +130,10 @@ bool TupleUnpackInBodyReplacer::run_on_model(const std::shared_ptr& model // create new If inputs std::vector> inputs_mapping(if_op->get_input_size(), {-1, -1}); - for (auto inp_desc : then_descs) { + for (auto& inp_desc : then_descs) { inputs_mapping[inp_desc->m_input_index].first = static_cast(inp_desc->m_body_parameter_index); } - for (auto inp_desc : else_descs) { + for (auto& inp_desc : else_descs) { inputs_mapping[inp_desc->m_input_index].second = static_cast(inp_desc->m_body_parameter_index); } for (size_t j = 0; j < inputs_mapping.size(); j++) { diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp index e08ebd728b050e..9dcd4569ea8f66 100644 --- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -85,7 +85,8 @@ U4BlockRepack::U4BlockRepack() { } } - copy_runtime_info(NodeVector{constant, reshape1, transpose, reshape2}, new_const); + copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), std::move(reshape2)}, + new_const); replace_node(reshape2, new_const); return true; diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 9d5c719a3afa78..f08a7d08c7a36a 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -94,7 +94,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( if (input_model) { // When we have input model we should use its inputs order to create Parameters // We use m_inputs instead of get_inputs() because latter 
doesn't have "self" input - for (auto input_p : input_model->m_inputs) { + for (auto& input_p : input_model->m_inputs) { auto pytorch_place = std::dynamic_pointer_cast(input_p); FRONT_END_GENERAL_CHECK(pytorch_place, "Only place produced by PyTorch Frontend is supported."); auto tensor_id = pytorch_place->get_tensor_index(); @@ -108,7 +108,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( (*tensor_map)[tensor_id] = parameter; } // Add all tensors that were frozen - for (auto desc : input_model->m_descriptors) { + for (auto& desc : input_model->m_descriptors) { (*tensor_map)[desc.first] = desc.second.m_value; } } else { @@ -225,7 +225,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( ResultVector results; if (input_model) { // For the case when we have InputModel we need to have same order as its outputs - for (auto output_p : input_model->get_outputs()) { + for (auto& output_p : input_model->get_outputs()) { auto pytorch_place = std::dynamic_pointer_cast(output_p); FRONT_END_GENERAL_CHECK(pytorch_place, "Only place produced by PyTorch Frontend is supported."); auto tensor_id = pytorch_place->get_tensor_index(); From 439b7e57c736160e481e9d3040165a727b68297b Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 30 Oct 2023 14:23:02 +0100 Subject: [PATCH 125/275] [GHA] Add torch.compile layer tests to GHA (#20655) * [GHA] Add torch.compile layer tests to GHA * Update pytorch_layer_test_class.py --- .github/workflows/linux.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 888358d10b1182..7d346f98911bb0 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -1018,7 +1018,23 @@ jobs: run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -n logical -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: TEST_DEVICE: CPU - TEST_PRECISION: FP16 + TEST_PRECISION: FP32 + + - name: PyTorch torch.compile 
TORCHFX Layer Tests + run: | + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit_fx_backend --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP32 + PYTORCH_TRACING_MODE: TORCHFX + + - name: PyTorch torch.compile TORCHSCRIPT Layer Tests + run: | + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit_ts_backend --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP32 + PYTORCH_TRACING_MODE: TORCHSCRIPT - name: ONNX Layer Tests run: | From 061d6b5b6228c794b33acb41c26b891632fc75a2 Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Mon, 30 Oct 2023 13:45:46 +0000 Subject: [PATCH 126/275] [apiConformance] Move sw plugin to run with Template only (#20084) * [apiConformance] Move sw plugin to run over TEMPLATE * Fix set_property * update * add configs for sw plugins --- .../include/api_conformance_helpers.hpp | 111 ------------------ .../include/ov_api_conformance_helpers.hpp | 53 ++++++--- .../ov_executable_network/exec_graph_info.cpp | 4 +- .../exec_network_base.cpp | 4 +- .../ov_executable_network/get_metric.cpp | 8 +- .../ov_executable_network/properties.cpp | 34 ++---- .../behavior/ov_infer_request/callback.cpp | 2 +- .../ov_infer_request/cancellation.cpp | 2 +- .../infer_request_dynamic.cpp | 4 +- .../ov_infer_request/inference_chaining.cpp | 4 +- .../behavior/ov_infer_request/io_tensor.cpp | 10 +- .../ov_infer_request/multithreading.cpp | 2 +- .../ov_infer_request/perf_counters.cpp | 2 +- .../src/behavior/ov_infer_request/wait.cpp | 2 +- .../src/behavior/ov_plugin/caching_tests.cpp | 12 +- .../behavior/ov_plugin/core_integration.cpp | 6 +- .../src/behavior/ov_plugin/life_time.cpp | 4 +- .../src/behavior/ov_plugin/properties.cpp | 50 +++----- .../config/batch_config.txt | 1 + .../config/hetero_config.txt | 1 + .../config/multi_config.txt | 1 + 21 files changed, 93 insertions(+), 224 deletions(-) delete mode 100644 
src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp create mode 100644 src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt create mode 100644 src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt create mode 100644 src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/multi_config.txt diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp deleted file mode 100644 index bdde3438192bd6..00000000000000 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/api_conformance_helpers.hpp +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "conformance.hpp" -#include "common_test_utils/test_constants.hpp" - -namespace ov { -namespace test { -namespace conformance { - -inline const std::string get_plugin_lib_name_by_device(const std::string& deviceName) { - const std::map devices{ - { "AUTO", "openvino_auto_plugin" }, - { "HETERO", "openvino_hetero_plugin" }, - { "BATCH", "openvino_auto_batch_plugin" }, - { "MULTI", "openvino_auto_plugin" }, - { "NPU", "openvino_intel_npu_plugin" }, - { "CPU", "openvino_intel_cpu_plugin" }, - { "GNA", "openvino_intel_gna_plugin" }, - { "GPU", "openvino_intel_gpu_plugin" }, - { "TEMPLATE", "openvino_template_plugin" }, - { "NVIDIA", "openvino_nvidia_gpu_plugin" }, - }; - if (devices.find(deviceName) == devices.end()) { - if (std::string(targetPluginName) != "") { - return targetPluginName; - } - throw std::runtime_error("Incorrect device name"); - } - return devices.at(deviceName); -} - -inline const std::pair generate_default_multi_config() { - return 
{MULTI_CONFIG_KEY(DEVICE_PRIORITIES), ov::test::conformance::targetDevice}; -} - -inline const std::pair generate_default_hetero_config() { - return { "TARGET_FALLBACK" , ov::test::conformance::targetDevice }; -} - -inline const std::pair generate_default_batch_config() { - // auto-batching with batch 1 (no real batching in fact, but full machinery is in action) - return { CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , ov::test::conformance::targetDevice }; -} - -inline const std::vector> generate_configs(const std::string& target_plugin, - const std::vector>& config = {}) { - std::pair default_config; - if (target_plugin == std::string(ov::test::utils::DEVICE_MULTI) || target_plugin == std::string(ov::test::utils::DEVICE_AUTO)) { - default_config = generate_default_multi_config(); - } else if (target_plugin == std::string(ov::test::utils::DEVICE_HETERO)) { - default_config = generate_default_hetero_config(); - } else if (target_plugin == std::string(ov::test::utils::DEVICE_BATCH)) { - default_config = generate_default_batch_config(); - } else { - throw std::runtime_error("Incorrect target device: " + target_plugin); - } - - std::vector> resultConfig; - if (config.empty()) { - return {{default_config}}; - } - for (auto configItem : config) { - configItem.insert(default_config); - resultConfig.push_back(configItem); - } - return resultConfig; -} - -inline const std::string generate_complex_device_name(const std::string& deviceName) { - return deviceName + ":" + ov::test::conformance::targetDevice; -} - -inline const std::vector return_all_possible_device_combination(bool enable_complex_name = true) { - std::vector res{ov::test::conformance::targetDevice}; - std::vector devices{ov::test::utils::DEVICE_HETERO, ov::test::utils::DEVICE_AUTO, - ov::test::utils::DEVICE_BATCH, ov::test::utils::DEVICE_MULTI}; - for (const auto& device : devices) { - res.emplace_back(enable_complex_name ? 
generate_complex_device_name(device) : device); - } - return res; -} - -inline std::vector> generate_pairs_plugin_name_by_device() { - std::vector> res; - for (const auto& device : return_all_possible_device_combination()) { - std::string real_device = device.substr(0, device.find(':')); - res.push_back(std::make_pair(get_plugin_lib_name_by_device(real_device), - real_device)); - } - return res; -} - -inline std::map AnyMap2StringMap(const AnyMap& config) { - if (config.empty()) - return {}; - std::map result; - for (const auto& configItem : config) { - result.insert({configItem.first, configItem.second.as()}); - } - return result; -} - -const std::map ie_config = AnyMap2StringMap(ov::test::conformance::pluginConfig); - -} // namespace conformance -} // namespace test -} // namespace ov diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/ov_api_conformance_helpers.hpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/ov_api_conformance_helpers.hpp index 1f6278be78116c..6dd7a82928cff6 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/ov_api_conformance_helpers.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/include/ov_api_conformance_helpers.hpp @@ -4,36 +4,53 @@ #pragma once -#include "api_conformance_helpers.hpp" +#include "conformance.hpp" +#include "common_test_utils/test_constants.hpp" namespace ov { namespace test { namespace conformance { -inline const std::vector generate_ov_configs(const std::string& target_plugin, - const std::vector& config = {}) { - std::pair default_config; - if (target_plugin == std::string(ov::test::utils::DEVICE_MULTI) || - target_plugin == std::string(ov::test::utils::DEVICE_AUTO) || - target_plugin == std::string(ov::test::utils::DEVICE_HETERO)) { - default_config = ov::device::priorities(ov::test::conformance::targetDevice); - } else if (target_plugin == 
std::string(ov::test::utils::DEVICE_BATCH)) { - default_config = { CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(ov::test::conformance::targetDevice)}; - } else { - throw std::runtime_error("Incorrect target device: " + target_plugin); - } - +inline const std::vector generate_ov_configs(const std::vector& config = {}) { std::vector resultConfig; - if (config.empty()) { - return {{default_config}}; - } for (auto configItem : config) { - configItem.insert(default_config); + configItem.insert(ov::test::conformance::pluginConfig.begin(), ov::test::conformance::pluginConfig.end()); resultConfig.push_back(configItem); } return resultConfig; } +inline const std::string get_plugin_lib_name_by_device(const std::string& deviceName) { + const std::map devices{ + { "AUTO", "openvino_auto_plugin" }, + { "HETERO", "openvino_hetero_plugin" }, + { "BATCH", "openvino_auto_batch_plugin" }, + { "MULTI", "openvino_auto_plugin" }, + { "NPU", "openvino_intel_npu_plugin" }, + { "CPU", "openvino_intel_cpu_plugin" }, + { "GNA", "openvino_intel_gna_plugin" }, + { "GPU", "openvino_intel_gpu_plugin" }, + { "TEMPLATE", "openvino_template_plugin" }, + { "NVIDIA", "openvino_nvidia_gpu_plugin" }, + }; + if (devices.find(deviceName) == devices.end()) { + if (std::string(targetPluginName) != "") { + return targetPluginName; + } + throw std::runtime_error("Incorrect device name"); + } + return devices.at(deviceName); +} + +inline std::vector> generate_ov_pairs_plugin_name_by_device() { + std::vector> res; + std::string device(ov::test::conformance::targetDevice); + std::string real_device = device.substr(0, device.find(':')); + res.push_back(std::make_pair(get_plugin_lib_name_by_device(real_device), + real_device)); + return res; +} + } // namespace conformance } // namespace test } // namespace ov diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_graph_info.cpp 
b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_graph_info.cpp index 71f0bd1175b6aa..3fc385eb81701c 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_graph_info.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_graph_info.cpp @@ -32,12 +32,12 @@ INSTANTIATE_TEST_SUITE_P(ov_compiled_model, OVCompiledGraphImportExportTest, ::testing::Combine( ::testing::ValuesIn(ovExecGraphInfoElemTypes), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVCompiledGraphImportExportTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P( ov_compiled_model, OVClassCompiledModelImportExportTestP, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_network_base.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_network_base.cpp index fcf1952c55af4c..b7abf26114f916 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_network_base.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/exec_network_base.cpp @@ -13,13 +13,13 @@ using namespace ov::test::conformance; INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVCompiledModelBaseTest, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVCompiledModelBaseTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_compiled_model, 
OVCompiledModelBaseTestOptional, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVCompiledModelBaseTestOptional::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/get_metric.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/get_metric.cpp index 3448f8d355161f..aac7e09a380cf4 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/get_metric.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/get_metric.cpp @@ -16,7 +16,7 @@ using namespace ov::test::conformance; INSTANTIATE_TEST_SUITE_P( ov_compiled_model_mandatory, OVClassCompiledModelGetPropertyTest, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); // // Executable Network GetConfig / SetConfig @@ -24,15 +24,15 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( ov_compiled_model_mandatory, OVClassCompiledModelGetIncorrectPropertyTest, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P( ov_compiled_model_mandatory, OVClassCompiledModelGetConfigTest, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P( ov_compiled_model, OVClassCompiledModelSetIncorrectConfigTest, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/properties.cpp 
b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/properties.cpp index bb361ab03088d8..5639a0360eaec7 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/properties.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_executable_network/properties.cpp @@ -15,55 +15,35 @@ const std::vector inproperties = { {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, }; -const std::vector auto_batch_inproperties = {}; - INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVClassCompiledModelPropertiesIncorrectTests, ::testing::Combine( - ::testing::ValuesIn(ov::test::conformance::return_all_possible_device_combination()), - ::testing::ValuesIn(inproperties)), + ::testing::Values(targetDevice), + ::testing::ValuesIn(generate_ov_configs(inproperties))), OVClassCompiledModelPropertiesIncorrectTests::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(ov_compiled_model_AutoBatch, OVClassCompiledModelPropertiesIncorrectTests, - ::testing::Combine( - ::testing::Values(ov::test::utils::DEVICE_BATCH), - ::testing::ValuesIn(generate_ov_configs(ov::test::utils::DEVICE_BATCH, auto_batch_inproperties))), - OVClassCompiledModelPropertiesIncorrectTests::getTestCaseName); - - const std::vector default_properties = { {ov::enable_profiling(false)} }; INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVClassCompiledModelPropertiesDefaultTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(default_properties)), OVClassCompiledModelPropertiesDefaultTests::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVCompiledModelPropertiesDefaultSupportedTests, - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), 
OVCompiledModelPropertiesDefaultSupportedTests::getTestCaseName); -const std::vector auto_batch_properties = { - {{CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, - {{ov::auto_batch_timeout(10)}}, -}; - INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVClassCompiledModelPropertiesTests, ::testing::Combine( - ::testing::ValuesIn(ov::test::conformance::return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(default_properties)), OVClassCompiledModelPropertiesTests::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(ov_compiled_model_AutoBatch, OVClassCompiledModelPropertiesTests, - ::testing::Combine( - ::testing::Values(ov::test::utils::DEVICE_BATCH), - ::testing::ValuesIn(ov::test::conformance::generate_ov_configs(ov::test::utils::DEVICE_BATCH, auto_batch_properties))), - OVClassCompiledModelPropertiesTests::getTestCaseName); - INSTANTIATE_TEST_SUITE_P( ov_compiled_model, OVClassCompiledModelEmptyPropertiesTests, - ::testing::ValuesIn(ov::test::conformance::return_all_possible_device_combination())); + ::testing::Values(targetDevice)); // IE Class Load network @@ -78,7 +58,7 @@ const std::vector multiModelPriorityConfigs = { INSTANTIATE_TEST_SUITE_P(ov_compiled_model_mandatory, OVClassCompiledModelGetPropertyTest_MODEL_PRIORITY, - ::testing::Combine(::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Combine(::testing::Values(targetDevice), ::testing::ValuesIn(multiModelPriorityConfigs))); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/callback.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/callback.cpp index a32f200024b7e8..02631d2f897ecb 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/callback.cpp +++ 
b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/callback.cpp @@ -14,7 +14,7 @@ using namespace ov::test::conformance; INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestCallbackTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestCallbackTests::getTestCaseName); diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/cancellation.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/cancellation.cpp index 76d64baef6f6f2..5ffdbc0f354fa4 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/cancellation.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/cancellation.cpp @@ -11,7 +11,7 @@ using namespace ov::test::conformance; INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestCancellationTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestCancellationTests::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/infer_request_dynamic.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/infer_request_dynamic.cpp index 0ebf894b6e516e..b8749687c3458e 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/infer_request_dynamic.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/infer_request_dynamic.cpp @@ -60,7 +60,7 @@ 
INSTANTIATE_TEST_SUITE_P(ov_infer_request_1, OVInferRequestDynamicTests, ::testing::Values(std::vector, std::vector>>{ {{1, 4, 20, 20}, {1, 4, 20, 20}}, {{2, 4, 20, 20}, {2, 4, 20, 20}}}), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestDynamicTests::getTestCaseName); @@ -70,7 +70,7 @@ INSTANTIATE_TEST_SUITE_P(ov_infer_request_2, OVInferRequestDynamicTests, ::testing::Values(std::vector, std::vector>>{ {{1, 4, 20, 20}, {1, 2, 20, 40}}, {{2, 4, 20, 20}, {2, 2, 20, 40}}}), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestDynamicTests::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/inference_chaining.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/inference_chaining.cpp index f45c529015ecd0..d485e404d3195e 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/inference_chaining.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/inference_chaining.cpp @@ -12,13 +12,13 @@ using namespace ov::test::conformance; INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferenceChainingStatic, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferenceChaining::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_infer_request, OVInferenceChaining, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferenceChaining::getTestCaseName); } // namespace diff --git 
a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/io_tensor.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/io_tensor.cpp index 5d741f5f8148e7..779ae92c921cac 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/io_tensor.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/io_tensor.cpp @@ -14,7 +14,7 @@ using namespace ov::test::conformance; namespace { INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestIOTensorTest, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestIOTensorTest::getTestCaseName); @@ -36,14 +36,14 @@ std::vector ovIOTensorElemTypes = { INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestIOTensorSetPrecisionTest, ::testing::Combine( ::testing::ValuesIn(ovIOTensorElemTypes), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestIOTensorSetPrecisionTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestCheckTensorPrecision, ::testing::Combine( ::testing::ValuesIn(ovIOTensorElemTypes), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestCheckTensorPrecision::getTestCaseName); @@ -54,14 +54,14 @@ std::vector ovIOTensorElemTypesOptional = { INSTANTIATE_TEST_SUITE_P(ov_infer_request, OVInferRequestIOTensorSetPrecisionTest, ::testing::Combine( ::testing::ValuesIn(ovIOTensorElemTypesOptional), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), 
OVInferRequestIOTensorSetPrecisionTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_infer_request, OVInferRequestCheckTensorPrecision, ::testing::Combine( ::testing::ValuesIn(ovIOTensorElemTypesOptional), - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestCheckTensorPrecision::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/multithreading.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/multithreading.cpp index 5b319775c7a72d..3c9a2a8e50e58b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/multithreading.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/multithreading.cpp @@ -15,7 +15,7 @@ namespace { INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestMultithreadingTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestMultithreadingTests::getTestCaseName); diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/perf_counters.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/perf_counters.cpp index b31428ff9edb41..fae5dad32cfbe3 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/perf_counters.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/perf_counters.cpp @@ -12,7 +12,7 @@ namespace { INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestPerfCountersTest, ::testing::Combine( 
- ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestPerfCountersTest::getTestCaseName); diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/wait.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/wait.cpp index 6b996615578ff5..538a45b5d721e4 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/wait.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_infer_request/wait.cpp @@ -15,7 +15,7 @@ namespace { INSTANTIATE_TEST_SUITE_P(ov_infer_request_mandatory, OVInferRequestWaitTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(pluginConfig)), OVInferRequestWaitTests::getTestCaseName); diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp index 38cb32b4887993..2a0d49b97de6df 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp @@ -42,8 +42,8 @@ INSTANTIATE_TEST_SUITE_P(ov_plugin, CompileModelCacheTestBase, ::testing::ValuesIn(CompileModelCacheTestBase::getAnyTypeOnlyFunctions()), ::testing::ValuesIn(ovElemTypesTemplate), ::testing::ValuesIn(ovBatchSizesTemplate), - ::testing::ValuesIn(return_all_possible_device_combination()), - ::testing::Values(ov::AnyMap{})), + ::testing::Values(targetDevice), + ::testing::Values(pluginConfig)), 
CompileModelCacheTestBase::getTestCaseName); // Convolution/UnaryElementwiseArithmetic/BinaryElementwiseArithmetic is not supported boolean elemnt type @@ -52,8 +52,8 @@ INSTANTIATE_TEST_SUITE_P(ov_plugin_numeric, CompileModelCacheTestBase, ::testing::ValuesIn(CompileModelCacheTestBase::getNumericTypeOnlyFunctions()), ::testing::ValuesIn(ovElemAnyNumericTypesTemplate), ::testing::ValuesIn(ovBatchSizesTemplate), - ::testing::ValuesIn(return_all_possible_device_combination()), - ::testing::Values(ov::AnyMap{})), + ::testing::Values(targetDevice), + ::testing::Values(pluginConfig)), CompileModelCacheTestBase::getTestCaseName); // LSTMcell supported floating-point element type @@ -62,8 +62,8 @@ INSTANTIATE_TEST_SUITE_P(ov_plugin_floating_point, CompileModelCacheTestBase, ::testing::ValuesIn(CompileModelCacheTestBase::getFloatingPointOnlyFunctions()), ::testing::ValuesIn(ovElemAnyFloatingPointTypesTemplate), ::testing::ValuesIn(ovBatchSizesTemplate), - ::testing::ValuesIn(return_all_possible_device_combination()), - ::testing::Values(ov::AnyMap{})), + ::testing::Values(targetDevice), + ::testing::Values(pluginConfig)), CompileModelCacheTestBase::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/core_integration.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/core_integration.cpp index 0c0cee4632233a..ca47910e9e3d95 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/core_integration.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/core_integration.cpp @@ -18,15 +18,15 @@ namespace { INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVClassModelTestP, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P(ov_plugin, 
OVClassModelOptionalTestP, - ::testing::ValuesIn(return_all_possible_device_combination())); + ::testing::Values(targetDevice)); // IE Class Query network INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVClassQueryModelTest, - ::testing::ValuesIn(return_all_possible_device_combination(false))); + ::testing::Values(targetDevice)); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/life_time.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/life_time.cpp index 49cd957bb82e65..85b7ab4bdfa945 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/life_time.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/life_time.cpp @@ -11,10 +11,10 @@ using namespace ov::test::conformance; namespace { INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVHoldersTest, - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), OVHoldersTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVHoldersTestOnImportedNetwork, - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), OVHoldersTestOnImportedNetwork::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/properties.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/properties.cpp index e2ca8e79edaf10..517f8363f0327f 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/properties.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/properties.cpp @@ -16,18 +16,10 @@ const std::vector inproperties = { 
{ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, }; -const std::vector auto_batch_inproperties = {}; - INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVPropertiesIncorrectTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), - ::testing::ValuesIn(inproperties)), - OVPropertiesIncorrectTests::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(ov_plugin_AutoBatch, OVPropertiesIncorrectTests, - ::testing::Combine( - ::testing::Values(ov::test::utils::DEVICE_BATCH), - ::testing::ValuesIn(auto_batch_inproperties)), + ::testing::Values(targetDevice), + ::testing::ValuesIn(generate_ov_configs(inproperties))), OVPropertiesIncorrectTests::getTestCaseName); const std::vector default_properties = { @@ -35,63 +27,51 @@ const std::vector default_properties = { {ov::enable_profiling(true)}, }; -const std::vector auto_batch_properties = { - {}, - {{CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, - {{ov::auto_batch_timeout(10)}}, -}; - INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVPropertiesTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination(false)), + ::testing::Values(targetDevice), ::testing::ValuesIn(default_properties)), OVPropertiesTests::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(ov_plugin_AutoBatch, OVPropertiesTests, - ::testing::Combine( - ::testing::Values(ov::test::utils::DEVICE_BATCH), - ::testing::ValuesIn(ov::test::conformance::generate_ov_configs(ov::test::utils::DEVICE_BATCH, auto_batch_properties))), - OVPropertiesTests::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVCheckGetSupportedROMetricsPropsTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(OVCheckGetSupportedROMetricsPropsTests::getROMandatoryProperties())), OVCheckGetSupportedROMetricsPropsTests::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin, OVCheckGetSupportedROMetricsPropsTests, ::testing::Combine( - 
::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(OVCheckGetSupportedROMetricsPropsTests::getROOptionalProperties())), OVCheckGetSupportedROMetricsPropsTests::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVCheckSetSupportedRWMetricsPropsTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(OVCheckSetSupportedRWMetricsPropsTests::getRWMandatoryPropertiesValues())), OVCheckSetSupportedRWMetricsPropsTests::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin, OVCheckSetSupportedRWMetricsPropsTests, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(OVCheckSetSupportedRWMetricsPropsTests::getRWOptionalPropertiesValues())), OVCheckSetSupportedRWMetricsPropsTests::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVCheckChangePropComplieModleGetPropTests_DEVICE_ID, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(ov::AnyMap({}))), OVCheckChangePropComplieModleGetPropTests_DEVICE_ID::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin_mandatory, OVCheckChangePropComplieModleGetPropTests_InferencePrecision, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::Values(ov::AnyMap({}))), OVCheckChangePropComplieModleGetPropTests_InferencePrecision::getTestCaseName); INSTANTIATE_TEST_SUITE_P(ov_plugin, OVCheckMetricsPropsTests_ModelDependceProps, ::testing::Combine( - ::testing::ValuesIn(return_all_possible_device_combination()), + ::testing::Values(targetDevice), ::testing::ValuesIn(OVCheckMetricsPropsTests_ModelDependceProps::getModelDependcePropertiesValues())), OVCheckMetricsPropsTests_ModelDependceProps::getTestCaseName); @@ 
-101,15 +81,15 @@ INSTANTIATE_TEST_SUITE_P(ov_plugin, OVCheckMetricsPropsTests_ModelDependceProps, INSTANTIATE_TEST_SUITE_P( ov_plugin_mandatory, OVGetMetricPropsTest, - ::testing::ValuesIn(return_all_possible_device_combination(false))); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P( ov_plugin, OVGetMetricPropsOptionalTest, - ::testing::ValuesIn(return_all_possible_device_combination(false))); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P( ov_plugin_mandatory, OVGetAvailableDevicesPropsTest, - ::testing::ValuesIn(return_all_possible_device_combination(false))); + ::testing::Values(targetDevice)); // // IE Class GetConfig @@ -117,9 +97,9 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( ov_plugin, OVPropertiesDefaultSupportedTests, - ::testing::ValuesIn(return_all_possible_device_combination(false))); + ::testing::Values(targetDevice)); INSTANTIATE_TEST_SUITE_P( ov_plugin_remove_mandatory, OVBasicPropertiesTestsP, - ::testing::ValuesIn(generate_pairs_plugin_name_by_device())); + ::testing::ValuesIn(generate_ov_pairs_plugin_name_by_device())); } // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt new file mode 100644 index 00000000000000..e51aed2fc05f4c --- /dev/null +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/batch_config.txt @@ -0,0 +1 @@ +AUTO_BATCH_DEVICE_CONFIG TEMPLATE \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt new file mode 100644 index 00000000000000..4c8a8807b6aba4 --- /dev/null +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/hetero_config.txt @@ -0,0 +1 @@ +TARGET_FALLBACK 
TEMPLATE \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/multi_config.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/multi_config.txt new file mode 100644 index 00000000000000..af5c0ce5e5f5b5 --- /dev/null +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/config/multi_config.txt @@ -0,0 +1 @@ +MULTI_DEVICE_PRIORITIES TEMPLATE \ No newline at end of file From bc9b52ef7e5113d297c9dc06cf56981d51bb593d Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 30 Oct 2023 15:27:00 +0100 Subject: [PATCH 127/275] Increase timeout for hub tests (#20764) --- .github/workflows/linux.yml | 4 ++-- tests/model_hub_tests/torch_tests/test_hf_transformers.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 7d346f98911bb0..06825e2ece1903 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -1203,7 +1203,7 @@ jobs: run: shell: bash runs-on: ${{ github.event_name == 'schedule' && 'ubuntu-20.04-16-cores' || 'ubuntu-20.04-8-cores'}} - timeout-minutes: ${{ github.event_name == 'schedule' && 300 || 5 }} + timeout-minutes: ${{ github.event_name == 'schedule' && 400 || 5 }} # TODO: Switch back to self-hosted runners # container: # image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 @@ -1280,7 +1280,7 @@ jobs: PyTorch_Models_Tests: name: PyTorch Models tests needs: Build - timeout-minutes: ${{ github.event_name == 'schedule' && 300 || 30 }} + timeout-minutes: ${{ github.event_name == 'schedule' && 400 || 30 }} defaults: run: shell: bash diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 8e3ea5ecfdd2c9..0dfef7deaae547 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py 
@@ -53,7 +53,7 @@ def setup_class(self): from PIL import Image import requests - self.infer_timeout = 1200 + self.infer_timeout = 800 url = "http://images.cocodataset.org/val2017/000000039769.jpg" self.image = Image.open(requests.get(url, stream=True).raw) From 34467ef45eba931b32fb8405e6e5636f11fecd06 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Oct 2023 18:33:04 +0400 Subject: [PATCH 128/275] Improved warning message for BA + OpenCL case (#20725) * Improved warning message for BA + OpenCL case * Update samples/cpp/benchmark_app/CMakeLists.txt Co-authored-by: Helena Kloosterman --------- Co-authored-by: Helena Kloosterman --- samples/cpp/benchmark_app/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/benchmark_app/CMakeLists.txt b/samples/cpp/benchmark_app/CMakeLists.txt index 863c2278058f5f..c786fde4c2fbe8 100644 --- a/samples/cpp/benchmark_app/CMakeLists.txt +++ b/samples/cpp/benchmark_app/CMakeLists.txt @@ -143,7 +143,7 @@ if(SAMPLES_ENABLE_OPENCL) target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL) else() - message(WARNING "OpenCL CPP header is not found, ${TARGET_NAME} will be built without OpenCL support. Download it from: https://github.com/KhronosGroup/OpenCL-CLHPP and set -Dopencl_root_hints=[PATH]/OpenCL-CLHPP/include to cmake.") + message(WARNING "OpenCL CPP header is not found, ${TARGET_NAME} will be built without OpenCL support and you will not be able to use the '-use_device_mem' option. 
Please, install ' install opencl-headers' to enable the option") endif() endif() From a4c47bf6ab9e425d254b521d1c77378f4e4c5f01 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 30 Oct 2023 18:40:09 +0400 Subject: [PATCH 129/275] [TF FE] Fix body graph injection, CumSum and SparseFillEmptyRows (#20680) * [TF FE] Fix body graph injection, CumSum and SparseFillEmptyRows Signed-off-by: Kazantsev, Roman * Do not handle non-parameters in body * Update layer test to cover default parameter and attribute values * Fix layer tests --------- Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow/src/input_model.cpp | 69 ++++++++++++------- .../tensorflow/src/op/sparse_reshape.cpp | 15 +++- src/frontends/tensorflow/src/op_table.cpp | 5 +- .../tensorflow/src/translate_session.cpp | 3 +- .../tensorflow_common/src/op/cumsum.cpp | 19 ++--- .../tensorflow_tests/test_tf_Cumsum.py | 38 +++++----- 6 files changed, 87 insertions(+), 62 deletions(-) diff --git a/src/frontends/tensorflow/src/input_model.cpp b/src/frontends/tensorflow/src/input_model.cpp index 65672ae13a6d1e..459ce69b5466bb 100644 --- a/src/frontends/tensorflow/src/input_model.cpp +++ b/src/frontends/tensorflow/src/input_model.cpp @@ -149,7 +149,44 @@ void InputModel::InputModelTFImpl::load_places() { all_op_names.insert(op_name); m_op_places.push_back(op_place); m_op_places_map[op_name] = op_place; + + // compute non-terminating nodes in the graph + // and put such nodes into op_names_with_consumers + for (size_t input_port_idx = 0; input_port_idx < node_decoder->get_input_size(); ++input_port_idx) { + std::string producer_op_name; + std::string producer_output_port_name; + size_t producer_output_port_idx; + try { + node_decoder->get_input_node(input_port_idx, + producer_op_name, + producer_output_port_name, + producer_output_port_idx); + if (is_conditional_edge(producer_op_name)) { + // exclude "^" mark indicating (execution) conditional dependency + // for example, "^sub_op" means dependency on a producer 
node with a name "sub_op" + // if a node has dependent operation nodes and has no data consumers, + // this node is not terminating and will not output to the Result node + producer_op_name = producer_op_name.substr(1); + } + + op_names_with_consumers.insert(producer_op_name); + } catch (const std::exception&) { + FRONT_END_THROW("[ ERROR ] Exception happened when preparing input " + std::to_string(input_port_idx) + + " for op '" + node_decoder->get_op_name() + "', expected input name: '" + + producer_op_name + + "', expected input port index: " + std::to_string(producer_output_port_idx)); + } + } + + // put places for all inputs of a model into m_inputs if (op_type == "Placeholder" || op_type == "PlaceholderWithDefault") { + if (m_input_names.size() > 0 && + std::find(m_input_names.begin(), m_input_names.end(), op_name) == m_input_names.end()) { + // this is a body graph since it contains non-empty m_input_names + // such node not included into m_input_names should be skipped + continue; + } + // in case Placeholder we put created TensorPlace to both m_tensor_places container and m_inputs // since they can be used if user does not override them // in case PlaceholderWithDefault we put created TensorPlace only to m_tensor_places container @@ -199,6 +236,13 @@ void InputModel::InputModelTFImpl::load_places() { m_inputs.push_back(tensor_place); } } else if (op_type == "input_arg") { + if (m_input_names.size() > 0 && + std::find(m_input_names.begin(), m_input_names.end(), op_name) == m_input_names.end()) { + // this is a body graph since it contains non-empty m_input_names + // such node not included into m_input_names should be skipped + continue; + } + // create a tensor place for the body graph parameter node and save it in the m_inputs // it allows to set shapes for the body graph InputModel for its more optimal conversion auto param_type = node_decoder->get_attribute("type"); @@ -212,31 +256,6 @@ void InputModel::InputModelTFImpl::load_places() { 
std::vector{op_name}); m_inputs.push_back(tensor_place); } - for (size_t input_port_idx = 0; input_port_idx < node_decoder->get_input_size(); ++input_port_idx) { - std::string producer_op_name; - std::string producer_output_port_name; - size_t producer_output_port_idx; - try { - node_decoder->get_input_node(input_port_idx, - producer_op_name, - producer_output_port_name, - producer_output_port_idx); - if (is_conditional_edge(producer_op_name)) { - // exclude "^" mark indicating (execution) conditional dependency - // for example, "^sub_op" means dependency on a producer node with a name "sub_op" - // if a node has dependent operation nodes and has no data consumers, - // this node is not terminating and will not output to the Result node - producer_op_name = producer_op_name.substr(1); - } - - op_names_with_consumers.insert(producer_op_name); - } catch (const std::exception&) { - FRONT_END_THROW("[ ERROR ] Exception happened when preparing input " + std::to_string(input_port_idx) + - " for op '" + node_decoder->get_op_name() + "', expected input name: '" + - producer_op_name + - "', expected input port index: " + std::to_string(producer_output_port_idx)); - } - } } if (m_telemetry) { diff --git a/src/frontends/tensorflow/src/op/sparse_reshape.cpp b/src/frontends/tensorflow/src/op/sparse_reshape.cpp index 10f830cf4b2c24..1def5f4192f155 100644 --- a/src/frontends/tensorflow/src/op/sparse_reshape.cpp +++ b/src/frontends/tensorflow/src/op/sparse_reshape.cpp @@ -70,20 +70,29 @@ OutputVector translate_sparse_reshape_op(const ov::frontend::tensorflow::NodeCon return {input_indices, input_shape}; } -OutputVector translate_sparse_fill_empty_rows_op(const ov::frontend::tensorflow::NodeContext& node) { +NamedOutputVector translate_sparse_fill_empty_rows_op(const ov::frontend::tensorflow::NodeContext& node) { default_op_checks(node, 3, {"SparseFillEmptyRows"}); auto input_indices = node.get_input(0); auto input_values = node.get_input(1); auto dense_shape = node.get_input(2); 
auto default_value = node.get_input(3); + auto node_name = node.get_name(); auto sparse_fill_empty_rows = make_shared(input_indices, input_values, dense_shape, default_value, node.get_decoder()); - set_node_name(node.get_name(), sparse_fill_empty_rows); - return sparse_fill_empty_rows->outputs(); + sparse_fill_empty_rows->set_friendly_name(node_name); + set_out_name(node_name + ":0", sparse_fill_empty_rows->output(0)); + set_out_name(node_name + ":1", sparse_fill_empty_rows->output(1)); + set_out_name(node_name + ":2", sparse_fill_empty_rows->output(2)); + set_out_name(node_name + ":3", sparse_fill_empty_rows->output(3)); + + return {{"output_indices", sparse_fill_empty_rows->output(0)}, + {"output_values", sparse_fill_empty_rows->output(1)}, + {"empty_row_indicator", sparse_fill_empty_rows->output(2)}, + {"reverse_index_map", sparse_fill_empty_rows->output(3)}}; } OutputVector translate_sparse_segment_sum_op(const ov::frontend::tensorflow::NodeContext& node) { diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 3a4c570c6576fb..3251ad81b0f7af 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -18,7 +18,8 @@ namespace frontend { namespace tensorflow { namespace op { -#define TF_OP_CONVERTER(op) OutputVector op(const ov::frontend::tensorflow::NodeContext& node) +#define TF_OP_CONVERTER(op) OutputVector op(const ov::frontend::tensorflow::NodeContext& node) +#define TF_OP_CONVERTER_NAMED(op) NamedOutputVector op(const ov::frontend::tensorflow::NodeContext& node) TF_OP_CONVERTER(translate_assignvariable_op); TF_OP_CONVERTER(translate_block_lstm_op); @@ -40,7 +41,7 @@ TF_OP_CONVERTER(translate_queue_dequeue_op); TF_OP_CONVERTER(translate_queue_dequeue_many_op); TF_OP_CONVERTER(translate_readvariable_op); TF_OP_CONVERTER(translate_restorev2_op); -TF_OP_CONVERTER(translate_sparse_fill_empty_rows_op); +TF_OP_CONVERTER_NAMED(translate_sparse_fill_empty_rows_op); 
TF_OP_CONVERTER(translate_sparse_reshape_op); TF_OP_CONVERTER(translate_sparse_segment_sum_op); TF_OP_CONVERTER(translate_staticregexfullmatch_op); diff --git a/src/frontends/tensorflow/src/translate_session.cpp b/src/frontends/tensorflow/src/translate_session.cpp index 51f0addb7c3ff5..4038995c6cb693 100644 --- a/src/frontends/tensorflow/src/translate_session.cpp +++ b/src/frontends/tensorflow/src/translate_session.cpp @@ -337,6 +337,7 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu const auto& model_frozen_inputs = model_tf->get_tensor_values(); const auto& saved_model_inputs = model_tf->get_saved_model_input_names(); const auto& saved_model_outputs = model_tf->get_saved_model_output_names(); + bool is_body_graph = (model_tf->get_input_names().size() > 0); // fill ng_op_map with Constant outputs for frozen inputs for (const auto& frozen_input : model_frozen_inputs) { @@ -532,7 +533,7 @@ void TranslateSession::translate_graph(const ov::frontend::InputModel::Ptr& inpu } else { auto param = as_type_ptr(output.port.get_node_shared_ptr()); // avoid duplicating Parameter nodes if they are already in the Parameters vector - if (param && std::find(params.begin(), params.end(), param) == params.end()) { + if (param && std::find(params.begin(), params.end(), param) == params.end() && !is_body_graph) { params.push_back(param); } ng_op_map[operation_name].push_back(output); diff --git a/src/frontends/tensorflow_common/src/op/cumsum.cpp b/src/frontends/tensorflow_common/src/op/cumsum.cpp index cfffbb0c0b8f48..0fcf0f2da048b8 100644 --- a/src/frontends/tensorflow_common/src/op/cumsum.cpp +++ b/src/frontends/tensorflow_common/src/op/cumsum.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/cum_sum.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,14 +14,15 @@ namespace tensorflow { namespace op { 
OutputVector translate_cumsum_op(const NodeContext& node) { - auto ng_x = node.get_input(0); - auto ng_axis = node.get_input(1); - auto exclusive = node.get_attribute("exclusive"); - auto reverse = node.get_attribute("reverse"); + default_op_checks(node, 2, {"Cumsum"}); + auto x = node.get_input(0); + auto axis = node.get_input(1); + auto exclusive = node.get_attribute("exclusive", false); + auto reverse = node.get_attribute("reverse", false); - auto res = make_shared(ng_x, ng_axis, exclusive, reverse); - set_node_name(node.get_name(), res); - return res->outputs(); + auto cum_sum = make_shared(x, axis, exclusive, reverse); + set_node_name(node.get_name(), cum_sum); + return cum_sum->outputs(); } } // namespace op } // namespace tensorflow diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Cumsum.py b/tests/layer_tests/tensorflow_tests/test_tf_Cumsum.py index 7379d08a600c5b..79bb756d4a0650 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Cumsum.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Cumsum.py @@ -6,24 +6,16 @@ from common.tf_layer_test_class import CommonTFLayerTest + # Testing Cumsum operation # Documentation: https://www.tensorflow.org/api_docs/python/tf/raw_ops/Cumsum -class TestCumsumOps(CommonTFLayerTest): +class TestCumsum(CommonTFLayerTest): # input_shape - should be an array # axis - array which points on axis for the operation # exclusive - enables exclusive Cumsum # reverse - enables reverse order of Cumsum - # ir_version - common parameter - # use_new_frontend - common parameter - def create_cumsum_ops_placeholder_const_net(self, input_shape, axis, exclusive, reverse, ir_version, use_new_frontend): - """ - Tensorflow net IR net - - Placeholder->Cumsum => Placeholder->Cumsum - - """ - + def create_cumsum_net(self, input_shape, axis, exclusive, reverse): import tensorflow as tf tf.compat.v1.reset_default_graph() @@ -31,9 +23,9 @@ def create_cumsum_ops_placeholder_const_net(self, input_shape, axis, exclusive, # Create the graph and 
model with tf.compat.v1.Session() as sess: tf_input = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') - tf_axis = tf.constant(axis) - tf.raw_ops.Cumsum(x = tf_input, axis = tf_axis, exclusive = exclusive, reverse = reverse) + tf_axis = tf.constant(axis, dtype=tf.int32) + tf.raw_ops.Cumsum(x=tf_input, axis=tf_axis, exclusive=exclusive, reverse=reverse) tf.compat.v1.global_variables_initializer() tf_net = sess.graph_def @@ -43,20 +35,22 @@ def create_cumsum_ops_placeholder_const_net(self, input_shape, axis, exclusive, return tf_net, ref_net test_data = [ - pytest.param( - dict(input_shape=[2, 3], axis=1), - marks=pytest.mark.precommit_tf_fe), + dict(input_shape=[2], axis=-1), + dict(input_shape=[2, 3], axis=0), + dict(input_shape=[2, 3], axis=1), + dict(input_shape=[2, 3], axis=-2), dict(input_shape=[2, 3, 3, 4], axis=2), + dict(input_shape=[2, 3, 3, 4], axis=-3), ] @pytest.mark.parametrize("params", test_data) - @pytest.mark.parametrize("exclusive", [False, True]) - @pytest.mark.parametrize("reverse", [False, True]) + @pytest.mark.parametrize("exclusive", [False, True, None]) + @pytest.mark.parametrize("reverse", [False, True, None]) @pytest.mark.precommit + @pytest.mark.precommit_tf_fe @pytest.mark.nightly - def test_cumsum_ops_placeholder_const(self, params, exclusive, reverse, ie_device, precision, ir_version, temp_dir, - use_new_frontend, use_old_api): - self._test(*self.create_cumsum_ops_placeholder_const_net(**params, exclusive=exclusive, ir_version=ir_version, - use_new_frontend=use_new_frontend, reverse=reverse), + def test_cumsum_basic(self, params, exclusive, reverse, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_cumsum_net(**params, exclusive=exclusive, reverse=reverse), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) From fdec7fb7eebe9b3788ec68ee475787cf3b24dd7c Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Mon, 30 
Oct 2023 17:14:37 +0100 Subject: [PATCH 130/275] [Opset13][ONNX] NMSRotated-13 test and sort update (#20670) * Set sort_result_descending attr false for nms * Add onnx test * Tmp exclude for CPU * Update test values * Update test vals to avoid cast warning * Fix typo --- .../onnx/frontend/src/op/nms_rotated.hpp | 3 +- .../onnx/tests/models/mm_nms_rotated.prototxt | 71 +++++++++++++++++++ src/frontends/onnx/tests/onnx_import.in.cpp | 14 ++++ .../onnx/tests/runtime/ie/unit_test.manifest | 3 + 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 src/frontends/onnx/tests/models/mm_nms_rotated.prototxt diff --git a/src/frontends/onnx/frontend/src/op/nms_rotated.hpp b/src/frontends/onnx/frontend/src/op/nms_rotated.hpp index c575ce8a0217c6..b41d203474408c 100644 --- a/src/frontends/onnx/frontend/src/op/nms_rotated.hpp +++ b/src/frontends/onnx/frontend/src/op/nms_rotated.hpp @@ -30,7 +30,8 @@ inline OutputVector nms_rotated(const Node& node) { node.get_ng_inputs().at(1), max_output_boxes_per_class, iou_threshold_const, - score_threshold_const); + score_threshold_const, + false); return {nms->output(0)}; } diff --git a/src/frontends/onnx/tests/models/mm_nms_rotated.prototxt b/src/frontends/onnx/tests/models/mm_nms_rotated.prototxt new file mode 100644 index 00000000000000..feda742ec9a805 --- /dev/null +++ b/src/frontends/onnx/tests/models/mm_nms_rotated.prototxt @@ -0,0 +1,71 @@ +ir_version: 8 +producer_name: "onnx_frontend_test" +graph { + node { + input: "boxes" + input: "scores" + output: "selected_indices" + op_type: "NMSRotated" + domain: "mmdeploy" + attribute { + name: "iou_threshold" + f: 0.5 + type: FLOAT + } + attribute { + name: "score_threshold" + f: 0.3 + type: FLOAT + } + } + input { + name: "boxes" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "scores" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } 
+ dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "selected_indices" + type { + tensor_type { + elem_type: 7 + } + } + } +} +opset_import { + domain: "mmdeploy" + version: 1 +} diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 2ac1dc6d464567..45e7bbf81eff28 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -6962,3 +6962,17 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_group_normalization_2grp_custom_eps) { test_case.run_with_tolerance_as_fp(0.000001f); } + +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_mm_nms_rotated) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mm_nms_rotated.onnx")); + + auto test_case = ov::test::TestCase(function, s_device); + test_case.add_input(Shape{1, 4, 5}, + std::vector({23.0f, 10.5f, 4.0f, 15.0f, 2.5f, 11.0f, 15.0f, 4.0f, 2.0f, 0.7854f, + 20.0f, 4.5f, 4.0f, 3.0f, -5.3f, 8.0f, 11.5f, 4.0f, 3.0f, -0.5236f})); + test_case.add_input(Shape{1, 1, 4}, std::vector({0.6f, 0.8f, 0.5f, 0.7f})); + test_case.add_expected_output(Shape{4, 3}, {0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0, 2}); + + test_case.run(); +} diff --git a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest index 785c1d2d25028d..6c8a302313f032 100644 --- a/src/frontends/onnx/tests/runtime/ie/unit_test.manifest +++ b/src/frontends/onnx/tests/runtime/ie/unit_test.manifest @@ -205,6 +205,9 @@ IE_CPU.onnx_constant_sparse_tensor_double_3x4 IE_CPU.onnx_constant_sparse_tensor_int16_3x4 IE_CPU.onnx_constant_sparse_tensor_uint16_3x4 +# Unsupported op NMSRotated +onnx_model_mm_nms_rotated + #------------------------------------------------------------------------------- # # Inference Engine GPU plugin excludes From df3e51f818408d79553d9cdbcf3be549d11e9ec7 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: 
Tue, 31 Oct 2023 02:54:19 +0900 Subject: [PATCH 131/275] add comments about origin of reference implementation (#20763) --- .../kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl index cf26d0cbc276c0..3a0173a0dbc01f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/non_max_suppression_gpu_ref.cl @@ -1,6 +1,9 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +// Copyright (c) Facebook, Inc. and its affiliates. +// The implementation for rotated boxes intersection is based on the code from: +// https://github.com/facebookresearch/detectron2/blob/v0.6/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h #include "include/batch_headers/fetch_data.cl" From 7eb53586f553aa7fa32d5d0532a91b1122c27cca Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Mon, 30 Oct 2023 21:58:34 +0400 Subject: [PATCH 132/275] [GPU] Added WA for MarkDequantizationSubgraph to work correctly in the plugin (#20767) --- .../intel_gpu/src/plugin/transformations_pipeline.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f96122a6531ea5..68268790c62860 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -109,6 +109,7 @@ #include "transformations/convert_precision.hpp" #include "transformations/init_node_info.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" +#include "transformations/rt_info/keep_const_precision.hpp" #include "transformations/smart_reshape/matmul_sr.hpp" 
#include "plugin/transformations/convert_matmul_to_fc.hpp" @@ -159,6 +160,12 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto pass_config = manager.get_pass_config(); manager.set_per_pass_validation(false); + // Temporary solution, global rt info cleanup is needed + for (auto& node : func->get_ops()) { + ov::enable_constant_folding(node); + ov::disable_keep_const_precision(node); + } + enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); if (enableInt8) { manager.register_pass( From c21fd2ea6168bd79924455262101719b19f6ea1d Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Mon, 30 Oct 2023 22:36:08 +0400 Subject: [PATCH 133/275] [PT FE]: fix object has no attribute 'dtype' (#20755) * [PT FE]: fix object has no attribute 'dtype' * add model in tests * update test --- .../python/src/openvino/frontend/pytorch/ts_decoder.py | 2 +- tests/model_hub_tests/torch_tests/hf_transformers_models | 1 + tests/model_hub_tests/torch_tests/test_hf_transformers.py | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index a57393e7638d67..04259234298601 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -67,7 +67,7 @@ def _get_preserved_attributes(model) -> list: preserved_attributes = [] for name, module in model.named_modules(): if hasattr(module, "weight"): - if module.weight is not None and module.weight.dtype in [torch.int8, torch.uint8]: + if module.weight is not None and getattr(module.weight, "dtype", None) in [torch.int8, torch.uint8]: preserved_attributes.append(name) return preserved_attributes diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models 
b/tests/model_hub_tests/torch_tests/hf_transformers_models index dd41a18235b688..ac1e1a8d510ec6 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -186,6 +186,7 @@ jambran/depression-classification,DepressionDetection,skip,Load problem Jellywibble/dalio-reward-charlie-v1,reward-model,skip,Load problem JonasGeiping/crammed-bert-legacy,crammedBERT,skip,Load problem jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2,xfail,Unsupported op aten::index_put_ prim::TupleConstruct +facebook/mms-lid-126,wav2vec2 Joqsan/test-my-fnet,my_fnet,skip,Load problem jozhang97/deta-swin-large,deta,skip,Load problem jploski/retnet-mini-shakespeare,retnet,skip,Load problem diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 0dfef7deaae547..d29d9dfc538e7d 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -104,6 +104,13 @@ def forward(self, x): model = VIT_GPT2_Model(model) example = (encoded_input.pixel_values,) + elif "mms-lid" in name: + # mms-lid model config does not have auto_model attribute, only direct loading aviable + from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor + model = Wav2Vec2ForSequenceClassification.from_pretrained(name, torchscript=True) + processor = AutoFeatureExtractor.from_pretrained(name) + input_values = processor(torch.randn(16000).numpy(), sampling_rate=16_000, return_tensors="pt") + example = {"input_values": input_values.input_values} elif "retribert" in mi.tags: from transformers import RetriBertTokenizer text = "How many cats are there?" 
From 5b583276b226657846ba78def837e355199adf14 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Mon, 30 Oct 2023 19:46:07 +0100 Subject: [PATCH 134/275] [PyOV] Add get_byte_size to const node (#20759) --- src/bindings/python/src/pyopenvino/graph/ops/constant.cpp | 2 ++ src/bindings/python/tests/test_graph/test_ops.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp b/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp index 002a674e45cb71..92f5b0e2ace138 100644 --- a/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp +++ b/src/bindings/python/src/pyopenvino/graph/ops/constant.cpp @@ -127,6 +127,8 @@ void regclass_graph_op_Constant(py::module m) { constant.def("get_value_strings", &ov::op::v0::Constant::get_value_strings); + constant.def("get_byte_size", &ov::op::v0::Constant::get_byte_size); + constant.def("get_vector", [](const ov::op::v0::Constant& self) { auto element_type = self.get_element_type(); if (element_type == ov::element::boolean) { diff --git a/src/bindings/python/tests/test_graph/test_ops.py b/src/bindings/python/tests/test_graph/test_ops.py index 1fcaed082220a1..bd81278cf59feb 100644 --- a/src/bindings/python/tests/test_graph/test_ops.py +++ b/src/bindings/python/tests/test_graph/test_ops.py @@ -128,6 +128,7 @@ def test_constant(const, args, expectation): assert node.get_output_size() == 1 assert list(node.get_output_shape(0)) == [3, 3] assert node.get_output_element_type(0) == Type.f32 + assert node.get_byte_size() == 36 def test_concat(): From 31f6a9512202b7f591515732e9b1234ddda91fba Mon Sep 17 00:00:00 2001 From: Przemyslaw Wysocki Date: Mon, 30 Oct 2023 20:06:55 +0100 Subject: [PATCH 135/275] [PyOV] Set inference precision explicitly to fp32 for torchvision tests (#20713) * Set fp32 * Fix flake * use properties api --- .../python/tests/test_torchvision_to_ov/test_preprocessor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py b/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py index a1cdc41f610e62..2bb85b55012efb 100644 --- a/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py +++ b/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py @@ -12,8 +12,10 @@ import torch.nn.functional as f import torchvision.transforms as transforms +from openvino import Type from openvino.runtime import Core, Tensor from openvino.tools.mo import convert_model +from openvino.properties.hint import inference_precision from openvino.preprocess.torchvision import PreprocessConverter @@ -38,7 +40,8 @@ def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3): ov_model = PreprocessConverter.from_torchvision( model=ov_model, transform=preprocess_pipeline, input_example=Image.fromarray(test_input.astype("uint8"), "RGB"), ) - ov_model = core.compile_model(ov_model, "CPU") + infer_config = {inference_precision: Type.f32} + ov_model = core.compile_model(ov_model, "CPU", infer_config) # Torch results torch_input = copy.deepcopy(test_input) From ac176beb3339698e249ab3b724aa0a6b406f40d4 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 30 Oct 2023 23:58:35 +0100 Subject: [PATCH 136/275] [core]Migrate Clamp operator to new API (#20676) * Migrate Clamp operator to new API * Remove not used function add comment --- src/core/include/openvino/op/clamp.hpp | 4 +- .../include/openvino/reference/clamp.hpp | 14 +- src/core/src/op/clamp.cpp | 181 +++++++++--------- 3 files changed, 99 insertions(+), 100 deletions(-) diff --git a/src/core/include/openvino/op/clamp.hpp b/src/core/include/openvino/op/clamp.hpp index 1114b7b9c3a1f5..bdee557cc0a94a 100644 --- a/src/core/include/openvino/op/clamp.hpp +++ b/src/core/include/openvino/op/clamp.hpp @@ -45,9 +45,7 @@ class OPENVINO_API Clamp : public util::UnaryElementwiseArithmetic { void set_max(const double& x) { m_max = x; } - 
OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; diff --git a/src/core/reference/include/openvino/reference/clamp.hpp b/src/core/reference/include/openvino/reference/clamp.hpp index 1370d28d6a8c97..a658928a9943c7 100644 --- a/src/core/reference/include/openvino/reference/clamp.hpp +++ b/src/core/reference/include/openvino/reference/clamp.hpp @@ -9,9 +9,19 @@ namespace ov { namespace reference { + +/** + * @brief Reference implementation of Clamp operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param min Minimum value used to clamp input data. + * @param max Maximum value used to clamp input data. + * @param count Number of elements in input buffer. 
+ */ template -void clamp(const T* arg, T* out, T min, T max, size_t count) { - for (size_t i = 0; i < count; i++) { +void clamp(const T* arg, T* out, const T min, const T max, const size_t count) { + for (size_t i = 0; i < count; ++i) { if (arg[i] < min) { out[i] = min; } else if (arg[i] > max) { diff --git a/src/core/src/op/clamp.cpp b/src/core/src/op/clamp.cpp index 91caa3f3fdb48b..f3b0d19af17b82 100644 --- a/src/core/src/op/clamp.cpp +++ b/src/core/src/op/clamp.cpp @@ -2,119 +2,111 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/clamp.hpp" - -#include +#include "openvino/op/clamp.hpp" #include "bound_evaluate.hpp" +#include "compare.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/util.hpp" #include "openvino/reference/clamp.hpp" +#include "openvino/reference/utils/type_util.hpp" -using namespace std; -using namespace ngraph; - -OPENVINO_SUPPRESS_DEPRECATED_START +namespace ov { +namespace op { namespace clamp { -namespace { -template -bool evaluate(const HostTensorPtr& arg, const HostTensorPtr& out, T min, T max, size_t count) { - ov::reference::clamp(arg->get_data_ptr(), out->get_data_ptr(), min, max, count); - return true; -} -bool evaluate_clamp(const HostTensorPtr& arg, const HostTensorPtr& out, double min, double max) { - size_t count = shape_size(arg->get_shape()); - auto ceil_func = [](double x) { - return ceil(x); - }; - auto floor_func = [](double x) { - return floor(x); - }; - - bool rc = true; - OPENVINO_SUPPRESS_DEPRECATED_START - switch (arg->get_element_type()) { - TYPE_CASE(i8) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(i16) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(i32) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(i64) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - 
TYPE_CASE(u8) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(u16) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(u32) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(u64) - (arg, out, double_to_int(min, ceil_func), double_to_int(max, floor_func), count); - break; - TYPE_CASE(f16)(arg, out, static_cast(min), static_cast(max), count); - break; - TYPE_CASE(bf16) - (arg, out, static_cast(min), static_cast(max), count); - break; - TYPE_CASE(f32)(arg, out, static_cast(min), static_cast(max), count); - break; - default: - rc = false; - break; +// Make it part of reference/convert.hpp (requires to move compare.hpp to reference from shape inference) +template +TO convert(const FROM value) { + if (cmp::lt(value, std::numeric_limits::min())) { + return std::numeric_limits::lowest(); + } else if (cmp::gt(value, std::numeric_limits::max())) { + return std::numeric_limits::max(); + } else { + return static_cast(value); } - return rc; - OPENVINO_SUPPRESS_DEPRECATED_END } -} // namespace + +template ::value>::type* = nullptr> +T min_as(const double value) { + return convert(std::ceil(value)); +} + +template ()>::type* = nullptr> +T min_as(const double value) { + return static_cast(value); +} + +template ::value>::type* = nullptr> +T max_as(const double value) { + return convert(std::floor(value)); +} + +template ()>::type* = nullptr> +T max_as(const double value) { + return static_cast(value); +} + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg, Tensor& out, const double min, const double max, const size_t count) { + reference::clamp(arg.data(), out.data(), min_as(min), max_as(max), count); + return true; + } +}; } // namespace clamp -bool op::v0::Clamp::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) 
const { +namespace v0 { +bool Clamp::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Clamp_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return clamp::evaluate_clamp(inputs[0], outputs[0], get_min(), get_max()); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); + + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + outputs[0], + get_min(), + get_max(), + shape_size(in_shape)); } -bool op::v0::Clamp::has_evaluate() const { +bool Clamp::has_evaluate() const { OV_OP_SCOPE(v0_Clamp_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::bf16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case element::f32: + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } -op::Clamp::Clamp(const Output& data, const double min, const double max) +Clamp::Clamp(const Output& data, const double min, const double max) : util::UnaryElementwiseArithmetic(data), m_min{min}, m_max{max} { constructor_validate_and_infer_types(); } -void op::Clamp::validate_and_infer_types() { +void Clamp::validate_and_infer_types() { OV_OP_SCOPE(v0_Clamp_validate_and_infer_types); - const element::Type& input_et = get_input_element_type(0); + const auto& input_et = get_input_element_type(0); 
NODE_VALIDATION_CHECK(this, input_et.is_integral_number() || input_et.is_real(), "Input element type must be numeric. Got: ", @@ -128,27 +120,26 @@ void op::Clamp::validate_and_infer_types() { set_output_type(0, input_et, get_input_partial_shape(0)); } -shared_ptr op::Clamp::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Clamp::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Clamp_clone_with_new_inputs); - NODE_VALIDATION_CHECK(this, - new_args.size() == 1, - "Expected 1 element in new_args for the Clamp op but got ", - new_args.size()); - - return make_shared(new_args.at(0), m_min, m_max); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), get_min(), get_max()); } -bool op::Clamp::visit_attributes(AttributeVisitor& visitor) { +bool Clamp::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_Clamp_visit_attributes); visitor.on_attribute("min", m_min); visitor.on_attribute("max", m_max); return true; } -bool op::Clamp::evaluate_lower(ov::TensorVector& output_values) const { +bool Clamp::evaluate_lower(ov::TensorVector& output_values) const { return ov::default_lower_bound_evaluator(this, output_values); } -bool op::Clamp::evaluate_upper(ov::TensorVector& output_values) const { +bool Clamp::evaluate_upper(ov::TensorVector& output_values) const { return ov::default_upper_bound_evaluator(this, output_values); } +} // namespace v0 +} // namespace op +} // namespace ov From ce8ac6f478880f3dc4143ac2a7ee4120d52c4ef5 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Tue, 31 Oct 2023 00:18:28 +0100 Subject: [PATCH 137/275] [Opset13][TF FE] Enable tensorflow bitwise operators (#20340) * Add opset-13 bitwise ops * Fix issue in BinaryOps test --------- Co-authored-by: Katarzyna Mitrus --- src/frontends/tensorflow/src/op_table.cpp | 7 ++++++- .../tensorflow_common/src/op/binary_op.cpp | 7 +++++-- .../tensorflow_common/src/op/unary_op.cpp | 2 ++ 
.../tensorflow_tests/test_tf_BinaryOps.py | 15 +++++++++++++-- .../tensorflow_tests/test_tf_UnaryOps.py | 14 +++++++++++++- 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 3251ad81b0f7af..4643994b728b5b 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -6,6 +6,7 @@ #include "common_op_table.hpp" #include "openvino/opsets/opset10.hpp" +#include "openvino/opsets/opset13.hpp" #include "openvino/opsets/opset8.hpp" #include "openvino/opsets/opset9.hpp" @@ -78,6 +79,7 @@ const std::map get_supported_ops() { {"Erf", CreatorFunction(translate_unary_op)}, {"Exp", CreatorFunction(translate_unary_op)}, {"Floor", CreatorFunction(translate_unary_op)}, + {"Invert", CreatorFunction(translate_unary_op)}, {"IsFinite", CreatorFunction(translate_unary_op)}, {"IsInf", CreatorFunction(translate_unary_op)}, {"IsNan", CreatorFunction(translate_unary_op)}, @@ -100,6 +102,9 @@ const std::map get_supported_ops() { // note: BinaryOp translator declaration for each op must to be added in binary_op.cpp file {"Add", CreatorFunction(translate_binary_op)}, {"AddV2", CreatorFunction(translate_binary_op)}, + {"BitwiseAnd", CreatorFunction(translate_binary_op)}, + {"BitwiseOr", CreatorFunction(translate_binary_op)}, + {"BitwiseXor", CreatorFunction(translate_binary_op)}, {"Equal", CreatorFunction(translate_binary_op)}, {"FloorMod", CreatorFunction(translate_binary_op)}, {"Greater", CreatorFunction(translate_binary_op)}, @@ -351,4 +356,4 @@ const std::map get_supported_ops() { } // namespace op } // namespace tensorflow } // namespace frontend -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/binary_op.cpp b/src/frontends/tensorflow_common/src/op/binary_op.cpp index 67d8dce497ca27..a1a056f048d066 100644 --- a/src/frontends/tensorflow_common/src/op/binary_op.cpp +++ 
b/src/frontends/tensorflow_common/src/op/binary_op.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/opsets/opset13.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::opset13; namespace ov { namespace frontend { @@ -37,6 +37,9 @@ OutputVector translate_binary_op(const NodeContext& node) { } template OutputVector translate_binary_op(const NodeContext& node); +template OutputVector translate_binary_op(const NodeContext& node); +template OutputVector translate_binary_op(const NodeContext& node); +template OutputVector translate_binary_op(const NodeContext& node); template OutputVector translate_binary_op(const NodeContext& node); template OutputVector translate_binary_op(const NodeContext& node); template OutputVector translate_binary_op(const NodeContext& node); diff --git a/src/frontends/tensorflow_common/src/op/unary_op.cpp b/src/frontends/tensorflow_common/src/op/unary_op.cpp index a1286b35bf4e9a..16c279bd63e9d4 100644 --- a/src/frontends/tensorflow_common/src/op/unary_op.cpp +++ b/src/frontends/tensorflow_common/src/op/unary_op.cpp @@ -10,6 +10,7 @@ #include "openvino/op/asinh.hpp" #include "openvino/op/atan.hpp" #include "openvino/op/atanh.hpp" +#include "openvino/op/bitwise_not.hpp" #include "openvino/op/ceiling.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/cos.hpp" @@ -69,6 +70,7 @@ template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); +template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); template OutputVector translate_unary_op(const NodeContext& node); diff --git 
a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py index 7e3964e68c9c35..e542881944ce45 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py @@ -15,6 +15,8 @@ def generate_input(op_type, size): logical_type = ['LogicalAnd', 'LogicalOr', 'LogicalXor'] + bitwise_type = ['BitwiseAnd', 'BitwiseOr', 'BitwiseXor'] + # usual function domain lower = -256 upper = 256 @@ -26,6 +28,8 @@ def generate_input(op_type, size): if op_type in logical_type: return np.random.randint(0, 1, size).astype(bool) + elif op_type in bitwise_type: + return np.random.randint(lower, upper, size).astype(np.int32) elif op_type in narrow_borders: return np.random.uniform(lower, upper, size).astype(np.float32) else: @@ -79,13 +83,18 @@ def create_add_placeholder_const_net(self, x_shape, y_shape, ir_version, op_type 'FloorMod': tf.math.floormod, 'FloorDiv': tf.math.floordiv, 'Xdivy': tf.raw_ops.Xdivy, + 'BitwiseAnd': tf.raw_ops.BitwiseAnd, + 'BitwiseOr': tf.raw_ops.BitwiseOr, + 'BitwiseXor': tf.raw_ops.BitwiseXor, } - op_type_kw_args = [ 'AddV2', 'Xdivy' ] + op_type_kw_args = ["AddV2", "Xdivy", "BitwiseAnd", "BitwiseOr", "BitwiseXor"] type = np.float32 if op_type in ["LogicalAnd", "LogicalOr", "LogicalXor"]: type = bool + elif op_type in ["BitwiseAnd", "BitwiseOr", "BitwiseXor"]: + type = np.int32 tf.compat.v1.reset_default_graph() # Create the graph and model with tf.compat.v1.Session() as sess: @@ -125,13 +134,15 @@ def create_add_placeholder_const_net(self, x_shape, y_shape, ir_version, op_type 'Equal', 'NotEqual', 'Mod', 'Greater', 'GreaterEqual', 'Less', 'LessEqual', 'LogicalAnd', 'LogicalOr', 'LogicalXor', 'FloorMod', 'FloorDiv', - 'Xdivy']) + 'Xdivy', 'BitwiseAnd', 'BitwiseOr', 'BitwiseXor',]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122716') 
def test_binary_op(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): + if not use_new_frontend and op_type in ['BitwiseAnd', 'BitwiseOr', 'BitwiseXor']: + pytest.skip("Bitwise ops are supported only by new TF FE.") if precision == "FP16": pytest.skip("BinaryOps tests are skipped with FP16 precision." "They don't pass accuracy checks because chaotic output.") diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOps.py index 91ed0a72db0bcf..e0b30f5fb17c0d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOps.py @@ -19,6 +19,7 @@ def _prepare_input(self, inputs_dict): from_one = ['Acosh'] logical_type = ['LogicalNot'] + bitwise_type = ["BitwiseNot"] # usual function domain lower = -256 @@ -39,6 +40,8 @@ def _prepare_input(self, inputs_dict): for input in inputs_dict.keys(): if self.current_op_type in logical_type: inputs_dict[input] = np.random.randint(0, 1, inputs_dict[input]).astype(bool) + elif self.current_op_type in bitwise_type: + inputs_dict[input] = np.random.randint(lower, upper, inputs_dict[input]).astype(np.int32) else: inputs_dict[input] = np.random.uniform(lower, upper, inputs_dict[input]).astype( np.float32) @@ -77,6 +80,7 @@ def create_net_with_unary_op(self, shape, ir_version, op_type, use_new_frontend) 'Asinh': tf.math.asinh, 'Atan': tf.math.atan, 'Atanh': tf.math.atanh, + 'BitwiseNot': tf.bitwise.invert, 'Ceiling': tf.math.ceil, 'Cos': tf.math.cos, 'Cosh': tf.math.cosh, @@ -105,6 +109,8 @@ def create_net_with_unary_op(self, shape, ir_version, op_type, use_new_frontend) type = tf.float32 if op_type == "LogicalNot": type = tf.bool + elif op_type == "BitwiseNot": + type = tf.int32 # Create the graph and model with tf.compat.v1.Session() as sess: tf_x_shape = shape.copy() @@ -152,10 +158,13 @@ def create_net_with_unary_op(self, shape, ir_version, op_type, 
use_new_frontend) 'LogicalNot', 'Square', 'Erf', + 'BitwiseNot' ]) @pytest.mark.precommit def test_unary_op_precommit(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): + if not use_new_frontend and op_type in ['BitwiseNot']: + pytest.skip("Bitwise ops are supported only by new TF FE.") if ie_device == 'GPU': pytest.skip("5D tensors is not supported on GPU") self._test(*self.create_net_with_unary_op(**params, ir_version=ir_version, op_type=op_type, @@ -211,12 +220,15 @@ def test_unary_op_mish_precommit(self, params, ie_device, precision, ir_version, 'Asinh', 'Square', 'Erf', - 'Selu' + 'Selu', + 'BitwiseNot' ]) @pytest.mark.nightly @pytest.mark.skipif(sys.platform == 'darwin', reason="Ticket - 122182") def test_unary_op(self, params, ie_device, precision, ir_version, temp_dir, op_type, use_new_frontend, use_old_api): + if not use_new_frontend and op_type in ['BitwiseNot']: + pytest.skip("Bitwise ops are supported only by new TF FE.") if ie_device == 'GPU': pytest.skip("5D tensors is not supported on GPU") self._test(*self.create_net_with_unary_op(**params, ir_version=ir_version, op_type=op_type, From 246410b0dd28430a69056e89b9c53d08d752cec3 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 31 Oct 2023 01:01:27 +0100 Subject: [PATCH 138/275] [core]Migrate NotEqual operator to new API (#20648) * Migrate NotEqual operator to new API * Remove `visit_attributes` is same as base --------- Co-authored-by: Michal Lukaszewski --- src/core/include/openvino/op/not_equal.hpp | 5 +- .../include/openvino/reference/not_equal.hpp | 25 ++-- src/core/src/op/not_equal.cpp | 118 ++++++++---------- 3 files changed, 63 insertions(+), 85 deletions(-) diff --git a/src/core/include/openvino/op/not_equal.hpp b/src/core/include/openvino/op/not_equal.hpp index dfae8b59a8fd0e..371d6d7cafd3df 100644 --- a/src/core/include/openvino/op/not_equal.hpp +++ b/src/core/include/openvino/op/not_equal.hpp @@ -27,11 +27,8 @@ class OPENVINO_API NotEqual : 
public util::BinaryElementwiseComparison { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - bool visit_attributes(AttributeVisitor& visitor) override; }; } // namespace v1 } // namespace op diff --git a/src/core/reference/include/openvino/reference/not_equal.hpp b/src/core/reference/include/openvino/reference/not_equal.hpp index b6b5c1a348476d..eb71674f32e782 100644 --- a/src/core/reference/include/openvino/reference/not_equal.hpp +++ b/src/core/reference/include/openvino/reference/not_equal.hpp @@ -4,19 +4,22 @@ #pragma once -#if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - #include +#include -#include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/autobroadcast_binop.hpp" namespace ov { namespace reference { +// Use custom implementation as function instead std::not_equal_to functor, gives smaller binary size. +// If removed or replace check impact on library binary size. 
+namespace func { +template +bool not_equal(const T lhs, const T rhs) { + return lhs != rhs; +} +} // namespace func + template void not_equal(const T* arg0, const T* arg1, @@ -24,13 +27,7 @@ void not_equal(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> U { - return static_cast(x != y); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::not_equal); } } // namespace reference } // namespace ov - -#if defined(__GNUC__) -# pragma GCC diagnostic pop -#endif diff --git a/src/core/src/op/not_equal.cpp b/src/core/src/op/not_equal.cpp index 68da0abeaa6e0e..55c0f5a3d9fa44 100644 --- a/src/core/src/op/not_equal.cpp +++ b/src/core/src/op/not_equal.cpp @@ -2,95 +2,79 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/not_equal.hpp" +#include "openvino/op/not_equal.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/reference/not_equal.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace not_equalop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::not_equal(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), +namespace ov { +namespace op { +namespace not_equal { +struct Evaluate : element::NoAction { + using element::NoAction::visit; + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::not_equal(in0.data(), + in1.data(), + out.data>(), + shape0, + shape1, 
broadcast_spec); - return true; -} - -bool evaluate_not_equal(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_not_equal, boolean, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_not_equal, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + return true; } - return rc; -} -} // namespace -} // namespace not_equalop +}; +} // namespace not_equal // ----------------------------------- v1 -------------------------------------- -op::v1::NotEqual::NotEqual(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +NotEqual::NotEqual(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseComparison(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::NotEqual::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr NotEqual::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_NotEqual_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::NotEqual::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool NotEqual::evaluate(TensorVector& outputs, 
const TensorVector& inputs) const { OV_OP_SCOPE(v1_NotEqual_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 2)); - OPENVINO_SUPPRESS_DEPRECATED_END - return not_equalop::evaluate_not_equal(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::NotEqual::has_evaluate() const { +bool NotEqual::has_evaluate() const { OV_OP_SCOPE(v1_NotEqual_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; -} - -bool op::v1::NotEqual::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_NotEqual_visit_attributes); - BinaryElementwiseComparison::visit_attributes(visitor); - return true; } +} // namespace v1 +} // namespace op +} // namespace ov From 0076f7fc0086a5e5689a586f82d58498e2a9978a Mon Sep 17 00:00:00 2001 From: Xiuchuan Zhai Date: Tue, 31 Oct 2023 11:17:38 +0800 Subject: [PATCH 139/275] [PD FE] loading weight from ov::tensor (#20044) * fix paddle load model from memory * fix coding style * ignore the deprecated api * fix a istream bug; add test case * simplify func variant_to_stream_ptr * restore the previous impl for less memory affect * fix memory leak --- src/frontends/paddle/src/frontend.cpp | 40 ++++++++++---- 
.../paddle/tests/read_paddle_model_test.cpp | 54 +++++++++++++++++++ 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index 9582fccf6c447f..8caf37968db985 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -135,22 +135,32 @@ bool normalize_framework_node(const std::shared_ptr& node, return true; } -std::istream* variant_to_stream_ptr(const ov::Any& variant, std::ifstream& ext_stream) { +OPENVINO_SUPPRESS_DEPRECATED_START +std::istream* variant_to_stream_ptr(const ov::Any& variant, std::fstream& fs, std::stringstream& ss) { if (variant.is()) { return variant.as(); + } else if (variant.is>()) { + auto& aligned_weights_buffer = variant.as>(); + ss.write(aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size()); + FRONT_END_INITIALIZATION_CHECK(ss && ss.good(), "Cannot open ov::tensor."); + return &ss; } else if (variant.is()) { const auto& model_path = variant.as(); - ext_stream.open(model_path, std::ios::in | std::ifstream::binary); + fs.open(model_path, std::ios::in | std::ifstream::binary); + FRONT_END_INITIALIZATION_CHECK(fs && fs.is_open(), "Cannot open model file."); + return &fs; } #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) else if (variant.is()) { const auto& model_path = variant.as(); - ext_stream.open(model_path.c_str(), std::ios::in | std::ifstream::binary); + fs.open(model_path.c_str(), std::ios::in | std::ifstream::binary); + FRONT_END_INITIALIZATION_CHECK(fs && fs.is_open(), "Cannot open model file."); + return &fs; } #endif - FRONT_END_INITIALIZATION_CHECK(ext_stream && ext_stream.is_open(), "Cannot open model file."); - return &ext_stream; + return nullptr; } +OPENVINO_SUPPRESS_DEPRECATED_END } // namespace FrontEnd::FrontEnd() : m_op_translators(paddle::get_supported_ops()) {} @@ -392,9 +402,17 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { #endif else if 
(variants[0].is()) { // Validating first stream, it must contain a model - auto p_model_stream = variants[0].as(); + // step 1: + // PDPD API ParseFromIstream always deconstructs the context in model stream. + // So, make a copy for variants[0] to avoid breaking the context in variants[0]. + const auto p_model_stream = variants[0].as(); + std::istream copy_model_stream(p_model_stream->rdbuf()); ::paddle::framework::proto::ProgramDesc fw; - return fw.ParseFromIstream(p_model_stream); + auto ret = fw.ParseFromIstream(&copy_model_stream); + // step 2: + // reset the stream position to the beginning. + p_model_stream->seekg(0, p_model_stream->beg); + return ret; } return false; } @@ -422,10 +440,10 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const } } else if (variants.size() == 2 + extra_variants_num) { // The case when .pdmodel and .pdparams files are provided - std::ifstream model_stream; - std::ifstream weights_stream; - std::istream* p_model_stream = paddle::variant_to_stream_ptr(variants[0], model_stream); - std::istream* p_weights_stream = paddle::variant_to_stream_ptr(variants[1], weights_stream); + std::fstream model_fstream, weights_fstream; + std::stringstream model_sstream, weights_sstream; + std::istream* p_model_stream = paddle::variant_to_stream_ptr(variants[0], model_fstream, model_sstream); + std::istream* p_weights_stream = paddle::variant_to_stream_ptr(variants[1], weights_fstream, weights_sstream); if (p_model_stream && p_weights_stream) { return std::make_shared(std::vector{p_model_stream, p_weights_stream}, m_telemetry); diff --git a/src/frontends/paddle/tests/read_paddle_model_test.cpp b/src/frontends/paddle/tests/read_paddle_model_test.cpp index 1095c2b8f98110..b362566f52af11 100644 --- a/src/frontends/paddle/tests/read_paddle_model_test.cpp +++ b/src/frontends/paddle/tests/read_paddle_model_test.cpp @@ -16,8 +16,62 @@ #include "frontend/shared/include/utils.hpp" #include "openvino/openvino.hpp" #include
"openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset8.hpp" #include "openvino/pass/serialize.hpp" +TEST(Paddle_Reader_Tests, LoadModelMemoryToCore) { + auto model = std::string(TEST_PADDLE_MODELS_DIRNAME) + "conv2d_relu/conv2d_relu.pdmodel"; + auto param = std::string(TEST_PADDLE_MODELS_DIRNAME) + "conv2d_relu/conv2d_relu.pdiparams"; + + ov::Core core; + auto read_file = [&](const std::string& file_name, size_t& size) { + FILE* sFile = fopen(file_name.c_str(), "r"); + fseek(sFile, 0, SEEK_END); + size = ftell(sFile); + uint8_t* ss = (uint8_t*)malloc(size); + rewind(sFile); + const size_t length = fread(&ss[0], 1, size, sFile); + if (size != length) { + std::cerr << "file size is not correct\n"; + } + fclose(sFile); + return ss; + }; + + size_t xml_size, bin_size; + auto xml_ptr = read_file(model, xml_size); + auto bin_ptr = read_file(param, bin_size); + ov::Tensor weight_tensor = ov::Tensor(ov::element::u8, {1, bin_size}, bin_ptr); + std::string model_str = std::string((char*)xml_ptr, xml_size); + auto function = core.read_model(model_str, weight_tensor); + + const auto inputType = ov::element::f32; + const auto inputShape = ov::Shape{1, 3, 4, 4}; + const auto data = std::make_shared(inputType, inputShape); + data->set_friendly_name("xxx"); + data->output(0).get_tensor().add_names({"xxx"}); + const auto weight = std::make_shared(ov::element::f32, ov::Shape{5, 3, 1, 1}, 1.0); + const auto conv2d = std::make_shared(data->output(0), + weight->output(0), + ov::Strides({1, 1}), + ov::CoordinateDiff({1, 1}), + ov::CoordinateDiff({1, 1}), + ov::Strides({1, 1})); + conv2d->set_friendly_name("conv2d_0.tmp_0"); + conv2d->output(0).get_tensor().add_names({"conv2d_0.tmp_0"}); + const auto relu = std::make_shared(conv2d->output(0)); + relu->set_friendly_name("relu_0.tmp_0"); + relu->output(0).get_tensor().add_names({"relu_0.tmp_0"}); + const auto result = std::make_shared(relu->output(0)); + result->set_friendly_name("relu_0.tmp_0/Result"); + const auto reference = 
std::make_shared(ov::NodeVector{result}, ov::ParameterVector{data}, "Model0"); + const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::NONE); + const FunctionsComparator::Result res = func_comparator(function, reference); + ASSERT_TRUE(res.valid) << res.message; + free(xml_ptr); + free(bin_ptr); +} + TEST(Paddle_Reader_Tests, ImportBasicModelToCore) { auto model = std::string(TEST_PADDLE_MODELS_DIRNAME) + "relu/relu.pdmodel"; From fc4fe07a0e49b008243cab82ad54912275baef54 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 31 Oct 2023 08:51:18 +0400 Subject: [PATCH 140/275] [TF FE] Fix CTCLoss translator (#20775) * Fix CTCLoss translator Signed-off-by: Kazantsev, Roman * Expend layer tests for CTCLoss --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow_common/src/op/ctc_loss.cpp | 2 +- .../tensorflow_tests/test_tf_CTCLoss.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/ctc_loss.cpp b/src/frontends/tensorflow_common/src/op/ctc_loss.cpp index 1abba8801f2c64..8679379b1c72e3 100644 --- a/src/frontends/tensorflow_common/src/op/ctc_loss.cpp +++ b/src/frontends/tensorflow_common/src/op/ctc_loss.cpp @@ -36,7 +36,7 @@ OutputVector translate_ctc_loss_op(const NodeContext& node) { // retrieve all attributes for CTCLoss auto preprocess_collapse_repeated = node.get_attribute("preprocess_collapse_repeated", false); - auto ctc_merge_repeated = node.get_attribute("preprocess_collapse_repeated", true); + auto ctc_merge_repeated = node.get_attribute("ctc_merge_repeated", true); auto time_major = node.get_attribute("time_major", true); if (time_major) { diff --git a/tests/layer_tests/tensorflow_tests/test_tf_CTCLoss.py b/tests/layer_tests/tensorflow_tests/test_tf_CTCLoss.py index 0a2eae6303386e..805ab3ff52f6fd 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_CTCLoss.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_CTCLoss.py @@ 
-18,7 +18,7 @@ def _prepare_input(self, inputs_dict): inputs_dict[input] = np.random.randint(0, 5, inputs_dict[input]).astype(np.float32) return inputs_dict - def create_ctcloss_placeholder_const_net(self, inputs, targets): + def create_ctcloss_placeholder_const_net(self, inputs, targets, preprocess_collapse_repeated, ctc_merge_repeated): seq_lens = np.array([inputs[2]], dtype=np.int32) x = [targets] @@ -36,7 +36,9 @@ def create_ctcloss_placeholder_const_net(self, inputs, targets): tf_inputs = tf.compat.v1.placeholder(tf.float32, inputs, "inputs") ctc_loss = tf.raw_ops.CTCLoss(inputs=tf_inputs, labels_indices=indices, labels_values=vals, - sequence_length=seq_lens) + sequence_length=seq_lens, + preprocess_collapse_repeated=preprocess_collapse_repeated, + ctc_merge_repeated=ctc_merge_repeated) # compute exponent since CTCLoss value is -ln(prob) tf.math.exp(-ctc_loss[0]) @@ -54,11 +56,16 @@ def create_ctcloss_placeholder_const_net(self, inputs, targets): ] @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("preprocess_collapse_repeated", [True, False, None]) + @pytest.mark.parametrize("ctc_merge_repeated", [True, False, None]) @pytest.mark.precommit_tf_fe @pytest.mark.nightly @pytest.mark.skipif(platform == 'darwin', reason="Ticket - 122182") - def test_ctcloss_placeholder_const(self, params, ie_device, precision, ir_version, temp_dir, + def test_ctcloss_placeholder_const(self, params, preprocess_collapse_repeated, ctc_merge_repeated, + ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): - self._test(*self.create_ctcloss_placeholder_const_net(**params), + self._test(*self.create_ctcloss_placeholder_const_net(**params, + preprocess_collapse_repeated=preprocess_collapse_repeated, + ctc_merge_repeated=ctc_merge_repeated), ie_device, precision, ir_version, temp_dir=temp_dir, - use_new_frontend=use_new_frontend, use_old_api=use_old_api, custom_eps=1e-2) + use_new_frontend=use_new_frontend, use_old_api=use_old_api) From 
50b6c5f0d707093000eb928f68f0983391816056 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Tue, 31 Oct 2023 09:02:03 +0400 Subject: [PATCH 141/275] [GPU] Fix for SoftmaxKernel_bf in dynamic case (#20769) --- .../kernels/softmax/softmax_kernel_bf.cpp | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp index d5304e4c78436e..c3e8f267c408ec 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp @@ -113,16 +113,26 @@ JitConstants SoftmaxKernel_bf::GetJitConstants(const softmax_params& params, Dis const auto& input = params.inputs[0]; DimensionAccessHelper dims(input); auto softmax_dim_y_bfyx = (params.dim == SoftmaxDim::Y && input.GetLayout() == DataLayout::bfyx); - const std::string flatten_bf = "(SOFTMAX_DIM_Y_BFYX&&(" + dims.f() + ">1))"; + auto softmax_dim_x_bfyx = (params.dim == SoftmaxDim::X && input.GetLayout() == DataLayout::bfyx); const std::string lws_0 = "get_local_size(0)"; - const std::string data_set_count = "(FLATTEN_BF?" + toVectorMulString({dims.f(), dims.b()}) + ":" + dims.b() + ")"; - const std::string data_set_size = "(FLATTEN_BF?" 
+ dims.y() + ":" + toVectorMulString({dims.x(), dims.y(), dims.z(), dims.f()}) + ")"; + + std::string data_set_count; + std::string data_set_size; + if (softmax_dim_y_bfyx) { + data_set_count = toVectorMulString({dims.f(), dims.b()}); + data_set_size = dims.y(); + } else if (softmax_dim_x_bfyx) { + data_set_count = toVectorMulString({dims.f(), dims.b(), dims.y()}); + data_set_size = dims.x(); + } else { + data_set_count = dims.b(); + data_set_size = toVectorMulString({dims.x(), dims.y(), dims.z(), dims.f()}); + } + // It can be expected that the maximum possible itemsNum will not exceed 32 // Therefore, in dynamic shape, stack_size including additional buffer is set to 33 constexpr size_t stack_size = 33; // The size of stack for my_chunk jit.AddConstants({ - MakeJitConstant("SOFTMAX_DIM_Y_BFYX", softmax_dim_y_bfyx), - MakeJitConstant("FLATTEN_BF", flatten_bf), MakeJitConstant("LWS", lws_0), MakeJitConstant("SLM_SIZE", dispatchData.maxSlmSize), MakeJitConstant("DATA_SETS_COUNT", data_set_count), From 78f11965d877a04d9718ccbafccdbba32e97d8f1 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Tue, 31 Oct 2023 13:23:58 +0800 Subject: [PATCH 142/275] [CPU] Fix user setting num_streams=0 (#20469) --- src/plugins/intel_cpu/src/plugin.cpp | 4 +++- .../ov_executable_network/properties.cpp | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index d2dd2b0eda08ce..b785dd2e755c0b 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -325,7 +325,9 @@ void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr Date: Tue, 31 Oct 2023 09:34:27 +0400 Subject: [PATCH 143/275] [CPU] Sparse weights decompression feature: changed comp_tile_len data type from int16_t to int (#20707) --- .../tests/functional/single_layer_tests/matmul_sparse.cpp | 3 ++- src/plugins/intel_cpu/thirdparty/onednn | 2 +- 2 files changed, 3 
insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp index 209efbe1864819..cf62975e3fd14a 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp @@ -104,7 +104,7 @@ class MatMulSparseCPUTest : public testing::WithParamInterface dist_f(0.f, 1.f); - int countZero = 0; + size_t countZero = 0; res[0] = startFrom; res[vec_len - 1] = upTo; @@ -259,6 +259,7 @@ const std::vector IS2D_sparse_smoke = { }, {false, true} }, + {static_shapes_to_test_representation({{1, 4096}, {4096, 16384}}), {false, true}}, }; const auto testParams2D_i8_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_sparse_smoke), diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index ff9205a8b42238..2ead5d4fe5993a 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit ff9205a8b42238e1fba992fad2429b722c4cfed0 +Subproject commit 2ead5d4fe5993a797d9a7a4b8b5557b96f6ec90e From 44925b580fcbd35c9d81778f32fde4d5e547de56 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Tue, 31 Oct 2023 05:50:20 +0000 Subject: [PATCH 144/275] [GPU] Refactor Comparison, Conversion, ConvertColorI420 (#20374) * Comparison * Conversion * ConvertColorI420 * fix * Comparison * Conversion * ConvertColorI420 * fix * fix after review * fix after review * fix after review * fixed comparison --------- Co-authored-by: Pavel Durandin --- .../single_layer_tests/comparison.cpp | 75 ++++++++++--------- .../single_layer_tests/conversion.cpp | 27 +++---- .../single_layer_tests/convert_color_i420.cpp | 66 +++++++++++++--- 3 files changed, 107 insertions(+), 61 deletions(-) diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp index 8534f1bce523fb..8977a88cca667e 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp @@ -2,18 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/comparison.hpp" - -#include - +#include "single_op_tests/comparison.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace LayerTestsDefinitions::ComparisonParams; - namespace { +using ov::test::ComparisonLayerTest; -const std::map, std::vector>> inputShapes = { +std::map> input_shapes_combinations = { {{1}, {{1}, {17}, {1, 1}, {2, 18}, {1, 1, 2}, {2, 2, 3}, {1, 1, 2, 3}}}, {{5}, {{1}, {1, 1}, {2, 5}, {1, 1, 1}, {2, 2, 5}}}, {{2, 200}, {{1}, {200}, {1, 200}, {2, 200}, {2, 2, 200}}}, @@ -22,39 +17,52 @@ const std::map, std::vector>> inputShape {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}}, }; -const std::vector inputsPrecisions = { - InferenceEngine::Precision::FP32, +auto input_shapes_pair_vector = ov::test::utils::combineParams(input_shapes_combinations); + +auto converter = [] (const std::vector>& shapes) { + std::vector> result; + for (const auto& shape : shapes) { + result.push_back({shape.first, shape.second}); + } + return result; +}; + +auto input_shapes_static = converter(input_shapes_pair_vector); + +std::vector model_type = { + ov::element::f32, + ov::element::f16, + ov::element::i32, + ov::element::boolean, }; -const std::vector comparisonOpTypes = { - ngraph::helpers::ComparisonTypes::EQUAL, - ngraph::helpers::ComparisonTypes::NOT_EQUAL, - ngraph::helpers::ComparisonTypes::GREATER, - ngraph::helpers::ComparisonTypes::GREATER_EQUAL, - 
ngraph::helpers::ComparisonTypes::LESS, - ngraph::helpers::ComparisonTypes::LESS_EQUAL, +const std::vector comparisonOpTypes = { + ov::test::utils::ComparisonTypes::EQUAL, + ov::test::utils::ComparisonTypes::NOT_EQUAL, + ov::test::utils::ComparisonTypes::GREATER, + ov::test::utils::ComparisonTypes::GREATER_EQUAL, + ov::test::utils::ComparisonTypes::LESS, + ov::test::utils::ComparisonTypes::LESS_EQUAL, }; -const std::vector secondInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +const std::vector secondInputTypes = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER, }; const std::map additional_config = {}; INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest, - ::testing::Combine(::testing::ValuesIn(ov::test::utils::combineParams(inputShapes)), - ::testing::ValuesIn(inputsPrecisions), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::ValuesIn(comparisonOpTypes), ::testing::ValuesIn(secondInputTypes), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::ValuesIn(model_type), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), ComparisonLayerTest::getTestCaseName); -const std::vector inputShapesIsOps = { +const std::vector> inputShapesIsOps = { {{5}, {1}}, {{2, 2}, {1}}, {{2, 2, 2}, {1}}, @@ -63,20 +71,17 @@ const std::vector inputShapesIsOps = { {{2, 17, 3, 4, 8, 2}, {1}}, }; -const std::vector inputsPrecisionsIsOps = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +std::vector comparisonOpTypesIs = { + ov::test::utils::ComparisonTypes::IS_FINITE, + ov::test::utils::ComparisonTypes::IS_NAN }; INSTANTIATE_TEST_SUITE_P(smoke_IsOp, ComparisonLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapesIsOps), - 
::testing::ValuesIn(inputsPrecisionsIsOps), - ::testing::Values(ngraph::helpers::ComparisonTypes::IS_FINITE, - ngraph::helpers::ComparisonTypes::IS_NAN), - ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::BOOL), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesIsOps)), + ::testing::ValuesIn(comparisonOpTypesIs), + ::testing::Values(ov::test::utils::InputLayerType::CONSTANT), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::Values(additional_config)), ComparisonLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp index b0faddf52f0a95..04fe768d07057d 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/conversion.cpp @@ -4,34 +4,31 @@ #include -#include "single_layer_tests/conversion.hpp" +#include "single_op_tests/conversion.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector conversionOpTypes = { - ngraph::helpers::ConversionTypes::CONVERT, - ngraph::helpers::ConversionTypes::CONVERT_LIKE, +using ov::test::ConversionLayerTest; +const std::vector conversionOpTypes = { + ov::test::utils::ConversionTypes::CONVERT, + ov::test::utils::ConversionTypes::CONVERT_LIKE, }; -const std::vector> inShape = {{1, 2, 3, 4}}; +const std::vector> inShape = {{{1, 2, 3, 4}}}; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I8, +const std::vector 
netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::u8, + ov::element::i8, }; INSTANTIATE_TEST_SUITE_P(smoke_NoReshape, ConversionLayerTest, ::testing::Combine( ::testing::ValuesIn(conversionOpTypes), - ::testing::Values(inShape), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShape)), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConversionLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_i420.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_i420.cpp index 182043ee90c76a..225d7cb67c057e 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_i420.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convert_color_i420.cpp @@ -4,42 +4,86 @@ #include -#include "single_layer_tests/convert_color_i420.hpp" +#include "single_op_tests/convert_color_i420.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::ConvertColorI420LayerTest; const std::vector inShapes_nhwc = { {1, 10, 10, 1} }; const std::vector inTypes = { - ov::element::u8, ov::element::f32 + ov::element::u8, + ov::element::f32 +}; + +auto generate_input_static_shapes = [] (const std::vector& original_shapes, bool single_plane) { + std::vector> result_shapes; + for (const auto& original_shape : original_shapes) { + std::vector one_result_shapes; + if (single_plane) { + auto shape = original_shape; + shape[1] = shape[1] * 3 / 2; + one_result_shapes.push_back(shape); + } else { + auto shape = original_shape; + one_result_shapes.push_back(shape); + auto uvShape = ov::Shape{shape[0], shape[1] / 2, 
shape[2] / 2, 1}; + one_result_shapes.push_back(uvShape); + one_result_shapes.push_back(uvShape); + } + result_shapes.push_back(one_result_shapes); + } + return result_shapes; }; -INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420, +auto in_shapes_single_plane_static = generate_input_static_shapes(inShapes_nhwc, true); +auto in_shapes_three_planes_static = generate_input_static_shapes(inShapes_nhwc, false); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420SinglePlane, ConvertColorI420LayerTest, - ::testing::Combine(::testing::ValuesIn(inShapes_nhwc), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_single_plane_static)), ::testing::ValuesIn(inTypes), ::testing::Bool(), + ::testing::Values(true), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ConvertColorI420LayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420ThreePlanes, + ConvertColorI420LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(in_shapes_three_planes_static)), + ::testing::ValuesIn(inTypes), ::testing::Bool(), + ::testing::Values(false), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvertColorI420LayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420_acc, - ConvertColorI420AccuracyTest, - ::testing::Combine(::testing::Values(ov::Shape{1, 16 * 6, 16, 1}), +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420SinglePlane_acc, + ConvertColorI420LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + generate_input_static_shapes({{1, 16 * 6, 16, 1}}, true))), ::testing::Values(ov::element::u8), ::testing::Bool(), + ::testing::Values(true), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ConvertColorI420LayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420ThreePlanes_acc, + ConvertColorI420LayerTest, + 
::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + generate_input_static_shapes({{1, 16 * 6, 16, 1}}, false))), + ::testing::Values(ov::element::u8), ::testing::Bool(), + ::testing::Values(false), ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvertColorI420LayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorI420_acc, - ConvertColorI420AccuracyTest, - ::testing::Combine(::testing::Values(ov::Shape{1, 256 * 256, 256, 1}), + ConvertColorI420LayerTest, + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + generate_input_static_shapes({{1, 256 * 256, 256, 1}}, true))), ::testing::Values(ov::element::u8), ::testing::Values(false), ::testing::Values(true), From 9e97d7ef1ad30200c5ca404697a081e4d413a65e Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 31 Oct 2023 13:53:47 +0800 Subject: [PATCH 145/275] [C-API] add ov_get_last_error_msg() API (#20643) --- src/bindings/c/include/openvino/c/ov_common.h | 7 ++++ src/bindings/c/src/common.h | 12 ++++--- src/bindings/c/src/ov_core.cpp | 20 +++++++++++ src/bindings/c/tests/ov_core_test.cpp | 34 +++++++++++++++++++ 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/src/bindings/c/include/openvino/c/ov_common.h b/src/bindings/c/include/openvino/c/ov_common.h index faf9369050c839..bbbf3dd35c2db1 100644 --- a/src/bindings/c/include/openvino/c/ov_common.h +++ b/src/bindings/c/include/openvino/c/ov_common.h @@ -204,3 +204,10 @@ ov_get_error_info(ov_status_e status); */ OPENVINO_C_API(void) ov_free(const char* content); + +/** + * @brief Get the last error msg. 
+ * @ingroup ov_base_c_api + */ +OPENVINO_C_API(const char*) +ov_get_last_err_msg(); \ No newline at end of file diff --git a/src/bindings/c/src/common.h b/src/bindings/c/src/common.h index ff2e0bcac07312..50784981d1c430 100644 --- a/src/bindings/c/src/common.h +++ b/src/bindings/c/src/common.h @@ -14,13 +14,15 @@ #include "openvino/core/except.hpp" #include "openvino/openvino.hpp" -#define CATCH_IE_EXCEPTION(StatusCode, ExceptionType) \ - catch (const InferenceEngine::ExceptionType&) { \ - return ov_status_e::StatusCode; \ +#define CATCH_IE_EXCEPTION(StatusCode, ExceptionType) \ + catch (const InferenceEngine::ExceptionType& ex) { \ + dup_last_err_msg(ex.what()); \ + return ov_status_e::StatusCode; \ } #define CATCH_OV_EXCEPTION(StatusCode, ExceptionType) \ - catch (const ov::ExceptionType&) { \ + catch (const ov::ExceptionType& ex) { \ + dup_last_err_msg(ex.what()); \ return ov_status_e::StatusCode; \ } @@ -41,6 +43,7 @@ CATCH_IE_EXCEPTION(NETWORK_NOT_READ, NetworkNotRead) \ CATCH_IE_EXCEPTION(INFER_CANCELLED, InferCancelled) \ catch (...) 
{ \ + dup_last_err_msg("An unknown exception occurred"); \ return ov_status_e::UNKNOW_EXCEPTION; \ } @@ -224,3 +227,4 @@ struct mem_istream : virtual mem_stringbuf, std::istream { char* str_to_char_array(const std::string& str); ov::element::Type get_element_type(ov_element_type_e type); +void dup_last_err_msg(const char* msg); diff --git a/src/bindings/c/src/ov_core.cpp b/src/bindings/c/src/ov_core.cpp index 6e292dc7abf331..2624b93d8d0b28 100644 --- a/src/bindings/c/src/ov_core.cpp +++ b/src/bindings/c/src/ov_core.cpp @@ -17,6 +17,22 @@ char* str_to_char_array(const std::string& str) { return char_array; } +static std::string last_err_msg; +static std::mutex last_msg_mutex; +void dup_last_err_msg(const char* msg) { + std::lock_guard lock(last_msg_mutex); + last_err_msg = std::string(msg); +} + +const char* ov_get_last_err_msg() { + std::lock_guard lock(last_msg_mutex); + char* res = nullptr; + if (!last_err_msg.empty()) { + res = str_to_char_array(last_err_msg); + } + return res; +} + ov_status_e ov_get_openvino_version(ov_version_t* version) { if (!version) { return ov_status_e::INVALID_C_PARAM; @@ -66,6 +82,10 @@ ov_status_e ov_core_create(ov_core_t** core) { void ov_core_free(ov_core_t* core) { if (core) delete core; + + // release err msg buffer, there will be no err msg after core is freed. 
+ std::lock_guard lock(last_msg_mutex); + last_err_msg.clear(); } ov_status_e ov_core_read_model(const ov_core_t* core, diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 5e116e7326d34e..90ed036cec32b4 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -33,6 +33,7 @@ class ov_core_test : public ov_capi_test_base { ov_capi_test_base::TearDown(); } }; + INSTANTIATE_TEST_SUITE_P(ov_core, ov_core_test, ::testing::Values("CPU")); TEST_P(ov_core_test, ov_core_create_with_config) { @@ -699,4 +700,37 @@ TEST_P(ov_core_test, ov_core_compile_model_from_file_unicode) { } #endif +using ov_util_test = ov_core_test; +INSTANTIATE_TEST_SUITE_P(ov_capi_test, ov_util_test, ::testing::Values("CPU")); + +TEST_P(ov_util_test, ov_get_last_err_msg_check) { + auto device_name = GetParam(); + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + const char* key = ov_property_key_inference_num_threads; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, "abc")); + + char* ret = nullptr; + OV_EXPECT_NOT_OK(ov_core_get_property(core, device_name.c_str(), key, &ret)); + + auto err_msg = ov_get_last_err_msg(); + EXPECT_NE(nullptr, err_msg); + ov_free(err_msg); + ov_free(ret); + ov_core_free(core); +} + +TEST_P(ov_util_test, ov_get_last_err_msg_check_empty_msg) { + auto device_name = GetParam(); + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + auto err_msg = ov_get_last_err_msg(); + EXPECT_EQ(nullptr, err_msg); + ov_core_free(core); +} + } // namespace From 991b9848fac9f1a15a1fbf827d3243c239dbfd15 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Tue, 31 Oct 2023 10:21:20 +0400 Subject: [PATCH 146/275] `PSROIPooling` and `Proposal` layer tests to API2.0 (#20761) * `PSROIPoolingLayerTest` to API2.0 * `ProposalLayerTest` to API2.0 --- .../single_layer_tests/proposal.cpp | 54 +++---- 
.../single_layer_tests/psroi_pooling.cpp | 14 +- .../include/single_op_tests/proposal.hpp | 15 ++ .../include/single_op_tests/psroi_pooling.hpp | 15 ++ .../single_op/proposal.hpp | 44 ++++++ .../single_op/psroi_pooling.hpp | 43 ++++++ .../src/single_op/proposal.cpp | 139 ++++++++++++++++++ .../src/single_op/psroi_pooling.cpp | 63 ++++++++ 8 files changed, 353 insertions(+), 34 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/proposal.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/psroi_pooling.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/proposal.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/psroi_pooling.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/proposal.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/psroi_pooling.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/proposal.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/proposal.cpp index 3fdabf89c5db0e..e9cbb9ce682b69 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/proposal.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/proposal.cpp @@ -4,44 +4,44 @@ #include -#include "single_layer_tests/proposal.hpp" +#include "single_op_tests/proposal.hpp" #include "common_test_utils/test_constants.hpp" -using namespace ngraph::helpers; -using namespace LayerTestsDefinitions; +using ov::test::ProposalLayerTest; namespace { /* ============= Proposal ============= */ -const std::vector base_size_ = {16}; -const std::vector pre_nms_topn_ = {100}; -const std::vector post_nms_topn_ = {100}; -const std::vector nms_thresh_ = {0.7f}; -const std::vector min_size_ = {1}; -const std::vector ratio_ = {{1.0f, 
2.0f}}; -const std::vector scale_ = {{1.2f, 1.5f}}; -const std::vector clip_before_nms_ = {false}; -const std::vector clip_after_nms_ = {false}; +const std::vector base_size = {16}; +const std::vector pre_nms_topn = {100}; +const std::vector post_nms_topn = {100}; +const std::vector nms_thresh = {0.7f}; +const std::vector min_size = {1}; +const std::vector> ratio = {{1.0f, 2.0f}}; +const std::vector> scale = {{1.2f, 1.5f}}; +const std::vector clip_before_nms = {false}; +const std::vector clip_after_nms = {false}; // empty string corresponds to Caffe framework -const std::vector framework_ = {""}; - -const auto proposalParams = ::testing::Combine( - ::testing::ValuesIn(base_size_), - ::testing::ValuesIn(pre_nms_topn_), - ::testing::ValuesIn(post_nms_topn_), - ::testing::ValuesIn(nms_thresh_), - ::testing::ValuesIn(min_size_), - ::testing::ValuesIn(ratio_), - ::testing::ValuesIn(scale_), - ::testing::ValuesIn(clip_before_nms_), - ::testing::ValuesIn(clip_after_nms_), - ::testing::ValuesIn(framework_) +const std::vector framework = {""}; + +const auto proposal_params = ::testing::Combine( + ::testing::ValuesIn(base_size), + ::testing::ValuesIn(pre_nms_topn), + ::testing::ValuesIn(post_nms_topn), + ::testing::ValuesIn(nms_thresh), + ::testing::ValuesIn(min_size), + ::testing::ValuesIn(ratio), + ::testing::ValuesIn(scale), + ::testing::ValuesIn(clip_before_nms), + ::testing::ValuesIn(clip_after_nms), + ::testing::ValuesIn(framework) ); -INSTANTIATE_TEST_SUITE_P(smoke_Proposal_tests, ProposalLayerTest, +INSTANTIATE_TEST_SUITE_P(proposal_params, ProposalLayerTest, ::testing::Combine( - proposalParams, + proposal_params, + ::testing::Values(ov::element::f16), ::testing::Values(ov::test::utils::DEVICE_CPU)), ProposalLayerTest::getTestCaseName ); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/psroi_pooling.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/psroi_pooling.cpp index 
7e8691b3e4ec63..8ff9c1e04f5c38 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/psroi_pooling.cpp @@ -4,23 +4,23 @@ #include -#include "single_layer_tests/psroi_pooling.hpp" +#include "single_op_tests/psroi_pooling.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::PSROIPoolingLayerTest; -std::vector spatialScales = {1, 0.625}; +std::vector spatial_scales = {1, 0.625}; const auto PSROICases_average = ::testing::Combine( ::testing::Values(std::vector{3, 8, 16, 16}), ::testing::Values(std::vector{10, 5}), ::testing::Values(2), ::testing::Values(2), - ::testing::ValuesIn(spatialScales), + ::testing::ValuesIn(spatial_scales), ::testing::Values(1), ::testing::Values(1), ::testing::Values("average"), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); @@ -32,11 +32,11 @@ const auto PSROICases_bilinear = ::testing::Combine( ::testing::Values(std::vector{10, 5}), ::testing::Values(4), ::testing::Values(3), - ::testing::ValuesIn(spatialScales), + ::testing::ValuesIn(spatial_scales), ::testing::Values(4), ::testing::Values(2), ::testing::Values("bilinear"), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU) ); diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/proposal.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/proposal.hpp new file mode 100644 index 00000000000000..dbdd9641a13acd --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/proposal.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/proposal.hpp" + 
+namespace ov { +namespace test { +TEST_P(ProposalLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/psroi_pooling.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/psroi_pooling.hpp new file mode 100644 index 00000000000000..a9b57ad3b963ea --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/psroi_pooling.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/psroi_pooling.hpp" + +namespace ov { +namespace test { +TEST_P(PSROIPoolingLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/proposal.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/proposal.hpp new file mode 100644 index 00000000000000..edb4ec08a6d771 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/proposal.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { + +using proposalSpecificParams = std::tuple< + size_t, // base_size + size_t, // pre_nms_topn + size_t, // post_nms_topn + float, // nms_thresh + size_t, // min_size + std::vector, // ratio + std::vector, // scale + bool, // clip_before_nms + bool, // clip_after_nms + std::string // framework +>; +using proposalLayerTestParamsSet = std::tuple< + proposalSpecificParams, + ov::element::Type, + ov::test::TargetDevice +>; + +class ProposalLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static 
std::string getTestCaseName(const testing::TestParamInfo& obj); +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/psroi_pooling.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/psroi_pooling.hpp new file mode 100644 index 00000000000000..401748acb61768 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/psroi_pooling.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "ov_models/builders.hpp" +#include "ov_models/utils/ov_helpers.hpp" + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { + +using psroiParams = std::tuple< + std::vector, // Input shape + std::vector, // Coords shape + size_t, // Output_dim + size_t, // group_size + float, // Spatial scale + size_t, // spatial_bins_x + size_t, // spatial_bins_y + std::string, // Mode + ov::element::Type, // Model type + ov::test::TargetDevice // Device name +>; + +class PSROIPoolingLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp b/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp new file mode 100644 index 00000000000000..1889d22d630594 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp @@ -0,0 +1,139 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/proposal.hpp" + +namespace 
ov { +namespace test { + +const bool normalize = true; +const size_t feat_stride = 1; +const float box_size_scale = 2.0f; +const float box_coordinate_scale = 2.0f; + +// std::string ProposalLayerTest::SerializeProposalSpecificParams(proposalSpecificParams& params) { +// size_t base_size; +// size_t pre_nms_topn; +// size_t post_nms_topn; +// float nms_thresh; +// size_t min_size; +// std::vector ratio; +// std::vector scale; +// bool clip_before_nms; +// bool clip_after_nms; +// std::string framework; +// std::tie(base_size, pre_nms_topn, +// post_nms_topn, +// nms_thresh, +// min_size, +// ratio, +// scale, +// clip_before_nms, +// clip_after_nms, +// framework) = params; + +// std::ostringstream result; +// result << "base_size=" << base_size << "_"; +// result << "pre_nms_topn=" << pre_nms_topn << "_"; +// result << "post_nms_topn=" << post_nms_topn << "_"; +// result << "nms_thresh=" << nms_thresh << "_"; +// result << "feat_stride=" << feat_stride << "_"; +// result << "min_size=" << min_size << "_"; +// result << "ratio = " << ov::test::utils::vec2str(ratio) << "_"; +// result << "scale = " << ov::test::utils::vec2str(scale) << "_"; +// result << "clip_before_nms=" << clip_before_nms << "_"; +// result << "clip_after_nms=" << clip_after_nms << "_"; +// result << "normalize=" << normalize << "_"; +// result << "box_size_scale=" << box_size_scale << "_"; +// result << "box_coordinate_scale=" << box_coordinate_scale << "_"; +// result << "framework=" << framework << "_"; + +// return result.str(); +// } + +std::string ProposalLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + proposalSpecificParams proposal_params; + ov::element::Type model_type; + std::string target_device; + std::tie(proposal_params, model_type, target_device) = obj.param; + size_t base_size, pre_nms_topn, post_nms_topn, min_size; + float nms_thresh; + std::vector ratio, scale; + bool clip_before_nms, clip_after_nms; + std::string framework; + + std::tie(base_size, pre_nms_topn, 
post_nms_topn, + nms_thresh, min_size, ratio, scale, + clip_before_nms, clip_after_nms, framework) = proposal_params; + + std::ostringstream result; + result << "base_size=" << base_size << "_"; + result << "pre_nms_topn=" << pre_nms_topn << "_"; + result << "post_nms_topn=" << post_nms_topn << "_"; + result << "nms_thresh=" << nms_thresh << "_"; + result << "feat_stride=" << feat_stride << "_"; + result << "min_size=" << min_size << "_"; + result << "ratio = " << ov::test::utils::vec2str(ratio) << "_"; + result << "scale = " << ov::test::utils::vec2str(scale) << "_"; + result << "clip_before_nms=" << clip_before_nms << "_"; + result << "clip_after_nms=" << clip_after_nms << "_"; + result << "normalize=" << normalize << "_"; + result << "box_size_scale=" << box_size_scale << "_"; + result << "box_coordinate_scale=" << box_coordinate_scale << "_"; + result << "framework=" << framework << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ProposalLayerTest::SetUp() { + std::vector img_info = {225.0f, 225.0f, 1.0f}; + + proposalSpecificParams proposal_params; + ov::element::Type model_type; + + std::tie(proposal_params, model_type, targetDevice) = this->GetParam(); + size_t base_size, pre_nms_topn, post_nms_topn, min_size; + float nms_thresh; + std::vector ratio, scale; + bool clip_before_nms, clip_after_nms; + std::string framework; + + std::tie(base_size, pre_nms_topn, post_nms_topn, + nms_thresh, min_size, ratio, scale, + clip_before_nms, clip_after_nms, framework) = proposal_params; + + + size_t bottom_w = base_size; + size_t bottom_h = base_size; + size_t num_anchors = ratio.size() * scale.size(); + + ov::Shape scores_shape = {1, 2 * num_anchors, bottom_h, bottom_w}; + ov::Shape boxes_shape = {1, 4 * num_anchors, bottom_h, bottom_w}; + ov::ParameterVector params{std::make_shared(model_type, scores_shape), + std::make_shared(model_type, boxes_shape)}; + + 
ov::op::v4::Proposal::Attributes attrs; + attrs.base_size = base_size; + attrs.pre_nms_topn = pre_nms_topn; + attrs.post_nms_topn = post_nms_topn; + attrs.nms_thresh = nms_thresh; + attrs.feat_stride = feat_stride; + attrs.min_size = min_size; + attrs.ratio = ratio; + attrs.scale = scale; + attrs.clip_before_nms = clip_before_nms; + attrs.clip_after_nms = clip_after_nms; + attrs.normalize = normalize; + attrs.box_size_scale = box_size_scale; + attrs.box_coordinate_scale = box_coordinate_scale; + attrs.framework = framework; + attrs.infer_probs = true; + + auto image_shape = std::make_shared(ov::element::f32, ov::Shape{3}, img_info); + auto proposal = std::make_shared(params[0], params[1], image_shape, attrs); + function = std::make_shared(proposal->outputs(), params, "proposal"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/psroi_pooling.cpp b/src/tests/functional/shared_test_classes/src/single_op/psroi_pooling.cpp new file mode 100644 index 00000000000000..197f875fea8726 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/psroi_pooling.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/psroi_pooling.hpp" + +namespace ov { +namespace test { +std::string PSROIPoolingLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector input_shape; + std::vector coords_shape; + size_t output_dim; + size_t group_size; + float spatial_scale; + size_t spatial_bins_x; + size_t spatial_bins_y; + std::string mode; + ov::element::Type model_type; + std::string target_device; + std::tie(input_shape, coords_shape, output_dim, group_size, spatial_scale, spatial_bins_x, spatial_bins_y, mode, model_type, target_device) = obj.param; + + std::ostringstream result; + + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "coord_shape=" << 
ov::test::utils::vec2str(coords_shape) << "_"; + result << "out_dim=" << output_dim << "_"; + result << "group_size=" << group_size << "_"; + result << "scale=" << spatial_scale << "_"; + result << "bins_x=" << spatial_bins_x << "_"; + result << "bins_y=" << spatial_bins_y << "_"; + result << "mode=" << mode << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void PSROIPoolingLayerTest::SetUp() { + std::vector input_shape; + std::vector coords_shape; + size_t output_dim; + size_t group_size; + float spatial_scale; + size_t spatial_bins_x; + size_t spatial_bins_y; + std::string mode; + ov::element::Type model_type; + std::tie(input_shape, coords_shape, output_dim, group_size, spatial_scale, + spatial_bins_x, spatial_bins_y, mode, model_type, targetDevice) = this->GetParam(); + + ov::ParameterVector params {std::make_shared(model_type, ov::Shape(input_shape)), + std::make_shared(model_type, ov::Shape(coords_shape))}; + auto psroi_pooling = std::make_shared(params[0], + params[1], + output_dim, + group_size, + spatial_scale, + spatial_bins_x, + spatial_bins_y, + mode); + function = std::make_shared(psroi_pooling->outputs(), params, "psroi_pooling"); +} +} // namespace test +} // namespace ov From b13cb8ce12ca2c6dc0bd39ad82720c02a6137483 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 31 Oct 2023 10:25:35 +0400 Subject: [PATCH 147/275] Make RPM packages compatible with wide range of RHEL systems (#20762) --- .github/workflows/android_arm64.yml | 2 + .github/workflows/fedora.yml | 1 - .github/workflows/linux.yml | 1 - .github/workflows/linux_riscv.yml | 5 +- .../developer_package/packaging/rpm/changelog | 6 +- .../developer_package/packaging/rpm/rpm.cmake | 3 +- cmake/features.cmake | 13 +- cmake/packaging/debian.cmake | 6 - cmake/packaging/rpm.cmake | 4 +- conan.lock | 3 - conanfile.txt | 6 +- .../installing-openvino-yum.md | 11 +- samples/cpp/CMakeLists.txt | 43 +--- 
samples/cpp/benchmark_app/CMakeLists.txt | 70 ++----- samples/cpp/speech_sample/CMakeLists.txt | 16 +- .../install_openvino_dependencies.sh | 107 +++++----- thirdparty/dependencies.cmake | 185 +++++------------- vcpkg.json | 2 - 18 files changed, 158 insertions(+), 326 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index deacec70e344ea..8c1106fdb2457f 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -65,6 +65,8 @@ jobs: run: | pushd ${OPENVINO_REPO} git submodule update --init -- ${OPENVINO_REPO}/src/plugins + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/zlib + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/json git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gtest git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gflags git submodule update --init -- ${OPENVINO_REPO}/thirdparty/open_model_zoo diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index b7ae5765971206..522fc5fb53fa6e 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -111,7 +111,6 @@ jobs: -DENABLE_STRICT_DEPENDENCIES=OFF \ -DENABLE_SYSTEM_TBB=ON \ -DENABLE_SYSTEM_OPENCL=ON \ - -DENABLE_SYSTEM_PUGIXML=ON \ -DENABLE_PYTHON_PACKAGING=ON \ -DCPACK_GENERATOR=TGZ \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 06825e2ece1903..6b4328f77dc3fe 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -153,7 +153,6 @@ jobs: -DENABLE_STRICT_DEPENDENCIES=OFF \ -DENABLE_SYSTEM_TBB=ON \ -DENABLE_SYSTEM_OPENCL=ON \ - -DENABLE_SYSTEM_PUGIXML=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCPACK_GENERATOR=TGZ \ -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 25528d96e151db..cdeccfa643678a 100644 --- a/.github/workflows/linux_riscv.yml +++ 
b/.github/workflows/linux_riscv.yml @@ -71,8 +71,11 @@ jobs: - name: Init submodules for non-Conan dependencies run: | pushd ${OPENVINO_REPO} - git submodule update --init -- ${OPENVINO_REPO}/src/plugins/intel_cpu + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/zlib + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/json git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gtest + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/gflags + git submodule update --init -- ${OPENVINO_REPO}/src/plugins/intel_cpu git submodule update --init -- ${OPENVINO_REPO}/thirdparty/open_model_zoo popd diff --git a/cmake/developer_package/packaging/rpm/changelog b/cmake/developer_package/packaging/rpm/changelog index e5eb84a96fc1c3..028dc3fa071995 100644 --- a/cmake/developer_package/packaging/rpm/changelog +++ b/cmake/developer_package/packaging/rpm/changelog @@ -1,10 +1,10 @@ -* Sun Jul 4 12:00:00 2010 OpenVINO Developers - 2022.3.0-1 +* Sun Dec 25 2022 OpenVINO Developers - 2022.3.0-1 - Implemented Debian, RPM packages support - Support of Ubuntu 22.04 - Support of python 3.10 - Introduced OpenVINO Runtime 2.0 C API -* Thu Jul 28 12:00:00 2022 OpenVINO Developers - 2022.2.0-1 +* Thu Jul 28 2022 OpenVINO Developers - 2022.2.0-1 - Broader model and hardware support - Optimize & deploy with ease across an expanded range of deep learning models including NLP, and access AI acceleration across an expanded range of hardware. - NEW: Support for Intel 13th Gen Core Processor for desktop (code named Raptor Lake). - NEW: Preview support for Intel’s discrete graphics cards, Intel® Data Center GPU Flex Series and Intel® Arc™ GPU for DL inferencing workloads in intelligent cloud, edge and media analytics workloads. Hundreds of models enabled. @@ -18,6 +18,6 @@ - NEW: Introducing new performance hint (”Cumulative throughput”) in AUTO device, enabling multiple accelerators (e.g. multiple GPUs) to be used at once to maximize inferencing performance. 
- NEW: Introducing Intel® FPGA AI Suite support which enables real-time, low-latency, and low-power deep learning inference in this easy-to-use package   -* Thu Mar 22 12:00:00 2022 OpenVINO Developers - 2022.1.0-1 +* Thu Mar 22 2022 OpenVINO Developers - 2022.1.0-1 - Implemented OpenVINO 2.0 Runtime API for both C++ and Python - Introduced IR v11 which is aligned with frameworks model format diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index 9660226e696582..a7c0ec2cf61087 100644 --- a/cmake/developer_package/packaging/rpm/rpm.cmake +++ b/cmake/developer_package/packaging/rpm/rpm.cmake @@ -120,8 +120,7 @@ macro(ov_rpm_specific_settings) # group set(CPACK_RPM_PACKAGE_GROUP "Development/Libraries") # changelog file - # TODO: fix "error: bad date in %changelog" - # set(CPACK_RPM_CHANGELOG_FILE "${OpenVINO_SOURCE_DIR}/cmake/developer_package/packaging/rpm/changelog") + set(CPACK_RPM_CHANGELOG_FILE "${OpenVINO_SOURCE_DIR}/cmake/developer_package/packaging/rpm/changelog") # use rpmlint to check packages in post-build step set(CPACK_POST_BUILD_SCRIPTS "${OpenVINODeveloperScripts_DIR}/packaging/rpm/post_build.cmake") # enable for debug cpack run diff --git a/cmake/features.cmake b/cmake/features.cmake index 2e3ef5d4aa82b1..209fb56b71b2bf 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -152,15 +152,6 @@ else() set(ENABLE_SYSTEM_LIBS_DEFAULT OFF) endif() -if(BUILD_SHARED_LIBS) - set(ENABLE_SYSTEM_PUGIXML_DEFAULT ${ENABLE_SYSTEM_LIBS_DEFAULT}) -else() - # for static libraries case libpugixml.a must be compiled with -fPIC - # but we still need an ability to compile with system PugiXML and BUILD_SHARED_LIBS - # for Conan case where everything is compiled statically - set(ENABLE_SYSTEM_PUGIXML_DEFAULT OFF) -endif() - if(ANDROID) # when protobuf from /usr/include is used, then Android toolchain ignores include paths # but if we build for Android using vcpkg / conan / etc where flatbuffers is not 
located in @@ -179,9 +170,7 @@ endif() ov_dependent_option (ENABLE_SYSTEM_TBB "Enables use of system TBB" ${ENABLE_SYSTEM_TBB_DEFAULT} "THREADING MATCHES TBB" OFF) -# TODO: turn it off by default during the work on cross-os distribution, because pugixml is not -# available out of box on all systems (like RHEL, UBI) -ov_option (ENABLE_SYSTEM_PUGIXML "Enables use of system PugiXML" ${ENABLE_SYSTEM_PUGIXML_DEFAULT}) +ov_option (ENABLE_SYSTEM_PUGIXML "Enables use of system PugiXML" OFF) # the option is on by default, because we use only flatc compiler and don't use any libraries ov_dependent_option(ENABLE_SYSTEM_FLATBUFFERS "Enables use of system flatbuffers" ${ENABLE_SYSTEM_FLATBUFFERS_DEFAULT} "ENABLE_OV_TF_LITE_FRONTEND" OFF) diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index fda353d6ab512e..5b5091fd0b4fd4 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -347,18 +347,12 @@ macro(ov_cpack_settings) set(samples_build_deps "cmake, g++, gcc, libc6-dev, make, pkg-config") set(samples_build_deps_suggest "libopencv-core-dev, libopencv-imgproc-dev, libopencv-imgcodecs-dev") set(samples_opencl_suggest "ocl-icd-opencl-dev, opencl-headers") - # Ubuntu 18.04, Debian 9 cases have nlohmann-json-dev - # newer systems have nlohmann-json3-dev - # according to https://www.debian.org/doc/debian-policy/ch-relationships.html#syntax-of-relationship-fields - # we can use | (pipe) to provide alternative package names - set(json_library "nlohmann-json3-dev | nlohmann-json-dev") # c_samples / cpp_samples set(CPACK_COMPONENT_SAMPLES_DESCRIPTION "Intel(R) Distribution of OpenVINO(TM) Toolkit C / C++ Samples") set(CPACK_COMPONENT_SAMPLES_DEPENDS "${OV_CPACK_COMP_CORE_DEV}") set(CPACK_DEBIAN_SAMPLES_PACKAGE_NAME "openvino-samples-${cpack_name_ver}") set(CPACK_DEBIAN_SAMPLES_PACKAGE_SUGGESTS "${samples_build_deps_suggest}, ${samples_opencl_suggest}, ${all_plugins_suggest}") - set(CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS "libgflags-dev, 
zlib1g-dev, ${json_library}") # can be skipped with --no-install-recommends set(CPACK_DEBIAN_SAMPLES_PACKAGE_RECOMMENDS "${samples_build_deps}") set(CPACK_DEBIAN_SAMPLES_PACKAGE_ARCHITECTURE "all") diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index 9c8103e12bd8ad..f94d3db0f36f76 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -307,7 +307,7 @@ macro(ov_cpack_settings) # SUGGESTS may be unsupported, it's part of RPM 4.12.0 (Sep 16th 2014) only # see https://rpm.org/timeline.html set(CPACK_RPM_SAMPLES_PACKAGE_SUGGESTS "${samples_build_deps_suggest}, ${samples_opencl_deps_suggest}, ${plugin_packages}") - set(CPACK_RPM_SAMPLES_PACKAGE_REQUIRES "${core_dev_package}, ${samples_build_deps}, gflags-devel, json-devel, zlib-devel") + set(CPACK_RPM_SAMPLES_PACKAGE_REQUIRES "${core_dev_package}, ${samples_build_deps}") set(CPACK_RPM_SAMPLES_PACKAGE_ARCHITECTURE "noarch") ov_rpm_generate_conflicts(${OV_CPACK_COMP_CPP_SAMPLES} ${conflicting_versions}) @@ -315,8 +315,6 @@ macro(ov_cpack_settings) # contains samples source codes "devel-file-in-non-devel-package /usr/${OV_CPACK_SAMPLESDIR}/cpp/*" "devel-file-in-non-devel-package /usr/${OV_CPACK_SAMPLESDIR}/c/*" - # depends on gflags-devel - "devel-dependency gflags-devel" # duplicated files are OK "files-duplicate /usr/${OV_CPACK_SAMPLESDIR}/cpp/CMakeLists.txt /usr/${OV_CPACK_SAMPLESDIR}/c/CMakeLists.txt" "files-duplicate /usr/${OV_CPACK_SAMPLESDIR}/cpp/build_samples.sh /usr/${OV_CPACK_SAMPLESDIR}/c/build_samples.sh" diff --git a/conan.lock b/conan.lock index 81f455dd8efbd7..22567fb002e7e5 100644 --- a/conan.lock +++ b/conan.lock @@ -13,17 +13,14 @@ "opencl-clhpp-headers/2023.04.17#7c62fcc7ac2559d4839150d2ebaac5c8%1685450803.672", "onnx/1.14.1#d95f4e64bedf3dc6898253847ac69005%1693130309.828", "onetbb/2021.10.0#cbb2fc43088070b48f6e4339bc8fa0e1%1693812561.235", - "nlohmann_json/3.11.2#a35423bb6e1eb8f931423557e282c7ed%1666619820.488", 
"ittapi/3.24.0#9246125f13e7686dee2b0c992b71db94%1682969872.743", "hwloc/2.9.2#1c63e2eccac57048ae226e6c946ebf0e%1688677682.002", - "gflags/2.2.2#48d1262ffac8d30c3224befb8275a533%1676224985.343", "flatbuffers/23.5.26#b153646f6546daab4c7326970b6cd89c%1685838458.449", "ade/0.1.2c#8c03c130df6dc35186b38ba73a40a71d%1694253992.577" ], "build_requires": [ "zlib/1.2.13#97d5730b529b4224045fe7090592d4c1%1692672717.049", "protobuf/3.21.12#d9f5f4e3b86552552dda4c0a2e928eeb%1685218275.69", - "protobuf/3.21.9#515ceb0a1653cf84363d9968b812d6be%1678364058.993", "pkgconf/1.9.5#743ca0d41d35a84b1f89af337ddaa1a0%1688570267.802", "patchelf/0.13#0eaada8970834919c3ce14355afe7fac%1680534241.341", "ninja/1.11.1#77587f8c8318662ac8e5a7867eb4be21%1684431244.21", diff --git a/conanfile.txt b/conanfile.txt index 2fc71b7f384f0c..bdde5ea9dc1498 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -4,14 +4,11 @@ onetbb/[>=2021.2.1] pugixml/[>=1.10] protobuf/3.21.12 ittapi/[>=3.23.0] -zlib/[>=1.2.8] opencl-icd-loader/[>=2023.04.17] rapidjson/[>=1.1.0] xbyak/[>=6.62] snappy/[>=1.1.7] -gflags/2.2.2 onnx/1.14.1 -nlohmann_json/[>=3.1.1] pybind11/[>=2.10.1] flatbuffers/[>=22.9.24] @@ -19,11 +16,12 @@ flatbuffers/[>=22.9.24] cmake/[>=3.20] pkgconf/1.9.5 patchelf/[>=0.12] -protobuf/3.21.9 +protobuf/3.21.12 flatbuffers/[>=22.9.24] [options] protobuf/*:lite=True +protobuf/*:with_zlib=False protobuf/*:shared=False flatbuffers/*:header_only=True diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md index c9646b212828fe..88730d374995b1 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md @@ -25,7 +25,16 @@ .. 
note:: - Installing OpenVINO from YUM is only supported on RHEL 8.2 and higher versions. CentOS 7 is not supported for this installation method. + OpenVINO RPM packages are compatible with and can be run on the following operating systems: + - RHEL 8.2 and higher + - Amazon Linux 2022 and 2023 + - Rocky Linux 8.7, 8.8 and 9.2 + - Alma Linux 8.7, 8.8 and 9.2 + - Oracle Linux 8.7, 8.8 and 9.2 + - Fedora 29 and higher up to 40 + - OpenEuler 20.03 and 22.03 + - Anolis OS 8.6 and 8.8 + - CentOS Stream 8 and 9 .. tab-item:: Processor Notes :sync: processor-notes diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index de884bb05d279c..c814cc37e2c7f6 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -108,42 +108,15 @@ endif() if(TARGET gflags) set(GFLAGS_TARGET gflags) elseif(gflags_required) - if(EXISTS /etc/debian_version) - set(gflags_component nothreads_static) + set(GFLAGS_TARGET gflags_nothreads_static) + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags") + add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) + elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/gflags") + # Allow running samples CMakeLists.txt as stand alone from openvino sources + add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/gflags" + "${CMAKE_CURRENT_BINARY_DIR}/thirdparty/gflags" EXCLUDE_FROM_ALL) else() - set(gflags_component shared) - endif() - find_package(gflags QUIET OPTIONAL_COMPONENTS ${gflags_component}) - if(gflags_FOUND) - if(TARGET ${GFLAGS_TARGET}) - # nothing - elseif(TARGET gflags_nothreads-static) - # Debian 9: gflag_component is ignored - set(GFLAGS_TARGET gflags_nothreads-static) - elseif(TARGET gflags-shared) - # gflags shared case for CentOS / RHEL / Fedora - set(GFLAGS_TARGET gflags-shared) - elseif(TARGET gflags::gflags) - # gflags case for Conan - set(GFLAGS_TARGET gflags::gflags) - else() - message(FATAL_ERROR "Internal error: failed to find imported target 'gflags' using '${gflags_component}'
component") - endif() - - message(STATUS "gflags (${gflags_VERSION}) is found at ${gflags_DIR} using '${gflags_component}' component") - endif() - - if(NOT gflags_FOUND) - set(GFLAGS_TARGET gflags_nothreads_static) - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags") - add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) - elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/gflags") - # Allow running samples CMakeLists.txt as stand alone from openvino sources - add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/gflags" - "${CMAKE_CURRENT_BINARY_DIR}/thirdparty/gflags" EXCLUDE_FROM_ALL) - else() - message(FATAL_ERROR "Failed to find 'gflags' library using '${gflags_component}' component") - endif() + message(FATAL_ERROR "Failed to find 'gflags' library using '${gflags_component}' component") endif() endif() diff --git a/samples/cpp/benchmark_app/CMakeLists.txt b/samples/cpp/benchmark_app/CMakeLists.txt index c786fde4c2fbe8..da6e5676f11a31 100644 --- a/samples/cpp/benchmark_app/CMakeLists.txt +++ b/samples/cpp/benchmark_app/CMakeLists.txt @@ -15,62 +15,20 @@ ov_add_sample(NAME ${TARGET_NAME} # Required nlohmann_json dependency if(NOT TARGET nlohmann_json::nlohmann_json) - find_package(PkgConfig QUIET) - find_package(nlohmann_json QUIET - # exception for Ubuntu 18.04, where cmake files for nlohmann_json - # are located in a wrong directory - HINTS /usr/lib/cmake) - if(TARGET nlohmann_json) - # Ubuntu 18.04 case where target 'nlohmann_json' is here, but nlohmann_json_FOUND is OFF - if(NOT TARGET nlohmann_json::nlohmann_json) - set_target_properties(nlohmann_json PROPERTIES IMPORTED_GLOBAL ON) - add_library(nlohmann_json::nlohmann_json ALIAS nlohmann_json) - endif() - set(nlohmann_json_FOUND ON) - endif() - if(nlohmann_json_FOUND) - message(STATUS "nlohmann_json (${nlohmann_json_VERSION}) is found at ${nlohmann_json_DIR}") - elseif(PkgConfig_FOUND) - pkg_search_module(nlohmann_json QUIET - IMPORTED_TARGET GLOBAL - nlohmann_json) - 
if(nlohmann_json_FOUND) - add_library(nlohmann_json::nlohmann_json ALIAS PkgConfig::nlohmann_json) - endif() - endif() - - if(NOT nlohmann_json_FOUND) - # try to find header file json.hpp - # for example, on debian 9 there is no cmake / pkgconfig files - find_file(nlohmann_include_file - NAMES "json.hpp" - "Path to json.hpp (nlohmann-json-dev)") - if(nlohmann_include_file) - add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED) - get_filename_component(nlohmann_include_dir "${nlohmann_include_file}" PATH) - set_target_properties(nlohmann_json::nlohmann_json PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${nlohmann_include_dir}" - INTERFACE_COMPILE_DEFINITIONS JSON_HEADER) - set(nlohmann_json_FOUND ON) - endif() - endif() - - if(NOT nlohmann_json_FOUND) - if(EXISTS "${Samples_SOURCE_DIR}/thirdparty/nlohmann_json") - # OpenVINO package puts thirdparty to samples dir - # suppress shadowing names warning - set(JSON_SystemInclude ON CACHE BOOL "" FORCE) - add_subdirectory("${Samples_SOURCE_DIR}/thirdparty/nlohmann_json" - "${Samples_BINARY_DIR}/thirdparty/nlohmann_json" EXCLUDE_FROM_ALL) - elseif(EXISTS "${Samples_SOURCE_DIR}/../../thirdparty/json/nlohmann_json") - # Allow running samples CMakeLists.txt as stand alone from openvino sources - # suppress shadowing names warning - set(JSON_SystemInclude ON CACHE BOOL "" FORCE) - add_subdirectory("${Samples_SOURCE_DIR}/../../thirdparty/json/nlohmann_json" - "${Samples_BINARY_DIR}/thirdparty/nlohmann_json" EXCLUDE_FROM_ALL) - else() - message(FATAL_ERROR "Failed to find / build nlohmann_json library") - endif() + if(EXISTS "${Samples_SOURCE_DIR}/thirdparty/nlohmann_json") + # OpenVINO package puts thirdparty to samples dir + # suppress shadowing names warning + set(JSON_SystemInclude ON CACHE BOOL "" FORCE) + add_subdirectory("${Samples_SOURCE_DIR}/thirdparty/nlohmann_json" + "${Samples_BINARY_DIR}/thirdparty/nlohmann_json" EXCLUDE_FROM_ALL) + elseif(EXISTS "${Samples_SOURCE_DIR}/../../thirdparty/json/nlohmann_json") + # 
Allow running samples CMakeLists.txt as stand alone from openvino sources + # suppress shadowing names warning + set(JSON_SystemInclude ON CACHE BOOL "" FORCE) + add_subdirectory("${Samples_SOURCE_DIR}/../../thirdparty/json/nlohmann_json" + "${Samples_BINARY_DIR}/thirdparty/nlohmann_json" EXCLUDE_FROM_ALL) + else() + message(FATAL_ERROR "Failed to find / build nlohmann_json library") endif() endif() diff --git a/samples/cpp/speech_sample/CMakeLists.txt b/samples/cpp/speech_sample/CMakeLists.txt index a9e8c0e9d256fa..bda2a162d7fc8e 100644 --- a/samples/cpp/speech_sample/CMakeLists.txt +++ b/samples/cpp/speech_sample/CMakeLists.txt @@ -8,22 +8,8 @@ file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) # Required zlib and cnpy dependencies -find_package(ZLIB QUIET) - if(NOT TARGET ZLIB::ZLIB) - find_package(PkgConfig QUIET) - if(PkgConfig_FOUND) - pkg_search_module(zlib QUIET - IMPORTED_TARGET GLOBAL - zlib) - if(zlib_FOUND) - add_library(ZLIB::ZLIB ALIAS PkgConfig::zlib) - endif() - endif() - - if(zlib_FOUND) - message(STATUS "${PKG_CONFIG_EXECUTABLE}: zlib (${zlib_VERSION}) is found at ${zlib_PREFIX}") - elseif(EXISTS "${Samples_SOURCE_DIR}/thirdparty/zlib") + if(EXISTS "${Samples_SOURCE_DIR}/thirdparty/zlib") # OpenVINO package puts thirdparty to samples dir add_subdirectory("${Samples_SOURCE_DIR}/thirdparty/zlib" "${Samples_BINARY_DIR}/thirdparty/zlib" EXCLUDE_FROM_ALL) diff --git a/scripts/install_dependencies/install_openvino_dependencies.sh b/scripts/install_dependencies/install_openvino_dependencies.sh index 79c987535e2849..4c964182f160d4 100755 --- a/scripts/install_dependencies/install_openvino_dependencies.sh +++ b/scripts/install_dependencies/install_openvino_dependencies.sh @@ -94,10 +94,16 @@ if [ "$os" == "auto" ] ; then os="rhel8" fi case $os in - centos7|centos8|rhel8|rhel9.1|\ - almalinux8.7|amzn2|\ - opensuse-leap15.3| \ - fedora34|fedora35|fedora36|fedora37|fedora38|\ + centos7|centos8|centos9|\ + rhel8|rhel9.1|\ + anolis8.6|anolis8.8|\ + 
openEuler20.03|openEuler22.03|openEuler23.03|\ + almalinux8.7|almalinux8.8|almalinux9.2|\ + amzn2|amzn2022|amzn2023|\ + ol8.7|ol8.8|ol9.2|\ + rocky8.7|rocky8.8|rocky9.2|\ + fedora29|fedora30|fedora31|fedora32|fedora33|fedora34|fedora35|fedora36|fedora37|fedora38|fedora39|fedora40|\ + opensuse-leap15.3|\ raspbian9|debian9|ubuntu18.04|\ raspbian10|debian10|ubuntu20.04|ubuntu20.10|ubuntu21.04|\ raspbian11|debian11|ubuntu21.10|ubuntu22.04|\ @@ -118,26 +124,26 @@ if [ "$os" == "raspbian9" ] || [ "$os" == "debian9" ] ; then # - cmake version is 3.7.2 # which are not supported by OpenVINO - pkgs_core=(libpugixml1v5) + pkgs_core=() pkgs_gpu=(ocl-icd-libopencl1) pkgs_python=() - pkgs_dev=(pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo) + pkgs_dev=(pkg-config g++ gcc libc6-dev make sudo) elif [ "$os" == "ubuntu18.04" ] ; then - pkgs_core=(libtbb2 libpugixml1v5) + pkgs_core=(libtbb2) pkgs_gpu=(ocl-icd-libopencl1) pkgs_python=(python3.8 libpython3.8 python3.8-venv python3-pip) - pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo) + pkgs_dev=(cmake pkg-config g++ gcc libc6-dev make sudo) elif [ "$os" == "ubuntu20.04" ] || [ "$os" == "debian10" ] || [ "$os" == "raspbian10" ] || [ "$os" == "ubuntu21.10" ] || [ "$os" == "ubuntu22.04" ] || [ "$os" == "debian11" ] || [ "$os" == "raspbian11" ] || [ "$os" == "ubuntu22.10" ] || [ "$os" == "ubuntu23.04" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ]; then - pkgs_core=(libpugixml1v5) + pkgs_core=() pkgs_gpu=(ocl-icd-libopencl1) pkgs_python=(python3 python3-venv python3-pip) - pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json3-dev make curl sudo) + pkgs_dev=(cmake pkg-config g++ gcc libc6-dev make sudo) if [ "$os" == "ubuntu22.04" ] || [ "$os" == "ubuntu22.10" ] || [ "$os" == "ubuntu23.04" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ] ; then @@ -160,11 +166,17 @@ elif [ "$os" == "ubuntu20.04" 
] || [ "$os" == "debian10" ] || [ "$os" == "raspbi pkgs_python+=(libpython3.11) fi -elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] || +elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] || [ "$os" == "centos9" ] || [ "$os" == "rhel8" ] || [ "$os" == "rhel9.1" ] || - [ "$os" == "fedora34" ] || [ "$os" == "fedora35" ] || [ "$os" == "fedora36" ] || - [ "$os" == "fedora36" ] || [ "$os" == "fedora38" ] || - [ "$os" == "almalinux8.7" ] || [ "$os" == "amzn2" ] ; then + [ "$os" == "anolis8.6" ] || [ "$os" == "anolis8.8" ] || + [ "$os" == "openEuler20.03" ] || [ "$os" == "openEuler22.03" ] || [ "$os" == "openEuler23.03" ] || + [ "$os" == "fedora29" ] || [ "$os" == "fedora30" ] || [ "$os" == "fedora31" ] || [ "$os" == "fedora32" ] || + [ "$os" == "fedora33" ] || [ "$os" == "fedora34" ] || [ "$os" == "fedora35" ] || [ "$os" == "fedora36" ] || + [ "$os" == "fedora37" ] || [ "$os" == "fedora38" ] || [ "$os" == "fedora39" ] || [ "$os" == "fedora40" ] || + [ "$os" == "ol8.7" ] || [ "$os" == "ol8.8" ] || [ "$os" == "ol9.2" ] || + [ "$os" == "rocky8.7" ] || [ "$os" == "rocky8.8" ] || [ "$os" == "rocky9.2" ] || + [ "$os" == "almalinux8.7" ] || [ "$os" == "almalinux8.8" ] || [ "$os" == "almalinux9.2" ] || + [ "$os" == "amzn2" ] || [ "$os" == "amzn2022" ] || [ "$os" == "amzn2023" ] ; then arch=$(uname -m) @@ -172,8 +184,10 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] || amazon-linux-extras install epel python3.8 fi - pkgs_dev=(gcc gcc-c++ make glibc libstdc++ libgcc cmake3 "json-devel.$arch" "zlib-devel.$arch" sudo) + pkgs_core=() pkgs_gpu=() + pkgs_python=() + pkgs_dev=(gcc gcc-c++ make glibc libstdc++ libgcc cmake3 sudo) if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then pkgs_dev+=(pkgconfig) @@ -181,53 +195,39 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] || pkgs_dev+=(pkgconf-pkg-config) fi - if [ "$os" == "rhel9.1" ] ; then - pkgs_dev+=(curl-minimal) - else - pkgs_dev+=(curl) - fi - - if [ "$os" == "fedora35" ] || [ "$os" == "fedora35" ] || 
[ "$os" == "fedora36" ] || - [ "$os" == "fedora36" ] || [ "$os" == "fedora38" ] ; then - pkgs_core=("tbb.$arch" "pugixml.$arch" "gflags.$arch") - pkgs_python=(python3 python3-pip) - pkgs_dev+=("gflags-devel.$arch") + if [ "$os" == "fedora29" ] || [ "$os" == "fedora30" ] || [ "$os" == "fedora31" ] || [ "$os" == "fedora32" ] || + [ "$os" == "fedora33" ] || [ "$os" == "fedora34" ] || [ "$os" == "fedora35" ] || [ "$os" == "fedora36" ] || + [ "$os" == "fedora37" ] || [ "$os" == "fedora38" ] || [ "$os" == "fedora39" ] || [ "$os" == "fedora40" ] || + [ "$os" == "ol8.7" ] || [ "$os" == "ol8.8" ] || [ "$os" == "ol9.2" ] || + [ "$os" == "rocky8.7" ] || [ "$os" == "rocky8.8" ] || [ "$os" == "rocky9.2" ] || + [ "$os" == "almalinux8.7" ] || [ "$os" == "almalinux8.8" ] || [ "$os" == "almalinux9.2" ] || + [ "$os" == "centos8" ] || [ "$os" == "centos9" ] || + [ "$os" == "amzn2022" ] || [ "$os" == "amzn2023" ] || + [ "$os" == "anolis8.6" ] || [ "$os" == "anolis8.8" ] || + [ "$os" == "openEuler20.03" ] || [ "$os" == "openEuler22.03" ] || [ "$os" == "openEuler23.03" ] ; then + pkgs_core+=("tbb.$arch") + pkgs_python+=(python3 python3-pip) fi if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then - pkgs_core=("tbb.$arch" "pugixml.$arch" "gflags.$arch") pkgs_gpu+=("ocl-icd.$arch") - pkgs_dev+=("gflags-devel.$arch") extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm") - elif [ "$os" == "centos8" ] || [ "$os" == "rhel8" ] || [ "$os" == "almalinux8.7" ] ; then - pkgs_core+=( - "https://vault.centos.org/centos/8/AppStream/$arch/os/Packages/tbb-2018.2-9.el8.$arch.rpm" - "https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm" - "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm" - ) + elif [ "$os" == "rhel8" ] ; then + pkgs_core+=("https://vault.centos.org/centos/8/AppStream/$arch/os/Packages/tbb-2018.2-9.el8.$arch.rpm") 
pkgs_gpu+=("http://mirror.centos.org/centos/8-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.12-1.el8.$arch.rpm") pkgs_python+=(python38 python38-pip) - pkgs_dev+=( - "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm" - "https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/j/json-devel-3.6.1-2.el8.$arch.rpm" - ) extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm") elif [ "$os" == "rhel9.1" ] ; then - pkgs_core=( - "http://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/tbb-2020.3-8.el9.$arch.rpm" - "https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm" - "https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm" - ) + pkgs_core+=("http://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/tbb-2020.3-8.el9.$arch.rpm") pkgs_gpu+=("https://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.13-4.el9.$arch.rpm") - pkgs_python=(python3 python3-pip) - pkgs_dev+=("https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm") + pkgs_python+=(python3 python3-pip) extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm") fi elif [ "$os" == "opensuse-leap15.3" ] ; then - pkgs_core=(libtbb2 libtbbmalloc2 libpugixml1) + pkgs_core=(libtbb2 libtbbmalloc2) pkgs_gpu=(libOpenCL1) pkgs_python=(python39-base python39 python39-venv python39-pip) - pkgs_dev=(cmake pkg-config gcc-c++ gcc gflags-devel-static zlib-devel nlohmann_json-devel make curl sudo) + pkgs_dev=(cmake pkg-config gcc-c++ gcc make sudo) else echo "Internal script error: invalid OS (${os}) after check (package selection)" >&2 exit 3 @@ -279,11 +279,18 @@ if [ "$os" == "debian9" ] || [ "$os" == "raspbian9" ] || [ "$os" == "ubuntu18.04 apt-get update && apt-get install --no-install-recommends "$iopt" "${pkgs[@]}" -elif [ "$os" 
== "centos7" ] || [ "$os" == "centos8" ] || +elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] || [ "$os" == "centos9" ] || [ "$os" == "rhel8" ] || [ "$os" == "rhel9.1" ] || - [ "$os" == "fedora34" ] || [ "$os" == "fedora35" ] || [ "$os" == "fedora36" ] || + [ "$os" == "anolis8.6" ] || [ "$os" == "anolis8.8" ] || + [ "$os" == "openEuler20.03" ] || [ "$os" == "openEuler22.03" ] || [ "$os" == "openEuler23.03" ] || + [ "$os" == "fedora29" ] || [ "$os" == "fedora30" ] || [ "$os" == "fedora31" ] || [ "$os" == "fedora32" ] || + [ "$os" == "fedora33" ] || [ "$os" == "fedora34" ] || [ "$os" == "fedora35" ] || [ "$os" == "fedora36" ] || + [ "$os" == "fedora37" ] || [ "$os" == "fedora38" ] || [ "$os" == "fedora39" ] || [ "$os" == "fedora40" ] || [ "$os" == "fedora36" ] || [ "$os" == "fedora38" ] || - [ "$os" == "almalinux8.7" ] || [ "$os" == "amzn2" ] ; then + [ "$os" == "ol8.7" ] || [ "$os" == "ol8.8" ] || [ "$os" == "ol9.2" ] || + [ "$os" == "rocky8.7" ] || [ "$os" == "rocky8.8" ] || [ "$os" == "rocky9.2" ] || + [ "$os" == "almalinux8.7" ] || [ "$os" == "almalinux8.8" ] || [ "$os" == "almalinux9.2" ] || + [ "$os" == "amzn2" ] || [ "$os" == "amzn2022" ] || [ "$os" == "amzn2023" ] ; then [ -z "$interactive" ] && iopt="--assumeyes" [ -n "$dry" ] && iopt="--downloadonly" diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index 4eed13c9a79af6..6b89325fed4a2f 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -124,26 +124,7 @@ endif() # if(ENABLE_SAMPLES OR ENABLE_TESTS) - find_package(ZLIB QUIET) - if(ZLIB_FOUND) - # FindZLIB module defines ZLIB::ZLIB, no extra steps are required - endif() - - # cmake has failed to find zlib, let's try pkg-config - if(NOT ZLIB_FOUND AND PkgConfig_FOUND) - pkg_search_module(zlib QUIET - IMPORTED_TARGET - zlib) - if(zlib_FOUND) - add_library(ZLIB::ZLIB INTERFACE IMPORTED) - set_target_properties(ZLIB::ZLIB PROPERTIES INTERFACE_LINK_LIBRARIES PkgConfig::zlib) - message(STATUS 
"${PKG_CONFIG_EXECUTABLE}: zlib (${zlib_VERSION}) is found at ${zlib_PREFIX}") - endif() - endif() - - if(NOT (zlib_FOUND OR ZLIB_FOUND)) - add_subdirectory(thirdparty/zlib EXCLUDE_FROM_ALL) - endif() + add_subdirectory(thirdparty/zlib EXCLUDE_FROM_ALL) endif() # @@ -324,51 +305,8 @@ endif() # if(ENABLE_SAMPLES OR ENABLE_TESTS) - if(OV_VCPKG_BUILD OR OV_CONAN_BUILD) - # vcpkg contains only libs compiled with threads - # conan case - find_package(gflags QUIET) - elseif(APPLE OR WIN32) - # on Windows and macOS we don't use gflags, because will be dynamically linked - elseif(CMAKE_HOST_LINUX AND LINUX) - if(OV_OS_RHEL) - set(gflag_component nothreads_shared) - elseif(OV_OS_DEBIAN) - set(gflag_component nothreads_static) - endif() - find_package(gflags QUIET OPTIONAL_COMPONENTS ${gflag_component}) - endif() - - if(gflags_FOUND) - if(TARGET gflags) - # no extra steps - elseif(TARGET gflags_nothreads-static) - # Debian 9: gflag_component is ignored - set(gflags_target gflags_nothreads-static) - elseif(TARGET gflags_nothreads-shared) - # CentOS / RHEL / Fedora case - set(gflags_target gflags_nothreads-shared) - elseif(TARGET ${GFLAGS_TARGET}) - set(gflags_target ${GFLAGS_TARGET}) - else() - message(FATAL_ERROR "Internal error: failed to find imported target 'gflags' using '${gflag_component}' component") - endif() - - if(gflags_target) - if(OV_PkgConfig_VISILITY) - # need to set GLOBAL visibility in order to create ALIAS for this target - set_target_properties(${gflags_target} PROPERTIES IMPORTED_GLOBAL ON) - endif() - add_library(gflags ALIAS ${gflags_target}) - endif() - - message(STATUS "gflags (${gflags_VERSION}) is found at ${gflags_DIR} using '${gflag_component}' component") - endif() - - if(NOT TARGET gflags) - add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) - ov_developer_package_export_targets(TARGET gflags) - endif() + add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) + ov_developer_package_export_targets(TARGET gflags) endif() # @@ -607,73 +545,58 @@ 
endif() # Install # -if(CPACK_GENERATOR MATCHES "^(DEB|RPM|CONDA-FORGE|BREW|CONAN|VCPKG)$") - # These libraries are dependencies for openvino-samples package - if(ENABLE_SAMPLES OR ENABLE_TESTS) - if(NOT gflags_FOUND AND CPACK_GENERATOR MATCHES "^(DEB|RPM)$") - message(FATAL_ERROR "gflags must be used as a ${CPACK_GENERATOR} package. Install libgflags-dev / gflags-devel") - endif() - if(NOT (zlib_FOUND OR ZLIB_FOUND)) - message(FATAL_ERROR "zlib must be used as a ${CPACK_GENERATOR} package. Install zlib1g-dev / zlib-devel") - endif() - endif() +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags + DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty + COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} + ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL} + PATTERN bazel EXCLUDE + PATTERN doc EXCLUDE + PATTERN .git EXCLUDE + PATTERN appveyor.yml EXCLUDE + PATTERN AUTHORS.txt EXCLUDE + PATTERN BUILD EXCLUDE + PATTERN ChangeLog.txt EXCLUDE + PATTERN .gitattributes EXCLUDE + PATTERN .gitignore EXCLUDE + PATTERN .gitmodules EXCLUDE + PATTERN test EXCLUDE + PATTERN INSTALL.md EXCLUDE + PATTERN README.md EXCLUDE + PATTERN .travis.yml EXCLUDE + PATTERN src/gflags_completions.sh EXCLUDE + PATTERN WORKSPACE EXCLUDE) + +file(GLOB zlib_sources ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/zlib/*.c + ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/zlib/*.h) +install(FILES ${zlib_sources} + DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty/zlib/zlib + COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} + ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL}) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/CMakeLists.txt + DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty/zlib + COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} + ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL}) - if(NOT ENABLE_SYSTEM_PUGIXML) - message(FATAL_ERROR "Pugixml must be used as a ${CPACK_GENERATOR} package. 
Install libpugixml-dev / pugixml-devel") - endif() -elseif(APPLE OR WIN32) - install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags - DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty - COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} - ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL} - PATTERN bazel EXCLUDE - PATTERN doc EXCLUDE - PATTERN .git EXCLUDE - PATTERN appveyor.yml EXCLUDE - PATTERN AUTHORS.txt EXCLUDE - PATTERN BUILD EXCLUDE - PATTERN ChangeLog.txt EXCLUDE - PATTERN .gitattributes EXCLUDE - PATTERN .gitignore EXCLUDE - PATTERN .gitmodules EXCLUDE - PATTERN test EXCLUDE - PATTERN INSTALL.md EXCLUDE - PATTERN README.md EXCLUDE - PATTERN .travis.yml EXCLUDE - PATTERN WORKSPACE EXCLUDE) - - file(GLOB zlib_sources ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/zlib/*.c - ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/zlib/*.h) - install(FILES ${zlib_sources} - DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty/zlib/zlib - COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} - ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL}) - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib/CMakeLists.txt - DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty/zlib - COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} - ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL}) - - install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/json/nlohmann_json - DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty - COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} - ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL} - PATTERN ChangeLog.md EXCLUDE - PATTERN CITATION.cff EXCLUDE - PATTERN .clang-format EXCLUDE - PATTERN .clang-tidy EXCLUDE - PATTERN docs EXCLUDE - PATTERN .git EXCLUDE - PATTERN .github EXCLUDE - PATTERN .gitignore EXCLUDE - PATTERN .lgtm.yml EXCLUDE - PATTERN Makefile EXCLUDE - PATTERN meson.build EXCLUDE - PATTERN README.md EXCLUDE - PATTERN .reuse EXCLUDE - PATTERN tests EXCLUDE - PATTERN tools EXCLUDE - PATTERN wsjcpp.yml EXCLUDE) -endif() +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/json/nlohmann_json + DESTINATION 
${OV_CPACK_SAMPLESDIR}/cpp/thirdparty + COMPONENT ${OV_CPACK_COMP_CPP_SAMPLES} + ${OV_CPACK_COMP_CPP_SAMPLES_EXCLUDE_ALL} + PATTERN ChangeLog.md EXCLUDE + PATTERN CITATION.cff EXCLUDE + PATTERN .clang-format EXCLUDE + PATTERN .clang-tidy EXCLUDE + PATTERN docs EXCLUDE + PATTERN .git EXCLUDE + PATTERN .github EXCLUDE + PATTERN .gitignore EXCLUDE + PATTERN .lgtm.yml EXCLUDE + PATTERN Makefile EXCLUDE + PATTERN meson.build EXCLUDE + PATTERN README.md EXCLUDE + PATTERN .reuse EXCLUDE + PATTERN tests EXCLUDE + PATTERN tools EXCLUDE + PATTERN wsjcpp.yml EXCLUDE) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy DESTINATION ${OV_CPACK_SAMPLESDIR}/cpp/thirdparty diff --git a/vcpkg.json b/vcpkg.json index fc30546a81f6d8..2fa0218e839c31 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -17,8 +17,6 @@ "builtin-baseline": "db0473513e5dc73ec6b6f431ff05d2f398eea042", "dependencies": [ "ade", - "gflags", - "nlohmann-json", { "name": "pkgconf", "host": true From 138447184971b5dfed731e5ccd4b0e5e67637167 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Tue, 31 Oct 2023 08:19:11 +0100 Subject: [PATCH 148/275] [ONNX][Code refactor] ONNX GroupNormalization - Optimize getting inputs * Optimize getting inputs and use more const * Fix typo --- .../frontend/src/op/group_normalization.cpp | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/frontends/onnx/frontend/src/op/group_normalization.cpp b/src/frontends/onnx/frontend/src/op/group_normalization.cpp index d0f32a89ae4515..b0dc8f786d8aef 100644 --- a/src/frontends/onnx/frontend/src/op/group_normalization.cpp +++ b/src/frontends/onnx/frontend/src/op/group_normalization.cpp @@ -12,30 +12,34 @@ namespace onnx_import { namespace op { namespace set_1 { OutputVector group_normalization(const Node& node) { - const auto data = node.get_ng_inputs().at(0); // Shape [N, C, ...] 
- auto scale = node.get_ng_inputs().at(1); // Shape [num_groups] - auto bias = node.get_ng_inputs().at(2); // Shape [num_groups] + const auto inputs = node.get_ng_inputs(); + OPENVINO_ASSERT(inputs.size() == 3); - auto eps = node.get_attribute_value("epsilon", 1e-05f); - auto num_groups = node.get_attribute_value("num_groups"); + const auto& data = inputs[0]; // Shape [N, C, ...] + const auto& scale = inputs[1]; // Shape [num_groups] + const auto& bias = inputs[2]; // Shape [num_groups] - auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0}); - auto one = default_opset::Constant::create(element::i64, Shape{1}, {1}); - auto c_dim = std::make_shared(std::make_shared(data), one, zero); - auto g_dim = default_opset::Constant::create(element::i64, Shape{1}, {num_groups}); + const auto eps = node.get_attribute_value("epsilon", 1e-05f); + const auto num_groups = node.get_attribute_value("num_groups"); - auto c_g_div = std::make_shared(c_dim, g_dim); + const auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0}); + const auto one = default_opset::Constant::create(element::i64, Shape{1}, {1}); + const auto c_dim = + std::make_shared(std::make_shared(data), one, zero); + const auto g_dim = default_opset::Constant::create(element::i64, Shape{1}, {num_groups}); + + const auto c_g_div = std::make_shared(c_dim, g_dim); // Adjust scale and bias shape, [G] -> [G, C/G] -> [C] - scale = std::make_shared(scale, one); - auto broadcast_scale = - std::make_shared(scale, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); - auto c_scale = std::make_shared(broadcast_scale, c_dim, false); - - bias = std::make_shared(bias, one); - auto broadcast_bias = - std::make_shared(bias, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); - auto c_bias = std::make_shared(broadcast_bias, c_dim, false); + const auto scale_unsq = std::make_shared(scale, one); + const auto broadcast_scale = + std::make_shared(scale_unsq, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); + const 
auto c_scale = std::make_shared(broadcast_scale, c_dim, false); + + const auto bias_unsq = std::make_shared(bias, one); + const auto broadcast_bias = + std::make_shared(bias_unsq, c_g_div, ov::op::BroadcastType::BIDIRECTIONAL); + const auto c_bias = std::make_shared(broadcast_bias, c_dim, false); return {std::make_shared(data, c_scale, c_bias, num_groups, eps)}; } From 7f04ad69c253278c0d8b6ad27d95e15eb9869faa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 11:33:51 +0400 Subject: [PATCH 149/275] Bump awalsh128/cache-apt-pkgs-action from 1.3.0 to 1.3.1 (#20779) Bumps [awalsh128/cache-apt-pkgs-action](https://github.com/awalsh128/cache-apt-pkgs-action) from 1.3.0 to 1.3.1. - [Release notes](https://github.com/awalsh128/cache-apt-pkgs-action/releases) - [Commits](https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1.3.0...v1.3.1) --- updated-dependencies: - dependency-name: awalsh128/cache-apt-pkgs-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build_doc.yml | 2 +- .github/workflows/code_snippets.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index fca75e99dc4109..e22582f9c1fd4f 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -21,7 +21,7 @@ jobs: lfs: 'true' - name: Install apt-get dependencies - uses: awalsh128/cache-apt-pkgs-action@v1.3.0 + uses: awalsh128/cache-apt-pkgs-action@v1.3.1 with: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 581a584b88805f..e4091556da1ac0 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -30,7 +30,7 @@ jobs: submodules: 'true' - name: Install OpenCL - uses: awalsh128/cache-apt-pkgs-action@v1.3.0 + uses: awalsh128/cache-apt-pkgs-action@v1.3.1 if: runner.os == 'Linux' with: packages: ocl-icd-opencl-dev opencl-headers From 53820c0cf2ade841b415ff259955eed1bf04205d Mon Sep 17 00:00:00 2001 From: rsato10 <89757445+rsato10@users.noreply.github.com> Date: Tue, 31 Oct 2023 00:39:16 -0700 Subject: [PATCH 150/275] [TF FE]Support Inv operation for TensorFlow models (#20720) * [TF FE]Support Inv operation for TensorFlow models * added test tests/layer_tests/tensorflow_tests/test_tf_Inv.py and src/frontends/tensorflow_common/src/op/inv.cpp * Update tests/layer_tests/tensorflow_tests/test_tf_Inv.py * Update tests/layer_tests/tensorflow_tests/test_tf_Inv.py * Update tests/layer_tests/tensorflow_tests/test_tf_Inv.py --------- Co-authored-by: Roman Kazantsev --- src/frontends/tensorflow/src/op_table.cpp | 1 + .../include/common_op_table.hpp | 1 + .../tensorflow_common/src/op/inv.cpp | 30 +++++++++++++ .../tensorflow_tests/test_tf_Inv.py | 44 +++++++++++++++++++ 4 
files changed, 76 insertions(+) create mode 100644 src/frontends/tensorflow_common/src/op/inv.cpp create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Inv.py diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 4643994b728b5b..4926ac159cecbb 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -194,6 +194,7 @@ const std::map get_supported_ops() { {"HashTableV2", CreatorFunction(translate_hash_table_op)}, {"Identity", CreatorFunction(translate_identity_op)}, {"IdentityN", CreatorFunction(translate_identity_n_op)}, + {"Inv", CreatorFunction(translate_inv_op)}, {"If", CreatorFunction(translate_if_op)}, {"input_arg", CreatorFunction(translate_input_arg_op)}, {"Iterator", CreatorFunction(translate_iterator_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 29efb83547d263..3601a07f6c45d0 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -76,6 +76,7 @@ OP_CONVERTER(translate_gather_tree_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); OP_CONVERTER(translate_input_arg_op); +OP_CONVERTER(translate_inv_op); OP_CONVERTER(translate_invert_permutation_op); OP_CONVERTER(translate_output_arg_op); OP_CONVERTER(translate_interpolate_op); diff --git a/src/frontends/tensorflow_common/src/op/inv.cpp b/src/frontends/tensorflow_common/src/op/inv.cpp new file mode 100644 index 00000000000000..ec2196219f5033 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/inv.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" + +using namespace std; +using namespace ov::op; + +namespace ov { +namespace 
frontend { +namespace tensorflow { +namespace op { +OutputVector translate_inv_op(const NodeContext& node) { + default_op_checks(node, 1, {"Inv"}); + auto x = node.get_input(0); + + // prepare auxiliary one constants of the same type as the inputs + auto one = create_same_type_const_scalar(x, 1); + + auto inv = make_shared(one, x); + set_node_name(node.get_name(), inv); + return inv->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py new file mode 100644 index 00000000000000..af6e57dda83a93 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestInv(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + inputs_data = {} + inputs_data['x'] = np.random.choice([-5, -4, -3, -2, -1, 1, 2, 3, 4, 5], x_shape).astype(np.float32) + + return inputs_data + + def create_inv_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + tf.raw_ops.Inv(x=x) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[], input_type=np.float32), + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[2, 3, 4], input_type=np.float32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_inv_basic(self, params, ie_device, precision, ir_version, 
temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_inv_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) \ No newline at end of file From 1b46c73b5161f68263ab3a103f64221736d45f3c Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Tue, 31 Oct 2023 11:42:44 +0400 Subject: [PATCH 151/275] Remove commented code from `Proposal` layer test (#20781) --- .../src/single_op/proposal.cpp | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp b/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp index 1889d22d630594..c6d5858eef46a9 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/proposal.cpp @@ -12,46 +12,6 @@ const size_t feat_stride = 1; const float box_size_scale = 2.0f; const float box_coordinate_scale = 2.0f; -// std::string ProposalLayerTest::SerializeProposalSpecificParams(proposalSpecificParams& params) { -// size_t base_size; -// size_t pre_nms_topn; -// size_t post_nms_topn; -// float nms_thresh; -// size_t min_size; -// std::vector ratio; -// std::vector scale; -// bool clip_before_nms; -// bool clip_after_nms; -// std::string framework; -// std::tie(base_size, pre_nms_topn, -// post_nms_topn, -// nms_thresh, -// min_size, -// ratio, -// scale, -// clip_before_nms, -// clip_after_nms, -// framework) = params; - -// std::ostringstream result; -// result << "base_size=" << base_size << "_"; -// result << "pre_nms_topn=" << pre_nms_topn << "_"; -// result << "post_nms_topn=" << post_nms_topn << "_"; -// result << "nms_thresh=" << nms_thresh << "_"; -// result << "feat_stride=" << feat_stride << "_"; -// result << "min_size=" << min_size << "_"; -// result << "ratio = " << ov::test::utils::vec2str(ratio) << "_"; -// result << "scale = " << ov::test::utils::vec2str(scale) << 
"_"; -// result << "clip_before_nms=" << clip_before_nms << "_"; -// result << "clip_after_nms=" << clip_after_nms << "_"; -// result << "normalize=" << normalize << "_"; -// result << "box_size_scale=" << box_size_scale << "_"; -// result << "box_coordinate_scale=" << box_coordinate_scale << "_"; -// result << "framework=" << framework << "_"; - -// return result.str(); -// } - std::string ProposalLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { proposalSpecificParams proposal_params; ov::element::Type model_type; From 00705e6e5809f86855d842d13f231a7b493df241 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Tue, 31 Oct 2023 17:24:35 +0900 Subject: [PATCH 152/275] [GPU] Bugfix adjust padding size for reshape optimizing (#20773) Signed-off-by: Min, Byungil --- src/plugins/intel_gpu/src/graph/include/reshape_inst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index d34bb9635fb0be..c49a1f6cb0dde7 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -63,7 +63,7 @@ struct typed_program_node : public typed_program_node_base { } void adjust_output_padding() { - if (!is_in_place() || !has_padding()) + if (!has_padding()) return; auto input_layout = input().get_output_layout(false); From 38cd138d0d1763071879de2de355cf4f02b0b369 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 31 Oct 2023 09:34:49 +0100 Subject: [PATCH 153/275] [core]Migrate Sqrt operator to new API (#20632) * Migrate Sqrt operator to new API * Remove 'visit_attributes' is same as base --- src/core/include/openvino/op/sqrt.hpp | 5 +- .../include/openvino/reference/sqrt.hpp | 34 ++++--- src/core/src/op/sqrt.cpp | 98 ++++++++----------- 3 files changed, 66 insertions(+), 71 deletions(-) diff --git a/src/core/include/openvino/op/sqrt.hpp b/src/core/include/openvino/op/sqrt.hpp 
index 9fad4ba2a76c0b..d09521e95cb0a3 100644 --- a/src/core/include/openvino/op/sqrt.hpp +++ b/src/core/include/openvino/op/sqrt.hpp @@ -35,11 +35,8 @@ class OPENVINO_API Sqrt : public util::UnaryElementwiseArithmetic { Sqrt(const Output& arg); Sqrt() = default; - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/sqrt.hpp b/src/core/reference/include/openvino/reference/sqrt.hpp index e0d4d4f1ba089b..1562b7cca23bad 100644 --- a/src/core/reference/include/openvino/reference/sqrt.hpp +++ b/src/core/reference/include/openvino/reference/sqrt.hpp @@ -6,21 +6,33 @@ #include #include -#include + +#include "openvino/reference/utils/type_util.hpp" namespace ov { namespace reference { -template -typename std::enable_if::value>::type sqrt(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = std::sqrt(arg[i]); - } +namespace func { +template ()>::type* = nullptr> +T sqrt(const T in) { + return std::sqrt(in); +} + +template ::value>::type* = nullptr> +T sqrt(const T in) { + return static_cast(std::round(std::sqrt(in))); } -template -typename std::enable_if::value>::type sqrt(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(std::round(std::sqrt(arg[i]))); - } +} // namespace func + +/** + * @brief Reference implementation of Sqrt operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ +template +void sqrt(const T* arg, T* out, const size_t count) { + std::transform(arg, arg + count, out, func::sqrt); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/sqrt.cpp b/src/core/src/op/sqrt.cpp index fe9c3830e9cc5c..0c05c6833bfcf0 100644 --- a/src/core/src/op/sqrt.cpp +++ b/src/core/src/op/sqrt.cpp @@ -2,80 +2,66 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/sqrt.hpp" +#include "openvino/op/sqrt.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/add.hpp" -#include "ngraph/op/divide.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/sqrt.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace sqrt { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::Sqrt::Sqrt(const Output& arg) : UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} + template > + static result_type visit(const Tensor& arg0, Tensor& out, const size_t count) { + reference::sqrt(arg0.data(), out.data(), count); + return true; + } +}; +} // namespace sqrt -bool ngraph::op::v0::Sqrt::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Sqrt_visit_attrinutes); - return true; +namespace v0 { +Sqrt::Sqrt(const Output& arg) : UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -shared_ptr op::Sqrt::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Sqrt::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Sqrt_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace sqrtop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::sqrt(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; 
-} - -bool evaluate_sqrt(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_sqrt, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, f32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sqrt, f64, arg0, out, count); - default: - rc = false; - break; - } - return rc; + return std::make_shared(new_args.at(0)); } -} // namespace -} // namespace sqrtop -bool op::Sqrt::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Sqrt::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Sqrt_evaluate); - return sqrtop::evaluate_sqrt(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Sqrt::has_evaluate() const { +bool Sqrt::has_evaluate() const { OV_OP_SCOPE(v0_Sqrt_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: - case ngraph::element::f64: + case element::f16: + case element::f32: + case element::f64: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From 
38b60921203aee035903d40dfdc2d7db0dbed03f Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 31 Oct 2023 12:46:36 +0400 Subject: [PATCH 154/275] [TF FE] Switch off TF1 While support totally (#20774) * [TF FE] Switch off TF1 While support totally This is a total switch off due to GPU limitation Signed-off-by: Kazantsev, Roman * Need additional fallback in Enter to avoid shapes problem * Disable tests with While op Signed-off-by: Kazantsev, Roman * Disable layer test for TF1 While * Remove extra spaces --------- Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow/src/op/enter.cpp | 3 +++ src/frontends/tensorflow/src/op/loop_cond.cpp | 3 +++ src/frontends/tensorflow/tests/convert_unsupported.cpp | 2 +- tests/layer_tests/tensorflow_tests/test_tf_While.py | 4 ++-- tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py | 1 + 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/frontends/tensorflow/src/op/enter.cpp b/src/frontends/tensorflow/src/op/enter.cpp index c0719f83e36ccb..190f6adebec6c7 100644 --- a/src/frontends/tensorflow/src/op/enter.cpp +++ b/src/frontends/tensorflow/src/op/enter.cpp @@ -22,6 +22,9 @@ OutputVector translate_enter_op(const NodeContext& node) { auto data = node.get_input(0); auto frame_name = node.get_attribute("frame_name"); + // TODO 123651: remove this fallback to the legacy FE once GPU fixes dynamism for Loop operation + TENSORFLOW_OP_VALIDATION(node, false, "Fallback to legacy FE: Switch off TF1 While support due to GPU limitation"); + auto enter_node = make_shared(data, frame_name, node.get_decoder()); set_node_name(node.get_name(), enter_node); diff --git a/src/frontends/tensorflow/src/op/loop_cond.cpp b/src/frontends/tensorflow/src/op/loop_cond.cpp index 286192a017f283..3b16518456a849 100644 --- a/src/frontends/tensorflow/src/op/loop_cond.cpp +++ b/src/frontends/tensorflow/src/op/loop_cond.cpp @@ -22,6 +22,9 @@ OutputVector translate_loop_cond_op(const NodeContext& node) { default_op_checks(node, 1, 
{"LoopCond"}); auto input = node.get_input(0); + // TODO 123651: remove this fallback to the legacy FE once GPU fixes dynamism for Loop operation + TENSORFLOW_OP_VALIDATION(node, false, "Fallback to legacy FE: Switch off TF1 While support due to GPU limitation"); + auto loop_cond_node = make_shared(input, node.get_decoder()); set_node_name(node.get_name(), loop_cond_node); diff --git a/src/frontends/tensorflow/tests/convert_unsupported.cpp b/src/frontends/tensorflow/tests/convert_unsupported.cpp index 7d9a83045b5606..aef4a0bea9d115 100644 --- a/src/frontends/tensorflow/tests/convert_unsupported.cpp +++ b/src/frontends/tensorflow/tests/convert_unsupported.cpp @@ -154,7 +154,7 @@ TEST(FrontEndConvertModelTest, test_unsupported_tf1_while_and_incorrect_less_tra } } -TEST(FrontEndConvertModelTest, conversion_with_unknown_exception) { +TEST(FrontEndConvertModelTest, DISABLED_conversion_with_unknown_exception) { shared_ptr model = nullptr; try { auto conv_ext = diff --git a/tests/layer_tests/tensorflow_tests/test_tf_While.py b/tests/layer_tests/tensorflow_tests/test_tf_While.py index 2a112700f30ad5..cd66024d1e8294 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_While.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_While.py @@ -51,7 +51,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=False), dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=False), - dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True) + pytest.param(dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True), marks=pytest.mark.xfail(reason="123651")) ] @pytest.mark.parametrize("params", test_data_basic) @@ -110,7 +110,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], lower_control_flow=False), dict(y_shape=[2, 1, 4], lower_control_flow=False), - dict(y_shape=[2, 1, 4], lower_control_flow=True) + pytest.param(dict(y_shape=[2, 1, 4], lower_control_flow=True), 
marks=pytest.mark.xfail(reason="123651")) ] @pytest.mark.parametrize("params", test_data_basic) diff --git a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py index 26ea01b77d6722..9902f18ad3d874 100644 --- a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py +++ b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py @@ -239,6 +239,7 @@ def test_conversion_tf1_while_default(self): self.basic("ctc_model_based.pbtxt", None, None, None, None, None, None, True, True, False, False) + @unittest.skip("123651: enable when GPU fixes dynamism in Loop operation") def test_conversion_tf1_while_use_new_frontend(self): self.basic("ctc_model_based.pbtxt", None, None, None, None, None, None, True, True, True, False) From 1f961f903ceb9619650b932899f49594e839c4ad Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Tue, 31 Oct 2023 10:05:54 +0100 Subject: [PATCH 155/275] Fixed info logs in OVC (#20586) * Corrected memory consumption log, removed unused log_level info. * Minor correction. * Trace memory only if verbose is set. * Use get_traced_memory(). 
--- tools/ovc/openvino/tools/ovc/cli_parser.py | 1 - tools/ovc/openvino/tools/ovc/convert_impl.py | 44 +++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/tools/ovc/openvino/tools/ovc/cli_parser.py b/tools/ovc/openvino/tools/ovc/cli_parser.py index 87fc0225206fa5..7e0a626db8099c 100644 --- a/tools/ovc/openvino/tools/ovc/cli_parser.py +++ b/tools/ovc/openvino/tools/ovc/cli_parser.py @@ -420,7 +420,6 @@ def get_common_cli_options(argv, is_python_api_used): if not is_python_api_used: model_name = get_model_name_from_args(argv) d['output_model'] = ['- IR output name', lambda _: model_name] - d['log_level'] = '- Log level' d['input'] = ['- Input layers', lambda x: x if x else 'Not specified, inherited from the model'] d['output'] = ['- Output layers', lambda x: x if x else 'Not specified, inherited from the model'] return d diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index cf09a2abfe26d0..3a746f646d8730 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -7,6 +7,7 @@ import os import sys import traceback +import tracemalloc from collections import OrderedDict from pathlib import Path from typing import Iterable, Callable @@ -39,7 +40,7 @@ from openvino.frontend import FrontEndManager, OpConversionFailure, TelemetryExtension from openvino.runtime import get_version as get_rt_version from openvino.runtime import Type, PartialShape -import re + try: from openvino.frontend.tensorflow.utils import create_tf_graph_iterator, type_supported_by_tf_fe, \ @@ -221,29 +222,13 @@ def check_model_object(argv): def driver(argv: argparse.Namespace, non_default_params: dict): - if not hasattr(argv, 'log_level'): - argv.log_level = 'ERROR' - init_logger(argv.log_level.upper(), argv.verbose) + init_logger('ERROR', argv.verbose) # Log dictionary with non-default cli parameters where complex classes are excluded. 
log.debug(str(non_default_params)) - start_time = datetime.datetime.now() - ov_model = moc_emit_ir(prepare_ir(argv), argv) - if argv.verbose: - elapsed_time = datetime.datetime.now() - start_time - print('[ SUCCESS ] Total execution time: {:.2f} seconds. '.format(elapsed_time.total_seconds())) - try: - import resource - mem_usage = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024) - if sys.platform == 'darwin': - mem_usage = round(mem_usage / 1024) - print('[ SUCCESS ] Memory consumed: {} MB. '.format(mem_usage)) - except ImportError: - pass - return ov_model def get_non_default_params(argv, cli_parser): @@ -416,11 +401,21 @@ def pack_params_to_args_namespace(args: dict, cli_parser: argparse.ArgumentParse return argv -def is_verbose(argv: argparse.Namespace): - return argv is not None and hasattr(argv, 'verbose') and argv.verbose +def is_verbose(argv, args=None): + if argv is not None and hasattr(argv, 'verbose') and argv.verbose: + return True + if args is not None and 'verbose' in args and args['verbose']: + return True + if '--verbose' in sys.argv: + return True + return False def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used): + start_time = datetime.datetime.now() + if is_verbose(None, args): + tracemalloc.start() + simplified_ie_version = VersionChecker().get_ie_simplified_version() telemetry = init_mo_telemetry() telemetry.start_session('ovc') @@ -500,6 +495,15 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used): print(ov_update_message) send_conversion_result('success') + + if is_verbose(argv): + elapsed_time = datetime.datetime.now() - start_time + print('[ SUCCESS ] Total execution time: {:.2f} seconds. '.format(elapsed_time.total_seconds())) + + _, peak_size = tracemalloc.get_traced_memory() + print("[ SUCCESS ] Peak memory consumption (includes only memory allocated in Python): {:.2f} MB. 
".format(peak_size / (1024 * 1024))) + tracemalloc.stop() + return ov_model, argv except Exception as e: From bb28b718492d0b9c16c2737862995d890c42f4c4 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Tue, 31 Oct 2023 19:21:56 +0900 Subject: [PATCH 156/275] [GPU] if the reorder which is an user of reshpae has truncation mode, it should not split the reorder node (#20749) --- .../graph/graph_optimizer/handle_reshape.cpp | 3 +- .../unit/test_cases/reshape_gpu_test.cpp | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp index 7b5eb3b02d33e6..c148b311d55744 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp @@ -98,7 +98,8 @@ void handle_reshape::run(program& p) { // find the users of reshape that are reorder type, if none present then skip the current node // find users who are onednn impl for (const auto& user : node->get_users()) { - if (user->is_type()) + if (user->is_type() && + (*user).as().get_primitive()->truncate == false) // not to split conversion only reorder reorder_node_to_split.push_back(user); if (user->get_preferred_impl_type() == cldnn::impl_types::onednn) onednn_users.push_back(user); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp index 0d7c6cbe271f41..6f7cc179465667 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp @@ -734,6 +734,55 @@ TEST(reshape_gpu_f32, shrink_chain_out) { test_shrink_chain_out(false); } +template +void test_shrink_chain_partial_reorder_truncate(bool is_caching_test) { + auto& engine = get_test_engine(); + auto batch_num = 2; + auto feature_num = 2; + auto x_size = 1; + 
auto y_size = 1; + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}); + auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + + std::vector scale_vals = {0.f, 1.f, 2.f, 3.f}; + std::vector scale_shifts = {5.f, 10.f, 15.f, 20.0f}; + set_values(scale_in, scale_vals); + set_values(shift_in, scale_shifts); + + topology topology; + topology.add(input_layout("input", input->get_layout())); + topology.add(data("scale_in", scale_in)); + topology.add(data("shift_in", shift_in)); + topology.add(activation("relu", input_info("input"), activation_func::relu)); + topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2)))); + topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32, {}, reorder_mean_mode::subtract, padding(), true)); + topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4)))); + topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod)); + topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum)); + topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32)); + + std::vector input_vec = {-1.f, 2.f, -3.f, 4.f}; + std::vector out = {5.f, 12.f, 15.f, 32.0f}; + set_values(input, input_vec); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + network->set_input_data("input", input); + auto outputs = network->execute(); + + auto output = outputs.at("out_reorder").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < out.size(); i++) + 
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i; +} + +TEST(reshape_gpu_f32, shrink_chain_partial_reorder_truncate) { + test_shrink_chain_partial_reorder_truncate(false); +} + TEST(reshape_gpu_f32, basic_runtime_static_shape) { // input: bfwzyx, (3, 3, 2, 2, 1, 1) // reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1) From a20c9a5a22ed5f50e3caa8479ac6c36f3540b190 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 31 Oct 2023 14:45:54 +0400 Subject: [PATCH 157/275] [TF FE] Update TF FE dev docs with Inv (#20788) Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow/docs/supported_ops.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index 5794e3f16653fd..e9b9a499f55a76 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -529,7 +529,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | InplaceSub | NO | | | InplaceUpdate | NO | | | InterleaveDataset | NO | | -| Inv | NO | | +| Inv | YES | | | InvGrad | NO | | | Invert | NO | | | InvertPermutation | YES | | From da1f0199a0438f80d59c36880fe65c2b567366cd Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 31 Oct 2023 14:55:28 +0400 Subject: [PATCH 158/275] Removed WA for static protobuf for vcpkg installation path (#20784) --- .../installing-openvino-vcpkg.md | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md index d0d502cfced7e2..4da210edb0d581 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md +++ b/docs/articles_en/get 
started/installing-openvino-overview/installing-openvino-shared/installing-openvino-vcpkg.md @@ -65,37 +65,6 @@ Installing OpenVINO Runtime Note that the vcpkg installation means building all packages and dependencies from source, which means the compiler stage will require additional time to complete the process. -.. important:: - - If you are building OpenVINO as dynamic libraries and you want to use either Paddle, TensorFlow or ONNX frontends, you need to create `custom vcpkg `__ triplet file, like ``/triplets/community/x64-linux-release-dynamic.cmake``, which builds ``protobuf`` dependency statically: - - .. code-block:: sh - - # typical values of vcpkg toolchain - set(VCPKG_TARGET_ARCHITECTURE x64) - set(VCPKG_CRT_LINKAGE dynamic) - # by default, all libraries are built dynamically - set(VCPKG_LIBRARY_LINKAGE dynamic) - - set(VCPKG_CMAKE_SYSTEM_NAME Linux) - set(VCPKG_BUILD_TYPE release) - - set(VCPKG_FIXUP_ELF_RPATH ON) - - # OpenVINO specific additions: build statically the following internal dependencies - # IMPORTANT: you need to build at least protobuf statically, others can be dynamic - if(PORT MATCHES "^(ade|hwloc|onnx|protobuf|pugixml|snappy)$") - set(VCPKG_LIBRARY_LINKAGE static) - endif() - - - Then, you can use such a triplet file with the following command: - - .. code-block:: sh - - vcpkg install 'openvino:x64-linux-release-dynamic' - - After installation, you can use OpenVINO in your product's cmake scripts: .. 
code-block:: sh From 2932e9e9381c8f07cef200931470fe19044590d5 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 31 Oct 2023 12:00:52 +0100 Subject: [PATCH 159/275] ReshapeBMatMul and ReshapeAMatMul: avoid circular dependencies creation (#20771) --- .../smart_reshape/matmul_sr.cpp | 10 ++++-- .../tests/functional/matmul_sr_tests.cpp | 35 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/src/transformations/smart_reshape/matmul_sr.cpp b/src/common/transformations/src/transformations/smart_reshape/matmul_sr.cpp index 870b69d9a55901..ff7dc8c927d0ae 100644 --- a/src/common/transformations/src/transformations/smart_reshape/matmul_sr.cpp +++ b/src/common/transformations/src/transformations/smart_reshape/matmul_sr.cpp @@ -61,8 +61,11 @@ ov::pass::ReshapeAMatMul::ReshapeAMatMul() { auto other_input_label = pattern::any_input(); auto reshape_input_label = pattern::any_input(); auto reshape_pattern_label = pattern::any_input(); + auto reshape_predicate = [](ov::Output output) -> bool { + return ov::pass::pattern::rank_equals(2)(output) && ov::pass::pattern::consumers_count(1)(output); + }; auto reshape_label = ov::pass::pattern::wrap_type({reshape_input_label, reshape_pattern_label}, - ov::pass::pattern::rank_equals(2)); + reshape_predicate); auto matmul_label = ov::pass::pattern::wrap_type({reshape_label, other_input_label}); matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { @@ -83,8 +86,11 @@ ov::pass::ReshapeBMatMul::ReshapeBMatMul() { auto other_input_label = pattern::any_input(); auto reshape_input_label = pattern::any_input(); auto reshape_pattern_label = pattern::any_input(); + auto reshape_predicate = [](ov::Output output) -> bool { + return ov::pass::pattern::rank_equals(2)(output) && ov::pass::pattern::consumers_count(1)(output); + }; auto reshape_label = ov::pass::pattern::wrap_type({reshape_input_label, reshape_pattern_label}, - ov::pass::pattern::rank_equals(2)); + 
reshape_predicate); auto matmul_label = ov::pass::pattern::wrap_type({other_input_label, reshape_label}); matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { diff --git a/src/inference/tests/functional/matmul_sr_tests.cpp b/src/inference/tests/functional/matmul_sr_tests.cpp index 27a294e656e171..3d17cfd915fa58 100644 --- a/src/inference/tests/functional/matmul_sr_tests.cpp +++ b/src/inference/tests/functional/matmul_sr_tests.cpp @@ -10,11 +10,14 @@ #include "cnn_network_ngraph_impl.hpp" #include "common_test_utils/graph_comparator.hpp" +#include "common_test_utils/ov_test_utils.hpp" #include "common_test_utils/test_common.hpp" #include "ie_common.h" +#include "openvino/op/add.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/parameter.hpp" +#include "openvino/op/reduce_max.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/variadic_split.hpp" @@ -357,3 +360,35 @@ TEST(SmartReshapeTransposeMatMulTests, TransposeBothMatMulWithAttrFuse) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; } + +TEST_F(TransformationTestsF, SmartReshapeReshapeAMatMulSeveralConsumers) { + // Reshape has 2 consumers: matmul and reduce. 
+ // Since reshape movement leads to loop creation (circular dependencies), the transformation can't be applied + auto data_A = std::make_shared(ov::element::f32, ov::Shape{3, 2, 3}); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, {2}, {3, 6}); + auto reshape = std::make_shared(data_A, reshape_const, false); + + auto data_B = std::make_shared(ov::element::f32, ov::Shape{6, 12}); + auto reduction_axes = ov::op::v0::Constant::create(ov::element::i32, {2}, {0, 1}); + auto reduce = std::make_shared(reshape, reduction_axes); + auto sum = std::make_shared(data_B, reduce); + auto matmul = std::make_shared(reshape, sum); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{data_A, data_B}); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, SmartReshapeReshapeBMatMulSeveralConsumers) { + // Reshape has 2 consumers: matmul and reduce. + // Since reshape movement leads to loop creation (circular dependencies), the transformation can't be applied + auto data_B = std::make_shared(ov::element::f32, ov::Shape{3, 2, 3}); + auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, {2}, {6, 3}); + auto reshape = std::make_shared(data_B, reshape_const, false); + + auto data_A = std::make_shared(ov::element::f32, ov::Shape{12, 6}); + auto reduction_axes = ov::op::v0::Constant::create(ov::element::i32, {2}, {0, 1}); + auto reduce = std::make_shared(reshape, reduction_axes); + auto sum = std::make_shared(data_A, reduce); + auto matmul = std::make_shared(sum, reshape); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{data_A, data_B}); + manager.register_pass(); +} From 3077bad26fe652503cdcbd2da4b75dd345a97809 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 31 Oct 2023 12:11:29 +0100 Subject: [PATCH 160/275] [core]Migrate Sigmoid operator to new API (#20780) * Migrate Sigmoid operator to new API * Add missing include --- src/core/include/openvino/op/sigmoid.hpp | 4 +- 
.../include/openvino/reference/sigmoid.hpp | 32 +++--- src/core/src/op/sigmoid.cpp | 98 ++++++++----------- 3 files changed, 61 insertions(+), 73 deletions(-) diff --git a/src/core/include/openvino/op/sigmoid.hpp b/src/core/include/openvino/op/sigmoid.hpp index eaf6bfa14afad4..9c244e2681f7fc 100644 --- a/src/core/include/openvino/op/sigmoid.hpp +++ b/src/core/include/openvino/op/sigmoid.hpp @@ -18,9 +18,7 @@ class OPENVINO_API Sigmoid : public util::UnaryElementwiseArithmetic { Sigmoid(const Output& arg); Sigmoid() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/sigmoid.hpp b/src/core/reference/include/openvino/reference/sigmoid.hpp index d30aedf21ae9f4..4e1daafeff3d0b 100644 --- a/src/core/reference/include/openvino/reference/sigmoid.hpp +++ b/src/core/reference/include/openvino/reference/sigmoid.hpp @@ -4,28 +4,30 @@ #pragma once +#include #include #include -#include + +#include "openvino/reference/utils/type_util.hpp" namespace ov { namespace reference { -template ::value, bool>::type = true> -void sigmoid(const T* arg, T* out, size_t count) { - T exp_value; - for (size_t i = 0; i < count; i++) { - exp_value = static_cast(std::exp(-static_cast::type>(arg[i]))); - out[i] = static_cast(1 / (1 + exp_value)); - } +namespace func { +template ::value>::type* = nullptr> +T sigmoid(const T value) { + const auto exp_value = static_cast(std::exp(-static_cast::type>(value))); + return 1 / (1 + exp_value); +} + +template ()>::type* = nullptr> +T sigmoid(const T value) { + return 1 / (1 + std::exp(-value)); } +} // namespace func -template ::value, bool>::type = true> 
-void sigmoid(const T* arg, T* out, size_t count) { - T exp_value; - for (size_t i = 0; i < count; i++) { - exp_value = static_cast(std::exp(-arg[i])); - out[i] = static_cast(1 / (1 + exp_value)); - } +template +void sigmoid(const T* arg, T* out, const size_t count) { + std::transform(arg, arg + count, out, func::sigmoid); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/sigmoid.cpp b/src/core/src/op/sigmoid.cpp index 9966dbcab8d69b..a4ce31db1e3a97 100644 --- a/src/core/src/op/sigmoid.cpp +++ b/src/core/src/op/sigmoid.cpp @@ -2,80 +2,68 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/sigmoid.hpp" - -#include +#include "openvino/op/sigmoid.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/log.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/util.hpp" #include "openvino/reference/sigmoid.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace sigmoid { + +struct Evaluate : element::NoAction { + using element::NoAction::visit; -shared_ptr ov::op::v0::Sigmoid::clone_with_new_inputs(const OutputVector& new_args) const { + template > + static result_type visit(const Tensor& arg0, Tensor& out, const size_t count) { + reference::sigmoid(arg0.data(), out.data(), count); + return true; + } +}; +} // namespace sigmoid + +namespace v0 { + +std::shared_ptr Sigmoid::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Sigmoid_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -ov::op::v0::Sigmoid::Sigmoid(const Output& arg) : UnaryElementwiseArithmetic(arg) { +Sigmoid::Sigmoid(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace sigmoid { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) 
{ - using T = typename element_type_traits::value_type; - ov::reference::sigmoid(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} - -bool evaluate_sigmoid(const HostTensorPtr& arg0, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg0->get_shape()); - out->set_unary(arg0); +bool Sigmoid::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Sigmoid_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_sigmoid, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sigmoid, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sigmoid, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sigmoid, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sigmoid, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sigmoid, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace sigmoid + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); -bool ov::op::v0::Sigmoid::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Sigmoid_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return sigmoid::evaluate_sigmoid(inputs[0], outputs[0]); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool ov::op::v0::Sigmoid::has_evaluate() const { +bool Sigmoid::has_evaluate() const { OV_OP_SCOPE(v0_Sigmoid_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case 
element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From 57571d36e6c9717d5f73dfb54cbe5b8ff4fa8361 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Tue, 31 Oct 2023 16:10:52 +0400 Subject: [PATCH 161/275] [CPU] NMSRotated operation implementation. (#20410) --- .../sort/NMSRotated_13.md | 4 +- .../src/transformations/convert_precision.cpp | 47 + src/plugins/intel_cpu/src/cpu_types.cpp | 1 + src/plugins/intel_cpu/src/node.cpp | 37 +- src/plugins/intel_cpu/src/node.h | 1 + .../nodes/kernels/x64/non_max_suppression.cpp | 465 ++++++ .../nodes/kernels/x64/non_max_suppression.hpp | 152 ++ .../src/nodes/non_max_suppression.cpp | 1467 +++++++---------- .../intel_cpu/src/nodes/non_max_suppression.h | 172 +- .../skip_tests_config.cpp | 2 + .../instances/common/nms_rotated.cpp | 95 ++ .../non_max_suppression.cpp | 30 +- .../include/single_op_tests/nms_rotated.hpp | 15 + .../single_op/nms_rotated.hpp | 47 + .../src/single_op/nms_rotated.cpp | 207 +++ .../skip_configs/CPU/expected_failures_OP.csv | 1 - 16 files changed, 1792 insertions(+), 951 deletions(-) create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/nms_rotated.cpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/nms_rotated.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/nms_rotated.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/nms_rotated.cpp diff --git a/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sort/NMSRotated_13.md 
b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sort/NMSRotated_13.md index 5ae29954802563..964f9bdb522380 100644 --- a/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sort/NMSRotated_13.md +++ b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sort/NMSRotated_13.md @@ -27,13 +27,13 @@ The general algorithm is described below: Here ``func(rotated_iou(b_i, b)) = 1 if rotated_iou(b_i, b) <= iou_threshold else 0``. -Having two bouding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``: +Having two bounding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``: 1. Calculate rotated vertices, (x, y) coordinates of the 4 corners of each box transformed by the corresponding angle in radians according to the direction specified by the *clockwise* attribute. 2. Find all intersection points between edges of ``B1`` and ``B2``. Add them to the ``intersection_points``. 3. Find all corners of ``B1`` within area of ``B2``, and all corners of ``B2`` within area of ``B1``. Add them to the ``intersection_points``. 4. Calculate ``intersection_area`` of the polygon described by ``intersection_points`` (see Shoelace formula). -5. Calculate ``union_area`` (the common area of ``B1`` and ``B2``), `union_area = (B1_area + B2_area) - intersection_area`. +5. Calculate ``union_area`` (the common area of ``B1`` and ``B2``), ``union_area = B1_area + B2_area``. 6. Return intersection over union ``rotated_iou = intersection_area / (union_area - intersection_area)``. 
diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index a1e9dd7a820e16..4fd52934dd415f 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -49,6 +49,7 @@ bool fuse_type_to_nms3(const std::shared_ptr& node, const precisions_m bool fuse_type_to_nms4(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_nms5(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_nms_rotated(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_matrix_nms(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_multiclass_nms(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_generate_proposals(const std::shared_ptr& node, const precisions_map& precisions); @@ -383,6 +384,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& {opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4}, {opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5}, {opset9::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms9}, + {op::v13::NMSRotated::get_type_info_static(), fuse_type_to_nms_rotated}, {opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms}, {opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms}, {opset9::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms}, @@ -691,6 +693,51 @@ bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_m return res; } +bool fuse_type_to_nms_rotated(const std::shared_ptr& node, const precisions_map& precisions) { + auto nms = ov::as_type_ptr(node); + if (!nms) { + return false; + } + + bool res = false; + auto it = 
precisions.find(node->get_output_element_type(0)); + if (it != precisions.end()) { + const auto& to = it->second; + if (to == ov::element::i32 || to == ov::element::i64) { + nms->set_output_type_attr(to); + res = true; + if (precisions.count(node->get_output_element_type(1)) == 0) { + return res; + } + } + } + + auto type_relaxed = std::dynamic_pointer_cast(node); + ov::element::TypeVector output_types; + for (size_t i = 0; i < node->get_output_size(); i++) { + it = precisions.find(node->get_output_element_type(i)); + if (it == precisions.end()) { + output_types.push_back(node->get_output_element_type(i)); + continue; + } + const auto& to = it->second; + if (type_relaxed) { + type_relaxed->set_overridden_output_type(to, i); + res = true; + } + output_types.push_back(to); + } + + if (!type_relaxed) { + auto relaxed_op = + std::make_shared>(*nms, ov::element::TypeVector{}, output_types); + replace_node(node, relaxed_op); + res = true; + } + + return res; +} + namespace { bool update_type(size_t idx, diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 139685f5882103..56cdbe32a2da4e 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -201,6 +201,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { { "ExtractImagePatches", Type::ExtractImagePatches}, { "NonMaxSuppression", Type::NonMaxSuppression}, { "NonMaxSuppressionIEInternal", Type::NonMaxSuppression}, + { "NMSRotated", Type::NonMaxSuppression}, { "MatrixNms", Type::MatrixNms}, { "MulticlassNms", Type::MulticlassNms}, { "MulticlassNmsIEInternal", Type::MulticlassNms}, diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index ab02ae44dd6ce2..c36815ee048091 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -615,26 +615,31 @@ bool Node::outputShapeDataDependency() const { void Node::redefineOutputMemory(const std::vector &newOutputShapes) { if 
(newOutputShapes.size() != outputShapes.size()) { - IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName(); + THROW_CPU_NODE_ERR("has shapes number mismatch with real outputs number."); } - for (size_t i = 0; i < outputShapes.size(); i++) { - const auto edges = getChildEdgesAtPort(i); + for (size_t i = 0lu; i < outputShapes.size(); i++) { + redefineOutputMemory(i, newOutputShapes[i]); + } +} - // avoid 0D shape incompatible - auto newOutputShape = newOutputShapes[i]; - if (newOutputShape.empty()) { - newOutputShape.push_back(1); - } +void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_shape) { + const auto edges = getChildEdgesAtPort(port); - const auto &currDesc = edges[0]->getMemory().getDesc(); - if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape) - continue; + // avoid 0D shape incompatible + auto new_shape = new_output_shape; + if (new_shape.empty()) { + new_shape.push_back(1); + } - const bool hasZeroDims = std::count(std::begin(newOutputShape), std::end(newOutputShape), 0) > 0; - const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShape, hasZeroDims); - for (size_t j = 0; j < edges.size(); j++) { - edges[j]->getMemoryPtr()->redefineDesc(memDesc); - } + const auto& curr_desc = edges[0]->getMemory().getDesc(); + if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) { + return; + } + + const bool has_zero_dims = std::count(std::begin(new_shape), std::end(new_shape), 0lu) > 0; + const auto mem_desc = getBaseMemDescAtOutputPort(port)->cloneWithNewDims(new_shape, has_zero_dims); + for (size_t j = 0lu; j < edges.size(); j++) { + edges[j]->getMemoryPtr()->redefineDesc(mem_desc); } } diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 864c08a95b04c6..4b6fa3a87f72dd 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -366,6 
+366,7 @@ class Node { void updateDynamicParams(); void executeDynamic(dnnl::stream strm); virtual void redefineOutputMemory(const std::vector &newShapes); + void redefineOutputMemory(const size_t port, const VectorDims& new_output_shape); bool outputShapeDataDependency() const; virtual void initSupportedPrimitiveDescriptors(); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp new file mode 100644 index 00000000000000..f9c665ec9c5eea --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.cpp @@ -0,0 +1,465 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "non_max_suppression.hpp" +#include "utils/general_utils.h" + +using namespace InferenceEngine; +using namespace dnnl::impl::cpu; + +#define GET_OFF(field) offsetof(NmsCallArgs, field) + +namespace ov { +namespace intel_cpu { +namespace kernel { + +template +void NonMaxSuppression::generate() { + load_vector_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, vector_step)); + load_scalar_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, scalar_step)); + + exp_injector.reset(new x64::jit_uni_eltwise_injector_f32(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f)); + + this->preamble(); + + uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; + store_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx())}; + store_pool_vec_idxs = {static_cast(vmm_zero.getIdx())}; + + mov(reg_boxes_coord0, ptr[reg_params + GET_OFF(selected_boxes_coord[0])]); + mov(reg_boxes_coord1, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 1 * sizeof(size_t)]); + mov(reg_boxes_coord2, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 2 * sizeof(size_t)]); + mov(reg_boxes_coord3, ptr[reg_params + 
GET_OFF(selected_boxes_coord[0]) + 3 * sizeof(size_t)]); + mov(reg_candidate_box, ptr[reg_params + GET_OFF(candidate_box)]); + mov(reg_candidate_status, ptr[reg_params + GET_OFF(candidate_status)]); + mov(reg_boxes_num, ptr[reg_params + GET_OFF(selected_boxes_num)]); + mov(reg_iou_threshold, ptr[reg_params + GET_OFF(iou_threshold)]); + // soft + mov(reg_score_threshold, ptr[reg_params + GET_OFF(score_threshold)]); + mov(reg_score, ptr[reg_params + GET_OFF(score)]); + mov(reg_scale, ptr[reg_params + GET_OFF(scale)]); + + // could use rcx(reg_table) and rdi(reg_temp) now as abi parse finished + mov(reg_table, l_table_constant); + if (x64::mayiuse(x64::avx512_core)) { + kmovw(k_mask_one, word[reg_table + vlen]); + } + uni_vbroadcastss(vmm_iou_threshold, ptr[reg_iou_threshold]); + uni_vbroadcastss(vmm_score_threshold, ptr[reg_score_threshold]); + + uni_vbroadcastss(vmm_candidate_coord0, ptr[reg_candidate_box]); + uni_vbroadcastss(vmm_candidate_coord1, ptr[reg_candidate_box + 1 * sizeof(float)]); + uni_vbroadcastss(vmm_candidate_coord2, ptr[reg_candidate_box + 2 * sizeof(float)]); + uni_vbroadcastss(vmm_candidate_coord3, ptr[reg_candidate_box + 3 * sizeof(float)]); + + if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) { + // box format: y1, x1, y2, x2 + uni_vminps(vmm_temp1, vmm_candidate_coord0, vmm_candidate_coord2); + uni_vmaxps(vmm_temp2, vmm_candidate_coord0, vmm_candidate_coord2); + uni_vmovups(vmm_candidate_coord0, vmm_temp1); + uni_vmovups(vmm_candidate_coord2, vmm_temp2); + + uni_vminps(vmm_temp1, vmm_candidate_coord1, vmm_candidate_coord3); + uni_vmaxps(vmm_temp2, vmm_candidate_coord1, vmm_candidate_coord3); + uni_vmovups(vmm_candidate_coord1, vmm_temp1); + uni_vmovups(vmm_candidate_coord3, vmm_temp2); + } else { + // box format: x_center, y_center, width, height --> y1, x1, y2, x2 + uni_vmulps(vmm_temp1, vmm_candidate_coord2, ptr[reg_table]); // width/2 + uni_vmulps(vmm_temp2, vmm_candidate_coord3, ptr[reg_table]); // height/2 + + uni_vaddps(vmm_temp3, 
vmm_candidate_coord0, vmm_temp1); // x_center + width/2 + uni_vmovups(vmm_candidate_coord3, vmm_temp3); + + uni_vaddps(vmm_temp3, vmm_candidate_coord1, vmm_temp2); // y_center + height/2 + uni_vmovups(vmm_candidate_coord2, vmm_temp3); + + uni_vsubps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center - width/2 + uni_vsubps(vmm_temp4, vmm_candidate_coord1, vmm_temp2); // y_center - height/2 + + uni_vmovups(vmm_candidate_coord1, vmm_temp3); + uni_vmovups(vmm_candidate_coord0, vmm_temp4); + } + + // check from last to first + imul(reg_temp_64, reg_boxes_num, sizeof(float)); + add(reg_boxes_coord0, reg_temp_64); // y1 + add(reg_boxes_coord1, reg_temp_64); // x1 + add(reg_boxes_coord2, reg_temp_64); // y2 + add(reg_boxes_coord3, reg_temp_64); // x2 + + Xbyak::Label hard_nms_label; + Xbyak::Label nms_end_label; + + mov(reg_temp_32, ptr[reg_scale]); + test(reg_temp_32, reg_temp_32); + jz(hard_nms_label, T_NEAR); + + soft_nms(); + + jmp(nms_end_label, T_NEAR); + + L(hard_nms_label); + + hard_nms(); + + L(nms_end_label); + + this->postamble(); + + load_vector_emitter->emit_data(); + load_scalar_emitter->emit_data(); + + prepare_table(); + exp_injector->prepare_table(); +} + + +template +void NonMaxSuppression::hard_nms() { + Xbyak::Label main_loop_label_hard; + Xbyak::Label main_loop_end_label_hard; + Xbyak::Label tail_loop_label_hard; + Xbyak::Label terminate_label_hard; + L(main_loop_label_hard); + { + cmp(reg_boxes_num, vector_step); + jl(main_loop_end_label_hard, T_NEAR); + + sub(reg_boxes_coord0, vector_step * sizeof(float)); + sub(reg_boxes_coord1, vector_step * sizeof(float)); + sub(reg_boxes_coord2, vector_step * sizeof(float)); + sub(reg_boxes_coord3, vector_step * sizeof(float)); + + // iou result is in vmm_temp3 + iou(vector_step); + + sub(reg_boxes_num, vector_step); + + suppressed_by_iou(false); + + // if zero continue, else set result to suppressed and terminate + jz(main_loop_label_hard, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], 
Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label_hard, T_NEAR); + } + L(main_loop_end_label_hard); + + L(tail_loop_label_hard); + { + cmp(reg_boxes_num, 1); + jl(terminate_label_hard, T_NEAR); + + sub(reg_boxes_coord0, scalar_step * sizeof(float)); + sub(reg_boxes_coord1, scalar_step * sizeof(float)); + sub(reg_boxes_coord2, scalar_step * sizeof(float)); + sub(reg_boxes_coord3, scalar_step * sizeof(float)); + + // iou result is in vmm_temp3 + iou(scalar_step); + + sub(reg_boxes_num, scalar_step); + + suppressed_by_iou(true); + + jz(tail_loop_label_hard, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label_hard, T_NEAR); + } + + L(terminate_label_hard); +} + +template +void NonMaxSuppression::soft_nms() { + uni_vbroadcastss(vmm_scale, ptr[reg_scale]); + + Xbyak::Label main_loop_label; + Xbyak::Label main_loop_end_label; + Xbyak::Label tail_loop_label; + Xbyak::Label terminate_label; + + Xbyak::Label main_loop_label_soft; + Xbyak::Label tail_loop_label_soft; + L(main_loop_label); + { + cmp(reg_boxes_num, vector_step); + jl(main_loop_end_label, T_NEAR); + + sub(reg_boxes_coord0, vector_step * sizeof(float)); + sub(reg_boxes_coord1, vector_step * sizeof(float)); + sub(reg_boxes_coord2, vector_step * sizeof(float)); + sub(reg_boxes_coord3, vector_step * sizeof(float)); + + // result(iou and weight) is in vmm_temp3 + iou(vector_step); + sub(reg_boxes_num, vector_step); + + // soft suppressed by iou_threshold + if (m_jcp.is_soft_suppressed_by_iou) { + suppressed_by_iou(false); + + // if zero continue soft suppression, else set result to suppressed and terminate + jz(main_loop_label_soft, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label, T_NEAR); + + L(main_loop_label_soft); + } + + // weight: std::exp(scale * iou * iou) + soft_coeff(); + + // vector weights multiply + horizontal_mul(); + + uni_vbroadcastss(vmm_temp1, ptr[reg_score]); + + // new 
score in vmm3[0] + uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1); + // store new score + uni_vmovss(ptr[reg_score], vmm_temp3); + + // cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold + suppressed_by_score(); + + jz(main_loop_label, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label, T_NEAR); + } + L(main_loop_end_label); + + L(tail_loop_label); + { + cmp(reg_boxes_num, 1); + jl(terminate_label, T_NEAR); + + sub(reg_boxes_coord0, scalar_step * sizeof(float)); + sub(reg_boxes_coord1, scalar_step * sizeof(float)); + sub(reg_boxes_coord2, scalar_step * sizeof(float)); + sub(reg_boxes_coord3, scalar_step * sizeof(float)); + + iou(scalar_step); + sub(reg_boxes_num, scalar_step); + + // soft suppressed by iou_threshold + if (m_jcp.is_soft_suppressed_by_iou) { + suppressed_by_iou(true); + + jz(tail_loop_label_soft, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label, T_NEAR); + + L(tail_loop_label_soft); + } + + soft_coeff(); + + uni_vbroadcastss(vmm_temp1, ptr[reg_score]); + + // vmm3[0] is valide, no need horizontal mul. + uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1); + + uni_vmovss(ptr[reg_score], vmm_temp3); + + // cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold + suppressed_by_score(); + + jz(tail_loop_label, T_NEAR); + + uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0); + + jmp(terminate_label, T_NEAR); + } + + L(terminate_label); +} + +template +void NonMaxSuppression::suppressed_by_iou(bool is_scalar) { + if (x64::mayiuse(x64::avx512_core)) { + vcmpps(k_mask, vmm_temp3, vmm_iou_threshold, 0x0D); // _CMP_GE_OS. 
vcmpps w/ kmask only on V5 + if (is_scalar) + kandw(k_mask, k_mask, k_mask_one); + kortestw(k_mask, k_mask); // bitwise check if all zero + } else if (x64::mayiuse(x64::avx)) { + // vex instructions with xmm on avx and ymm on avx2 + vcmpps(vmm_temp4, vmm_temp3, vmm_iou_threshold, 0x0D); // xmm and ymm only on V1. + if (is_scalar) { + uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0); + test(reg_temp_32, reg_temp_32); + } else { + uni_vtestps(vmm_temp4, vmm_temp4); // vtestps: sign bit check if all zeros, ymm and xmm only on V1, N/A on V5 + } + } else { + // pure sse path, make sure don't spoil vmm_temp3, which may used in after soft-suppression + uni_vmovups(vmm_temp4, vmm_temp3); + cmpps(vmm_temp4, vmm_iou_threshold, 0x07); // order compare, 0 for at least one is NaN + + uni_vmovups(vmm_temp2, vmm_temp3); + cmpps(vmm_temp2, vmm_iou_threshold, 0x05); // _CMP_GE_US on sse, no direct _CMP_GE_OS supported. + + uni_vandps(vmm_temp4, vmm_temp4, vmm_temp2); + if (is_scalar) { + uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0); + test(reg_temp_32, reg_temp_32); + } else { + uni_vtestps(vmm_temp4, vmm_temp4); // ptest: bitwise check if all zeros, on sse41 + } + } +} + +template +void NonMaxSuppression::suppressed_by_score() { + if (x64::mayiuse(x64::avx512_core)) { + vcmpps(k_mask, vmm_temp3, vmm_score_threshold, 0x02); // vcmpps w/ kmask only on V5, w/o kmask version N/A on V5 + kandw(k_mask, k_mask, k_mask_one); + kortestw(k_mask, k_mask); // bitwise check if all zero + } else if (x64::mayiuse(x64::avx)) { + vcmpps(vmm_temp4, vmm_temp3, vmm_score_threshold, 0x02); + uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0); + test(reg_temp_32, reg_temp_32); + } else { + cmpps(vmm_temp3, vmm_score_threshold, 0x02); // _CMP_LE_OS on sse + uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp3.getIdx()), 0); + test(reg_temp_32, reg_temp_32); + } +} + +template +void NonMaxSuppression::iou(int ele_num) { + auto load = [&](Xbyak::Reg64 reg_src, Vmm vmm_dst) { + 
if (ele_num != scalar_step && ele_num != vector_step) + OPENVINO_THROW("NMS JIT implementation supports load emitter with only element count scalar_step or vector_step! Get: ", ele_num); + + const auto& load_emitter = ele_num == 1 ? load_scalar_emitter : load_vector_emitter; + load_emitter->emit_code({static_cast(reg_src.getIdx())}, {static_cast(vmm_dst.getIdx())}, + {}, {load_pool_gpr_idxs}); + }; + load(reg_boxes_coord0, vmm_boxes_coord0); + load(reg_boxes_coord1, vmm_boxes_coord1); + load(reg_boxes_coord2, vmm_boxes_coord2); + load(reg_boxes_coord3, vmm_boxes_coord3); + + if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) { + // box format: y1, x1, y2, x2 + uni_vminps(vmm_temp1, vmm_boxes_coord0, vmm_boxes_coord2); + uni_vmaxps(vmm_temp2, vmm_boxes_coord0, vmm_boxes_coord2); + uni_vmovups(vmm_boxes_coord0, vmm_temp1); + uni_vmovups(vmm_boxes_coord2, vmm_temp2); + + uni_vminps(vmm_temp1, vmm_boxes_coord1, vmm_boxes_coord3); + uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_boxes_coord3); + uni_vmovups(vmm_boxes_coord1, vmm_temp1); + uni_vmovups(vmm_boxes_coord3, vmm_temp2); + } else { + // box format: x_center, y_center, width, height --> y1, x1, y2, x2 + uni_vmulps(vmm_temp1, vmm_boxes_coord2, ptr[reg_table]); // width/2 + uni_vmulps(vmm_temp2, vmm_boxes_coord3, ptr[reg_table]); // height/2 + + uni_vaddps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center + width/2 + uni_vmovups(vmm_boxes_coord3, vmm_temp3); + + uni_vaddps(vmm_temp3, vmm_boxes_coord1, vmm_temp2); // y_center + height/2 + uni_vmovups(vmm_boxes_coord2, vmm_temp3); + + uni_vsubps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center - width/2 + uni_vsubps(vmm_temp4, vmm_boxes_coord1, vmm_temp2); // y_center - height/2 + + uni_vmovups(vmm_boxes_coord1, vmm_temp3); + uni_vmovups(vmm_boxes_coord0, vmm_temp4); + } + + uni_vsubps(vmm_temp1, vmm_boxes_coord2, vmm_boxes_coord0); + uni_vsubps(vmm_temp2, vmm_boxes_coord3, vmm_boxes_coord1); + uni_vmulps(vmm_temp1, vmm_temp1, vmm_temp2); // boxes area + + 
uni_vsubps(vmm_temp2, vmm_candidate_coord2, vmm_candidate_coord0); + uni_vsubps(vmm_temp3, vmm_candidate_coord3, vmm_candidate_coord1); + uni_vmulps(vmm_temp2, vmm_temp2, vmm_temp3); // candidate(bc) area // candidate area calculate once and check if 0 + + uni_vaddps(vmm_temp1, vmm_temp1, vmm_temp2); // areaI + areaJ to free vmm_temp2 + + // y of intersection + uni_vminps(vmm_temp3, vmm_boxes_coord2, vmm_candidate_coord2); // min(Ymax) + uni_vmaxps(vmm_temp4, vmm_boxes_coord0, vmm_candidate_coord0); // max(Ymin) + uni_vsubps(vmm_temp3, vmm_temp3, vmm_temp4); // min(Ymax) - max(Ymin) + uni_vmaxps(vmm_temp3, vmm_temp3, vmm_zero); + + // x of intersection + uni_vminps(vmm_temp4, vmm_boxes_coord3, vmm_candidate_coord3); // min(Xmax) + uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_candidate_coord1); // max(Xmin) + uni_vsubps(vmm_temp4, vmm_temp4, vmm_temp2); // min(Xmax) - max(Xmin) + uni_vmaxps(vmm_temp4, vmm_temp4, vmm_zero); + + // intersection_area + uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp4); + + // iou: intersection_area / (areaI + areaJ - intersection_area); + uni_vsubps(vmm_temp1, vmm_temp1, vmm_temp3); + uni_vdivps(vmm_temp3, vmm_temp3, vmm_temp1); +} + +// std::exp(scale * iou * iou) +template +void NonMaxSuppression::soft_coeff() { + uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp3); + uni_vmulps(vmm_temp3, vmm_temp3, vmm_scale); + exp_injector->compute_vector_range(vmm_temp3.getIdx(), vmm_temp3.getIdx() + 1); +} + +template +void NonMaxSuppression::horizontal_mul_xmm(const Xbyak::Xmm &xmm_weight, const Xbyak::Xmm &xmm_aux) { + uni_vmovshdup(xmm_aux, xmm_weight); // weight:1,2,3,4; aux:2,2,4,4 + uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2,2*2,3*4,4*4 + uni_vmovhlps(xmm_aux, xmm_aux, xmm_weight); // aux:3*4,4*4,4,4 + uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2*3*4,... 
+} + +// horizontal mul for vmm_weight(Vmm(3)), temp1 and temp2 as aux +template +inline void NonMaxSuppression::horizontal_mul() { + Xbyak::Xmm xmm_weight = Xbyak::Xmm(vmm_temp3.getIdx()); + Xbyak::Xmm xmm_temp1 = Xbyak::Xmm(vmm_temp1.getIdx()); + Xbyak::Xmm xmm_temp2 = Xbyak::Xmm(vmm_temp2.getIdx()); + if (isa == x64::sse41) { + horizontal_mul_xmm(xmm_weight, xmm_temp1); + } else if (isa == x64::avx2) { + Xbyak::Ymm ymm_weight = Xbyak::Ymm(vmm_temp3.getIdx()); + vextractf128(xmm_temp1, ymm_weight, 0); + vextractf128(xmm_temp2, ymm_weight, 1); + uni_vmulps(xmm_weight, xmm_temp1, xmm_temp2); + horizontal_mul_xmm(xmm_weight, xmm_temp1); + } else { + Xbyak::Zmm zmm_weight = Xbyak::Zmm(vmm_temp3.getIdx()); + vextractf32x4(xmm_temp1, zmm_weight, 0); + vextractf32x4(xmm_temp2, zmm_weight, 1); + uni_vmulps(xmm_temp1, xmm_temp1, xmm_temp2); + vextractf32x4(xmm_temp2, zmm_weight, 2); + vextractf32x4(xmm_weight, zmm_weight, 3); + uni_vmulps(xmm_weight, xmm_weight, xmm_temp2); + uni_vmulps(xmm_weight, xmm_weight, xmm_temp1); + horizontal_mul_xmm(xmm_weight, xmm_temp1); + } +} + +template class NonMaxSuppression; +template class NonMaxSuppression; +template class NonMaxSuppression; + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.hpp new file mode 100644 index 00000000000000..859f687db8dc14 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/non_max_suppression.hpp @@ -0,0 +1,152 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "jit_kernel_base.hpp" + +#if defined(OPENVINO_ARCH_X86_64) +#include "emitters/x64/jit_load_store_emitters.hpp" +#include "cpu/x64/injectors/jit_uni_eltwise_injector.hpp" +#endif // OPENVINO_ARCH_X86_64 + +namespace ov { +namespace intel_cpu { + +enum class NMSBoxEncodeType { + CORNER, + CENTER +}; + +#if 
defined(OPENVINO_ARCH_X86_64) + +namespace kernel { + +struct NmsCompileParams { + NMSBoxEncodeType box_encode_type; + bool is_soft_suppressed_by_iou; +}; + +struct NmsCallArgs { + const void* selected_boxes_coord[4]; + size_t selected_boxes_num; + const void* candidate_box; + const void* iou_threshold; + void* candidate_status; + // for soft suppression, score *= scale * iou * iou; + const void* score_threshold; + const void* scale; + void* score; +}; + + +template +class NonMaxSuppression : public JitKernel { +public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(NonMaxSuppression) + + explicit NonMaxSuppression(const NmsCompileParams& jcp) : JitKernel(jit_name(), jcp, isa) {} + + void generate() override; + +private: + using Vmm = typename dnnl::impl::utils::conditional3::type; + uint32_t vlen = dnnl::impl::cpu::x64::cpu_isa_traits::vlen; + const int vector_step = vlen / sizeof(float); + const int scalar_step = 1; + + Xbyak::Reg64 reg_boxes_coord0 = r8; + Xbyak::Reg64 reg_boxes_coord1 = r9; + Xbyak::Reg64 reg_boxes_coord2 = r10; + Xbyak::Reg64 reg_boxes_coord3 = r11; + Xbyak::Reg64 reg_candidate_box = r12; + Xbyak::Reg64 reg_candidate_status = r13; + Xbyak::Reg64 reg_boxes_num = r14; + Xbyak::Reg64 reg_iou_threshold = r15; + // more for soft + Xbyak::Reg64 reg_score_threshold = rdx; + Xbyak::Reg64 reg_score = rbp; + Xbyak::Reg64 reg_scale = rsi; + + Xbyak::Reg64 reg_load_table = rax; + Xbyak::Reg64 reg_load_store_mask = rbx; + + // reuse + Xbyak::Label l_table_constant; + Xbyak::Reg64 reg_table = rcx; + Xbyak::Reg64 reg_temp_64 = rdi; + Xbyak::Reg32 reg_temp_32 = edi; + + const Xbyak::Reg64 reg_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]); + + std::unique_ptr load_vector_emitter = nullptr; + std::unique_ptr load_scalar_emitter = nullptr; + + std::vector store_pool_gpr_idxs; + std::vector store_pool_vec_idxs; + std::vector load_pool_gpr_idxs; + + Vmm vmm_boxes_coord0 = Vmm(1); + Vmm vmm_boxes_coord1 = Vmm(2); + Vmm vmm_boxes_coord2 = Vmm(3); + Vmm 
vmm_boxes_coord3 = Vmm(4); + Vmm vmm_candidate_coord0 = Vmm(5); + Vmm vmm_candidate_coord1 = Vmm(6); + Vmm vmm_candidate_coord2 = Vmm(7); + Vmm vmm_candidate_coord3 = Vmm(8); + Vmm vmm_temp1 = Vmm(9); + Vmm vmm_temp2 = Vmm(10); + Vmm vmm_temp3 = Vmm(11); + Vmm vmm_temp4 = Vmm(12); + + Vmm vmm_iou_threshold = Vmm(13); + Vmm vmm_zero = Vmm(15); + + // soft + Vmm vmm_score_threshold = Vmm(14); + Vmm vmm_scale = Vmm(0); + + Xbyak::Opmask k_mask = Xbyak::Opmask(7); + Xbyak::Opmask k_mask_one = Xbyak::Opmask(6); + + std::shared_ptr> exp_injector; + + inline void hard_nms(); + + inline void soft_nms(); + + inline void suppressed_by_iou(bool is_scalar); + + inline void suppressed_by_score(); + + inline void iou(int ele_num); + + inline void soft_coeff(); + + inline void horizontal_mul_xmm(const Xbyak::Xmm& xmm_weight, const Xbyak::Xmm& xmm_aux); + + inline void horizontal_mul(); + + inline void prepare_table() { + auto broadcast_d = [&](int val) { + for (size_t d = 0; d < vlen / sizeof(int); ++d) { + dd(val); + } + }; + + align(64); + L(l_table_constant); + broadcast_d(0x3f000000); // 0.5f + dw(0x0001); + } +}; + +} // namespace kernel + +#endif // OPENVINO_ARCH_X86_64 + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index d2a46ac97da017..79112a3afa34a7 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -1,571 +1,41 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +// Copyright (c) Facebook, Inc. and its affiliates. 
+// The implementation for rotated boxes intersection is based on the code from: +// https://github.com/facebookresearch/detectron2/blob/v0.6/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h +// -#include -#include -#include -#include -#include -#include #include "non_max_suppression.h" + #include "ie_parallel.hpp" -#include -#include #include "utils/general_utils.h" +#include "shape_inference/shape_inference_internal_dyn.hpp" +#include "openvino/op/nms_rotated.hpp" +#include "openvino/op/non_max_suppression.hpp" +#include "ov_ops/nms_ie_internal.hpp" -#include "cpu/x64/jit_generator.hpp" -#include "emitters/x64/jit_load_store_emitters.hpp" -#include -#include +#include using namespace InferenceEngine; -using namespace dnnl; -using namespace dnnl::impl; -using namespace dnnl::impl::cpu::x64; -using namespace dnnl::impl::utils; -using namespace Xbyak; - -#define GET_OFF(field) offsetof(jit_nms_args, field) namespace ov { namespace intel_cpu { namespace node { -#if defined(OPENVINO_ARCH_X86_64) -template -struct jit_uni_nms_kernel_f32 : public jit_uni_nms_kernel, public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_nms_kernel_f32) - - explicit jit_uni_nms_kernel_f32(jit_nms_config_params jcp_) : jit_uni_nms_kernel(jcp_), jit_generator(jit_name()) {} - - void create_ker() override { - jit_generator::create_kernel(); - ker_ = (decltype(ker_))jit_ker(); - } - - void generate() override { - load_vector_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, vector_step)); - load_scalar_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, scalar_step)); - - exp_injector.reset(new jit_uni_eltwise_injector_f32(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f)); - - this->preamble(); - - uni_vpxor(vmm_zero, vmm_zero, vmm_zero); - - load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; - store_pool_gpr_idxs = 
{static_cast(reg_load_store_mask.getIdx())}; - store_pool_vec_idxs = {static_cast(vmm_zero.getIdx())}; - - mov(reg_boxes_coord0, ptr[reg_params + GET_OFF(selected_boxes_coord[0])]); - mov(reg_boxes_coord1, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 1 * sizeof(size_t)]); - mov(reg_boxes_coord2, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 2 * sizeof(size_t)]); - mov(reg_boxes_coord3, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 3 * sizeof(size_t)]); - mov(reg_candidate_box, ptr[reg_params + GET_OFF(candidate_box)]); - mov(reg_candidate_status, ptr[reg_params + GET_OFF(candidate_status)]); - mov(reg_boxes_num, ptr[reg_params + GET_OFF(selected_boxes_num)]); - mov(reg_iou_threshold, ptr[reg_params + GET_OFF(iou_threshold)]); - // soft - mov(reg_score_threshold, ptr[reg_params + GET_OFF(score_threshold)]); - mov(reg_score, ptr[reg_params + GET_OFF(score)]); - mov(reg_scale, ptr[reg_params + GET_OFF(scale)]); - - // could use rcx(reg_table) and rdi(reg_temp) now as abi parse finished - mov(reg_table, l_table_constant); - if (mayiuse(cpu::x64::avx512_core)) { - kmovw(k_mask_one, word[reg_table + vlen]); - } - uni_vbroadcastss(vmm_iou_threshold, ptr[reg_iou_threshold]); - uni_vbroadcastss(vmm_score_threshold, ptr[reg_score_threshold]); - - uni_vbroadcastss(vmm_candidate_coord0, ptr[reg_candidate_box]); - uni_vbroadcastss(vmm_candidate_coord1, ptr[reg_candidate_box + 1 * sizeof(float)]); - uni_vbroadcastss(vmm_candidate_coord2, ptr[reg_candidate_box + 2 * sizeof(float)]); - uni_vbroadcastss(vmm_candidate_coord3, ptr[reg_candidate_box + 3 * sizeof(float)]); - - if (jcp.box_encode_type == NMSBoxEncodeType::CORNER) { - // box format: y1, x1, y2, x2 - uni_vminps(vmm_temp1, vmm_candidate_coord0, vmm_candidate_coord2); - uni_vmaxps(vmm_temp2, vmm_candidate_coord0, vmm_candidate_coord2); - uni_vmovups(vmm_candidate_coord0, vmm_temp1); - uni_vmovups(vmm_candidate_coord2, vmm_temp2); - - uni_vminps(vmm_temp1, vmm_candidate_coord1, vmm_candidate_coord3); - 
uni_vmaxps(vmm_temp2, vmm_candidate_coord1, vmm_candidate_coord3); - uni_vmovups(vmm_candidate_coord1, vmm_temp1); - uni_vmovups(vmm_candidate_coord3, vmm_temp2); - } else { - // box format: x_center, y_center, width, height --> y1, x1, y2, x2 - uni_vmulps(vmm_temp1, vmm_candidate_coord2, ptr[reg_table]); // width/2 - uni_vmulps(vmm_temp2, vmm_candidate_coord3, ptr[reg_table]); // height/2 - - uni_vaddps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center + width/2 - uni_vmovups(vmm_candidate_coord3, vmm_temp3); - - uni_vaddps(vmm_temp3, vmm_candidate_coord1, vmm_temp2); // y_center + height/2 - uni_vmovups(vmm_candidate_coord2, vmm_temp3); - - uni_vsubps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center - width/2 - uni_vsubps(vmm_temp4, vmm_candidate_coord1, vmm_temp2); // y_center - height/2 - - uni_vmovups(vmm_candidate_coord1, vmm_temp3); - uni_vmovups(vmm_candidate_coord0, vmm_temp4); - } - - // check from last to first - imul(reg_temp_64, reg_boxes_num, sizeof(float)); - add(reg_boxes_coord0, reg_temp_64); // y1 - add(reg_boxes_coord1, reg_temp_64); // x1 - add(reg_boxes_coord2, reg_temp_64); // y2 - add(reg_boxes_coord3, reg_temp_64); // x2 - - Xbyak::Label hard_nms_label; - Xbyak::Label nms_end_label; - - mov(reg_temp_32, ptr[reg_scale]); - test(reg_temp_32, reg_temp_32); - jz(hard_nms_label, T_NEAR); - - soft_nms(); - - jmp(nms_end_label, T_NEAR); - - L(hard_nms_label); - - hard_nms(); - - L(nms_end_label); - - this->postamble(); - - load_vector_emitter->emit_data(); - load_scalar_emitter->emit_data(); - - prepare_table(); - exp_injector->prepare_table(); - } - -private: - using Vmm = typename conditional3::type; - uint32_t vlen = cpu_isa_traits::vlen; - const int vector_step = vlen / sizeof(float); - const int scalar_step = 1; - - Xbyak::Reg64 reg_boxes_coord0 = r8; - Xbyak::Reg64 reg_boxes_coord1 = r9; - Xbyak::Reg64 reg_boxes_coord2 = r10; - Xbyak::Reg64 reg_boxes_coord3 = r11; - Xbyak::Reg64 reg_candidate_box = r12; - Xbyak::Reg64 
reg_candidate_status = r13; - Xbyak::Reg64 reg_boxes_num = r14; - Xbyak::Reg64 reg_iou_threshold = r15; - // more for soft - Xbyak::Reg64 reg_score_threshold = rdx; - Xbyak::Reg64 reg_score = rbp; - Xbyak::Reg64 reg_scale = rsi; - - Xbyak::Reg64 reg_load_table = rax; - Xbyak::Reg64 reg_load_store_mask = rbx; - - // reuse - Xbyak::Label l_table_constant; - Xbyak::Reg64 reg_table = rcx; - Xbyak::Reg64 reg_temp_64 = rdi; - Xbyak::Reg32 reg_temp_32 = edi; - - Xbyak::Reg64 reg_params = abi_param1; - - std::unique_ptr load_vector_emitter = nullptr; - std::unique_ptr load_scalar_emitter = nullptr; - - std::vector store_pool_gpr_idxs; - std::vector store_pool_vec_idxs; - std::vector load_pool_gpr_idxs; - - Vmm vmm_boxes_coord0 = Vmm(1); - Vmm vmm_boxes_coord1 = Vmm(2); - Vmm vmm_boxes_coord2 = Vmm(3); - Vmm vmm_boxes_coord3 = Vmm(4); - Vmm vmm_candidate_coord0 = Vmm(5); - Vmm vmm_candidate_coord1 = Vmm(6); - Vmm vmm_candidate_coord2 = Vmm(7); - Vmm vmm_candidate_coord3 = Vmm(8); - Vmm vmm_temp1 = Vmm(9); - Vmm vmm_temp2 = Vmm(10); - Vmm vmm_temp3 = Vmm(11); - Vmm vmm_temp4 = Vmm(12); - - Vmm vmm_iou_threshold = Vmm(13); - Vmm vmm_zero = Vmm(15); - - // soft - Vmm vmm_score_threshold = Vmm(14); - Vmm vmm_scale = Vmm(0); - - Xbyak::Opmask k_mask = Xbyak::Opmask(7); - Xbyak::Opmask k_mask_one = Xbyak::Opmask(6); - - std::shared_ptr> exp_injector; - - inline void hard_nms() { - Xbyak::Label main_loop_label_hard; - Xbyak::Label main_loop_end_label_hard; - Xbyak::Label tail_loop_label_hard; - Xbyak::Label terminate_label_hard; - L(main_loop_label_hard); - { - cmp(reg_boxes_num, vector_step); - jl(main_loop_end_label_hard, T_NEAR); - - sub(reg_boxes_coord0, vector_step * sizeof(float)); - sub(reg_boxes_coord1, vector_step * sizeof(float)); - sub(reg_boxes_coord2, vector_step * sizeof(float)); - sub(reg_boxes_coord3, vector_step * sizeof(float)); - - // iou result is in vmm_temp3 - iou(vector_step); - - sub(reg_boxes_num, vector_step); - - suppressed_by_iou(false); - - // if zero 
continue, else set result to suppressed and terminate - jz(main_loop_label_hard, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label_hard, T_NEAR); - } - L(main_loop_end_label_hard); - - L(tail_loop_label_hard); - { - cmp(reg_boxes_num, 1); - jl(terminate_label_hard, T_NEAR); - - sub(reg_boxes_coord0, scalar_step * sizeof(float)); - sub(reg_boxes_coord1, scalar_step * sizeof(float)); - sub(reg_boxes_coord2, scalar_step * sizeof(float)); - sub(reg_boxes_coord3, scalar_step * sizeof(float)); - - // iou result is in vmm_temp3 - iou(scalar_step); - - sub(reg_boxes_num, scalar_step); - - suppressed_by_iou(true); - - jz(tail_loop_label_hard, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label_hard, T_NEAR); - } - - L(terminate_label_hard); - } - - inline void soft_nms() { - uni_vbroadcastss(vmm_scale, ptr[reg_scale]); - - Xbyak::Label main_loop_label; - Xbyak::Label main_loop_end_label; - Xbyak::Label tail_loop_label; - Xbyak::Label terminate_label; - - Xbyak::Label main_loop_label_soft; - Xbyak::Label tail_loop_label_soft; - L(main_loop_label); - { - cmp(reg_boxes_num, vector_step); - jl(main_loop_end_label, T_NEAR); - - sub(reg_boxes_coord0, vector_step * sizeof(float)); - sub(reg_boxes_coord1, vector_step * sizeof(float)); - sub(reg_boxes_coord2, vector_step * sizeof(float)); - sub(reg_boxes_coord3, vector_step * sizeof(float)); - - // result(iou and weight) is in vmm_temp3 - iou(vector_step); - sub(reg_boxes_num, vector_step); - - // soft suppressed by iou_threshold - if (jcp.is_soft_suppressed_by_iou) { - suppressed_by_iou(false); - - // if zero continue soft suppression, else set result to suppressed and terminate - jz(main_loop_label_soft, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label, T_NEAR); - - L(main_loop_label_soft); - } - - // weight: std::exp(scale * iou * iou) - soft_coeff(); - - // vector 
weights multiply - horizontal_mul(); - - uni_vbroadcastss(vmm_temp1, ptr[reg_score]); - - // new score in vmm3[0] - uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1); - // store new score - uni_vmovss(ptr[reg_score], vmm_temp3); - - // cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold - suppressed_by_score(); - - jz(main_loop_label, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label, T_NEAR); - } - L(main_loop_end_label); - - L(tail_loop_label); - { - cmp(reg_boxes_num, 1); - jl(terminate_label, T_NEAR); - - sub(reg_boxes_coord0, scalar_step * sizeof(float)); - sub(reg_boxes_coord1, scalar_step * sizeof(float)); - sub(reg_boxes_coord2, scalar_step * sizeof(float)); - sub(reg_boxes_coord3, scalar_step * sizeof(float)); - - iou(scalar_step); - sub(reg_boxes_num, scalar_step); - - // soft suppressed by iou_threshold - if (jcp.is_soft_suppressed_by_iou) { - suppressed_by_iou(true); - - jz(tail_loop_label_soft, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label, T_NEAR); - - L(tail_loop_label_soft); - } - - soft_coeff(); - - uni_vbroadcastss(vmm_temp1, ptr[reg_score]); - - // vmm3[0] is valide, no need horizontal mul. - uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1); - - uni_vmovss(ptr[reg_score], vmm_temp3); - - // cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold - suppressed_by_score(); - - jz(tail_loop_label, T_NEAR); - - uni_vpextrd(ptr[reg_candidate_status], Xmm(vmm_zero.getIdx()), 0); - - jmp(terminate_label, T_NEAR); - } - - L(terminate_label); - } - - inline void suppressed_by_iou(bool is_scalar) { - if (mayiuse(cpu::x64::avx512_core)) { - vcmpps(k_mask, vmm_temp3, vmm_iou_threshold, 0x0D); // _CMP_GE_OS. 
vcmpps w/ kmask only on V5 - if (is_scalar) - kandw(k_mask, k_mask, k_mask_one); - kortestw(k_mask, k_mask); // bitwise check if all zero - } else if (mayiuse(cpu::x64::avx)) { - // vex instructions with xmm on avx and ymm on avx2 - vcmpps(vmm_temp4, vmm_temp3, vmm_iou_threshold, 0x0D); // xmm and ymm only on V1. - if (is_scalar) { - uni_vpextrd(reg_temp_32, Xmm(vmm_temp4.getIdx()), 0); - test(reg_temp_32, reg_temp_32); - } else { - uni_vtestps(vmm_temp4, vmm_temp4); // vtestps: sign bit check if all zeros, ymm and xmm only on V1, N/A on V5 - } - } else { - // pure sse path, make sure don't spoil vmm_temp3, which may used in after soft-suppression - uni_vmovups(vmm_temp4, vmm_temp3); - cmpps(vmm_temp4, vmm_iou_threshold, 0x07); // order compare, 0 for at least one is NaN - - uni_vmovups(vmm_temp2, vmm_temp3); - cmpps(vmm_temp2, vmm_iou_threshold, 0x05); // _CMP_GE_US on sse, no direct _CMP_GE_OS supported. - - uni_vandps(vmm_temp4, vmm_temp4, vmm_temp2); - if (is_scalar) { - uni_vpextrd(reg_temp_32, Xmm(vmm_temp4.getIdx()), 0); - test(reg_temp_32, reg_temp_32); - } else { - uni_vtestps(vmm_temp4, vmm_temp4); // ptest: bitwise check if all zeros, on sse41 - } - } - } - - inline void suppressed_by_score() { - if (mayiuse(cpu::x64::avx512_core)) { - vcmpps(k_mask, vmm_temp3, vmm_score_threshold, 0x02); // vcmpps w/ kmask only on V5, w/o kmask version N/A on V5 - kandw(k_mask, k_mask, k_mask_one); - kortestw(k_mask, k_mask); // bitwise check if all zero - } else if (mayiuse(cpu::x64::avx)) { - vcmpps(vmm_temp4, vmm_temp3, vmm_score_threshold, 0x02); - uni_vpextrd(reg_temp_32, Xmm(vmm_temp4.getIdx()), 0); - test(reg_temp_32, reg_temp_32); - } else { - cmpps(vmm_temp3, vmm_score_threshold, 0x02); // _CMP_LE_OS on sse - uni_vpextrd(reg_temp_32, Xmm(vmm_temp3.getIdx()), 0); - test(reg_temp_32, reg_temp_32); - } - } - - inline void iou(int ele_num) { - auto load = [&](Xbyak::Reg64 reg_src, Vmm vmm_dst) { - if (ele_num != scalar_step && ele_num != vector_step) - IE_THROW() 
<< "NMS JIT implementation supports load emitter with only element count scalar_step or vector_step! Get: " << ele_num; - - const auto& load_emitter = ele_num == 1 ? load_scalar_emitter : load_vector_emitter; - load_emitter->emit_code({static_cast(reg_src.getIdx())}, {static_cast(vmm_dst.getIdx())}, - {}, {load_pool_gpr_idxs}); - }; - load(reg_boxes_coord0, vmm_boxes_coord0); - load(reg_boxes_coord1, vmm_boxes_coord1); - load(reg_boxes_coord2, vmm_boxes_coord2); - load(reg_boxes_coord3, vmm_boxes_coord3); - - if (jcp.box_encode_type == NMSBoxEncodeType::CORNER) { - // box format: y1, x1, y2, x2 - uni_vminps(vmm_temp1, vmm_boxes_coord0, vmm_boxes_coord2); - uni_vmaxps(vmm_temp2, vmm_boxes_coord0, vmm_boxes_coord2); - uni_vmovups(vmm_boxes_coord0, vmm_temp1); - uni_vmovups(vmm_boxes_coord2, vmm_temp2); - - uni_vminps(vmm_temp1, vmm_boxes_coord1, vmm_boxes_coord3); - uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_boxes_coord3); - uni_vmovups(vmm_boxes_coord1, vmm_temp1); - uni_vmovups(vmm_boxes_coord3, vmm_temp2); - } else { - // box format: x_center, y_center, width, height --> y1, x1, y2, x2 - uni_vmulps(vmm_temp1, vmm_boxes_coord2, ptr[reg_table]); // width/2 - uni_vmulps(vmm_temp2, vmm_boxes_coord3, ptr[reg_table]); // height/2 - - uni_vaddps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center + width/2 - uni_vmovups(vmm_boxes_coord3, vmm_temp3); - - uni_vaddps(vmm_temp3, vmm_boxes_coord1, vmm_temp2); // y_center + height/2 - uni_vmovups(vmm_boxes_coord2, vmm_temp3); - - uni_vsubps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center - width/2 - uni_vsubps(vmm_temp4, vmm_boxes_coord1, vmm_temp2); // y_center - height/2 - - uni_vmovups(vmm_boxes_coord1, vmm_temp3); - uni_vmovups(vmm_boxes_coord0, vmm_temp4); - } - - uni_vsubps(vmm_temp1, vmm_boxes_coord2, vmm_boxes_coord0); - uni_vsubps(vmm_temp2, vmm_boxes_coord3, vmm_boxes_coord1); - uni_vmulps(vmm_temp1, vmm_temp1, vmm_temp2); // boxes area - - uni_vsubps(vmm_temp2, vmm_candidate_coord2, vmm_candidate_coord0); - 
uni_vsubps(vmm_temp3, vmm_candidate_coord3, vmm_candidate_coord1); - uni_vmulps(vmm_temp2, vmm_temp2, vmm_temp3); // candidate(bc) area // candidate area calculate once and check if 0 - - uni_vaddps(vmm_temp1, vmm_temp1, vmm_temp2); // areaI + areaJ to free vmm_temp2 - - // y of intersection - uni_vminps(vmm_temp3, vmm_boxes_coord2, vmm_candidate_coord2); // min(Ymax) - uni_vmaxps(vmm_temp4, vmm_boxes_coord0, vmm_candidate_coord0); // max(Ymin) - uni_vsubps(vmm_temp3, vmm_temp3, vmm_temp4); // min(Ymax) - max(Ymin) - uni_vmaxps(vmm_temp3, vmm_temp3, vmm_zero); - - // x of intersection - uni_vminps(vmm_temp4, vmm_boxes_coord3, vmm_candidate_coord3); // min(Xmax) - uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_candidate_coord1); // max(Xmin) - uni_vsubps(vmm_temp4, vmm_temp4, vmm_temp2); // min(Xmax) - max(Xmin) - uni_vmaxps(vmm_temp4, vmm_temp4, vmm_zero); - - // intersection_area - uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp4); - - // iou: intersection_area / (areaI + areaJ - intersection_area); - uni_vsubps(vmm_temp1, vmm_temp1, vmm_temp3); - uni_vdivps(vmm_temp3, vmm_temp3, vmm_temp1); - } - - // std::exp(scale * iou * iou) - inline void soft_coeff() { - uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp3); - uni_vmulps(vmm_temp3, vmm_temp3, vmm_scale); - exp_injector->compute_vector_range(vmm_temp3.getIdx(), vmm_temp3.getIdx() + 1); - } - - inline void horizontal_mul_xmm(const Xbyak::Xmm &xmm_weight, const Xbyak::Xmm &xmm_aux) { - uni_vmovshdup(xmm_aux, xmm_weight); // weight:1,2,3,4; aux:2,2,4,4 - uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2,2*2,3*4,4*4 - uni_vmovhlps(xmm_aux, xmm_aux, xmm_weight); // aux:3*4,4*4,4,4 - uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2*3*4,... 
- } - - // horizontal mul for vmm_weight(Vmm(3)), temp1 and temp2 as aux - inline void horizontal_mul() { - Xbyak::Xmm xmm_weight = Xbyak::Xmm(vmm_temp3.getIdx()); - Xbyak::Xmm xmm_temp1 = Xbyak::Xmm(vmm_temp1.getIdx()); - Xbyak::Xmm xmm_temp2 = Xbyak::Xmm(vmm_temp2.getIdx()); - if (isa == cpu::x64::sse41) { - horizontal_mul_xmm(xmm_weight, xmm_temp1); - } else if (isa == cpu::x64::avx2) { - Xbyak::Ymm ymm_weight = Xbyak::Ymm(vmm_temp3.getIdx()); - vextractf128(xmm_temp1, ymm_weight, 0); - vextractf128(xmm_temp2, ymm_weight, 1); - uni_vmulps(xmm_weight, xmm_temp1, xmm_temp2); - horizontal_mul_xmm(xmm_weight, xmm_temp1); - } else { - Xbyak::Zmm zmm_weight = Xbyak::Zmm(vmm_temp3.getIdx()); - vextractf32x4(xmm_temp1, zmm_weight, 0); - vextractf32x4(xmm_temp2, zmm_weight, 1); - uni_vmulps(xmm_temp1, xmm_temp1, xmm_temp2); - vextractf32x4(xmm_temp2, zmm_weight, 2); - vextractf32x4(xmm_weight, zmm_weight, 3); - uni_vmulps(xmm_weight, xmm_weight, xmm_temp2); - uni_vmulps(xmm_weight, xmm_weight, xmm_temp1); - horizontal_mul_xmm(xmm_weight, xmm_temp1); - } - } - - inline void prepare_table() { - auto broadcast_d = [&](int val) { - for (size_t d = 0; d < vlen / sizeof(int); ++d) { - dd(val); - } - }; - - align(64); - L(l_table_constant); - broadcast_d(0x3f000000); // 0.5f - dw(0x0001); - } -}; -#endif - -bool NonMaxSuppression::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool NonMaxSuppression::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - using NonMaxSuppressionV9 = ngraph::op::v9::NonMaxSuppression; - if (!one_of(op->get_type_info(), NonMaxSuppressionV9::get_type_info_static(), - ov::op::internal::NonMaxSuppressionIEInternal::get_type_info_static())) { - errorMessage = "Only NonMaxSuppression v9 and NonMaxSuppressionIEInternal are supported"; + if (!one_of(op->get_type_info(), op::v9::NonMaxSuppression::get_type_info_static(), + 
op::internal::NonMaxSuppressionIEInternal::get_type_info_static(), + op::v13::NMSRotated::get_type_info_static())) { + errorMessage = "Only NonMaxSuppression from opset9, NonMaxSuppressionIEInternal and NMSRotated from opset13 are supported."; return false; } - if (const auto nms9 = std::dynamic_pointer_cast(op)) { + if (auto nms9 = as_type(op.get())) { const auto boxEncoding = nms9->get_box_encoding(); - if (!one_of(boxEncoding, NonMaxSuppressionV9::BoxEncodingType::CENTER, NonMaxSuppressionV9::BoxEncodingType::CORNER)) { + if (!one_of(boxEncoding, op::v9::NonMaxSuppression::BoxEncodingType::CENTER, op::v9::NonMaxSuppression::BoxEncodingType::CORNER)) { errorMessage = "Supports only CENTER and CORNER box encoding type"; return false; } @@ -576,107 +46,125 @@ bool NonMaxSuppression::isSupportedOperation(const std::shared_ptr& op, const GraphContext::CPtr context) - : Node(op, context, InternalDynShapeInferFactory()), - isSoftSuppressedByIOU(false) { +NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, const GraphContext::CPtr& context) + : Node(op, context, InternalDynShapeInferFactory()), + m_is_soft_suppressed_by_iou(false) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { - IE_THROW(NotImplemented) << errorMessage; + OPENVINO_THROW(errorMessage); } - errorPrefix = "NMS layer with name '" + op->get_friendly_name() + "' "; - if (one_of(op->get_type_info(), ov::op::internal::NonMaxSuppressionIEInternal::get_type_info_static())) - m_outStaticShape = true; - - if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 6) - IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + if (one_of(op->get_type_info(), op::internal::NonMaxSuppressionIEInternal::get_type_info_static())) { + m_out_static_shape = true; + } - if (getOriginalOutputsNumber() != 3) - IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + if (getOriginalInputsNumber() 
< 2 || getOriginalInputsNumber() > NMS_SOFT_NMS_SIGMA + 1) { + THROW_CPU_NODE_ERR("has incorrect number of input edges: ", getOriginalInputsNumber()); + } + if (getOriginalOutputsNumber() != 3) { + THROW_CPU_NODE_ERR("has incorrect number of output edges: ", getOriginalOutputsNumber()); + } - if (const auto nms9 = std::dynamic_pointer_cast(op)) { + if (auto nms9 = as_type(op.get())) { boxEncodingType = static_cast(nms9->get_box_encoding()); - sortResultDescending = nms9->get_sort_result_descending(); - } else if (const auto nmsIe = std::dynamic_pointer_cast(op)) { - boxEncodingType = nmsIe->m_center_point_box ? NMSBoxEncodeType::CENTER : NMSBoxEncodeType::CORNER; - sortResultDescending = nmsIe->m_sort_result_descending; - } else { - const auto &typeInfo = op->get_type_info(); - IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version_id; - } + m_sort_result_descending = nms9->get_sort_result_descending(); + m_coord_num = 4lu; + } else if (auto nmsIe = as_type(op.get())) { + boxEncodingType = nmsIe->m_center_point_box ? 
NMSBoxEncodeType::CENTER : NMSBoxEncodeType::CORNER; + m_sort_result_descending = nmsIe->m_sort_result_descending; + m_coord_num = 4lu; + } else if (auto nms = as_type(op.get())) { + m_sort_result_descending = nms->get_sort_result_descending(); + m_clockwise = nms->get_clockwise(); + m_rotated_boxes = true; + m_coord_num = 5lu; + } else { + const auto &typeInfo = op->get_type_info(); + THROW_CPU_NODE_ERR("doesn't support NMS: ", typeInfo.name, " v", typeInfo.version_id); + } + + const auto &boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims(); + if (boxes_dims.size() != 3) { + THROW_CPU_NODE_ERR("has unsupported 'boxes' input rank: ", boxes_dims.size()); + } + if (boxes_dims[2] != m_coord_num) { + THROW_CPU_NODE_ERR("has unsupported 'boxes' input 3rd dimension size: ", boxes_dims[2]); + } + + const auto &scores_dims = getInputShapeAtPort(NMS_SCORES).getDims(); + if (scores_dims.size() != 3) { + THROW_CPU_NODE_ERR("has unsupported 'scores' input rank: ", scores_dims.size()); + } + + const auto& valid_outputs_shape = getOutputShapeAtPort(NMS_VALID_OUTPUTS); + if (valid_outputs_shape.getRank() != 1) { + THROW_CPU_NODE_ERR("has unsupported 'valid_outputs' output rank: ", valid_outputs_shape.getRank()); + } + if (valid_outputs_shape.getDims()[0] != 1) { + THROW_CPU_NODE_ERR("has unsupported 'valid_outputs' output 1st dimension size: ", valid_outputs_shape.getDims()[1]); + } - const auto &boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims(); - if (boxes_dims.size() != 3) - IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); - if (boxes_dims[2] != 4) - IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; - - const auto &scores_dims = getInputShapeAtPort(NMS_SCORES).getDims(); - if (scores_dims.size() != 3) - IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); - - const Shape valid_outputs_shape = getOutputShapeAtPort(NMS_VALIDOUTPUTS); - if 
(valid_outputs_shape.getRank() != 1) - IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_shape.getRank(); - if (valid_outputs_shape.getDims()[0] != 1) - IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_shape.getDims()[1]; + for (size_t i = 0lu; i < op->get_output_size(); i++) { + m_defined_outputs[i] = !op->get_output_target_inputs(i).empty(); + } } void NonMaxSuppression::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16, Precision::FP16}; - const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; - - checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); - checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); - checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALIDOUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); - - const std::vector supportedPrecision = {Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, - Precision::U32, Precision::I64, Precision::U64}; - - if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) - check1DInput(getInputShapeAtPort(NMS_MAXOUTPUTBOXESPERCLASS), supportedPrecision, "max_output_boxes_per_class", NMS_MAXOUTPUTBOXESPERCLASS); - if (inputShapes.size() > NMS_IOUTHRESHOLD) - check1DInput(getInputShapeAtPort(NMS_IOUTHRESHOLD), supportedFloatPrecision, "iou_threshold", NMS_IOUTHRESHOLD); - if (inputShapes.size() > NMS_SCORETHRESHOLD) - check1DInput(getInputShapeAtPort(NMS_SCORETHRESHOLD), supportedFloatPrecision, "score_threshold", NMS_SCORETHRESHOLD); - if (inputShapes.size() > NMS_SOFTNMSSIGMA) - check1DInput(getInputShapeAtPort(NMS_SCORETHRESHOLD), supportedFloatPrecision, "soft_nms_sigma", NMS_SCORETHRESHOLD); + const auto inputs_num = 
inputShapes.size(); + if (inputs_num > NMS_MAX_OUTPUT_BOXES_PER_CLASS) { + check1DInput(getInputShapeAtPort(NMS_MAX_OUTPUT_BOXES_PER_CLASS), "max_output_boxes_per_class", NMS_MAX_OUTPUT_BOXES_PER_CLASS); + } + if (inputs_num > NMS_IOU_THRESHOLD) { + check1DInput(getInputShapeAtPort(NMS_IOU_THRESHOLD), "iou_threshold", NMS_IOU_THRESHOLD); + } + if (inputs_num > NMS_SCORE_THRESHOLD) { + check1DInput(getInputShapeAtPort(NMS_SCORE_THRESHOLD), "score_threshold", NMS_SCORE_THRESHOLD); + } + if (inputs_num > NMS_SOFT_NMS_SIGMA) { + check1DInput(getInputShapeAtPort(NMS_SCORE_THRESHOLD), "soft_nms_sigma", NMS_SCORE_THRESHOLD); + } - checkOutput(getOutputShapeAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); - checkOutput(getOutputShapeAtPort(NMS_SELECTEDSCORES), supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); + checkOutput(getOutputShapeAtPort(NMS_SELECTED_INDICES), "selected_indices", NMS_SELECTED_INDICES); + checkOutput(getOutputShapeAtPort(NMS_SELECTED_SCORES), "selected_scores", NMS_SELECTED_SCORES); std::vector inDataConf; - inDataConf.reserve(inputShapes.size()); - for (size_t i = 0; i < inputShapes.size(); ++i) { - Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; + inDataConf.reserve(inputs_num); + for (size_t i = 0; i < inputs_num; ++i) { + Precision inPrecision = i == NMS_MAX_OUTPUT_BOXES_PER_CLASS ? Precision::I32 : Precision::FP32; inDataConf.emplace_back(LayoutType::ncsp, inPrecision); } std::vector outDataConf; outDataConf.reserve(outputShapes.size()); for (size_t i = 0; i < outputShapes.size(); ++i) { - Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32; + Precision outPrecision = i == NMS_SELECTED_SCORES ? 
Precision::FP32 : Precision::I32; outDataConf.emplace_back(LayoutType::ncsp, outPrecision); } - impl_desc_type impl_type; - if (mayiuse(cpu::x64::avx512_core)) { - impl_type = impl_desc_type::jit_avx512; - } else if (mayiuse(cpu::x64::avx2)) { - impl_type = impl_desc_type::jit_avx2; - } else if (mayiuse(cpu::x64::sse41)) { - impl_type = impl_desc_type::jit_sse42; - } else { - impl_type = impl_desc_type::ref; - } + impl_desc_type impl_type = impl_desc_type::ref; - addSupportedPrimDesc(inDataConf, outDataConf, impl_type); +#if defined(OPENVINO_ARCH_X86_64) + using namespace dnnl::impl::cpu; - // as only FP32 and ncsp is supported, and kernel is shape agnostic, we can create here. There is no need to recompilation. + // As only FP32 and ncsp is supported, and kernel is shape agnostic, we can create here. There is no need to recompilation. createJitKernel(); + + x64::cpu_isa_t actual_isa = x64::isa_undef; + if (m_jit_kernel) { + actual_isa = m_jit_kernel->getIsa(); + } + switch (actual_isa) { + case x64::avx512_core: impl_type = impl_desc_type::jit_avx512; break; + case x64::avx2: impl_type = impl_desc_type::jit_avx2; break; + case x64::sse41: impl_type = impl_desc_type::jit_sse42; break; + default: impl_type = impl_desc_type::ref; + } +#endif // OPENVINO_ARCH_X86_64 + + addSupportedPrimDesc(inDataConf, outDataConf, impl_type); } void NonMaxSuppression::prepareParams() { @@ -685,193 +173,170 @@ void NonMaxSuppression::prepareParams() { const auto& scoresDims = isDynamicNode() ? 
getParentEdgesAtPort(NMS_SCORES)[0]->getMemory().getStaticDims() : getInputShapeAtPort(NMS_SCORES).getStaticDims(); - numBatches = boxesDims[0]; - numBoxes = boxesDims[1]; - numClasses = scoresDims[1]; - if (numBatches != scoresDims[0]) - IE_THROW() << errorPrefix << " numBatches is different in 'boxes' and 'scores' inputs"; - if (numBoxes != scoresDims[2]) - IE_THROW() << errorPrefix << " numBoxes is different in 'boxes' and 'scores' inputs"; - - numFiltBox.resize(numBatches); - for (auto & i : numFiltBox) - i.resize(numClasses); -} + m_batches_num = boxesDims[0]; + m_boxes_num = boxesDims[1]; + m_classes_num = scoresDims[1]; + if (m_batches_num != scoresDims[0]) { + THROW_CPU_NODE_ERR("Batches number is different in 'boxes' and 'scores' inputs"); + } + if (m_boxes_num != scoresDims[2]) { + THROW_CPU_NODE_ERR("Boxes number is different in 'boxes' and 'scores' inputs"); + } -bool NonMaxSuppression::isExecutable() const { - return isDynamicNode() || Node::isExecutable(); + m_output_boxes_per_class = std::min(m_max_output_boxes_per_class, m_boxes_num); + const auto max_number_of_boxes = m_output_boxes_per_class * m_batches_num * m_classes_num; + m_filtered_boxes.resize(max_number_of_boxes); + + m_num_filtered_boxes.resize(m_batches_num); + for (auto & i : m_num_filtered_boxes) { + i.resize(m_classes_num); + } } void NonMaxSuppression::createJitKernel() { #if defined(OPENVINO_ARCH_X86_64) - auto jcp = jit_nms_config_params(); - jcp.box_encode_type = boxEncodingType; - jcp.is_soft_suppressed_by_iou = isSoftSuppressedByIOU; - - if (mayiuse(cpu::x64::avx512_core)) { - nms_kernel.reset(new jit_uni_nms_kernel_f32(jcp)); - } else if (mayiuse(cpu::x64::avx2)) { - nms_kernel.reset(new jit_uni_nms_kernel_f32(jcp)); - } else if (mayiuse(cpu::x64::sse41)) { - nms_kernel.reset(new jit_uni_nms_kernel_f32(jcp)); - } + if (!m_rotated_boxes) { + auto jcp = kernel::NmsCompileParams(); + jcp.box_encode_type = boxEncodingType; + jcp.is_soft_suppressed_by_iou = 
m_is_soft_suppressed_by_iou; - if (nms_kernel) - nms_kernel->create_ker(); -#endif + m_jit_kernel = kernel::JitKernel::createInstance(jcp); + } +#endif // OPENVINO_ARCH_X86_64 } void NonMaxSuppression::executeDynamicImpl(dnnl::stream strm) { - if (hasEmptyInputTensors() || (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS && - reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->getData())[0] == 0)) { + if (hasEmptyInputTensors() || (inputShapes.size() > NMS_MAX_OUTPUT_BOXES_PER_CLASS && + reinterpret_cast(getParentEdgeAt(NMS_MAX_OUTPUT_BOXES_PER_CLASS)->getMemoryPtr()->getData())[0] == 0)) { redefineOutputMemory({{0, 3}, {0, 3}, {1}}); - *reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->getData()) = 0; + *reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->getData()) = 0; return; } execute(strm); } void NonMaxSuppression::execute(dnnl::stream strm) { - const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->getData()); - const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->getData()); - - if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { - maxOutputBoxesPerClass = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->getData())[0]; + const auto inputs_num = inputShapes.size(); + + size_t max_number_of_boxes = m_output_boxes_per_class * m_batches_num * m_classes_num; + if (inputs_num > NMS_MAX_OUTPUT_BOXES_PER_CLASS) { + auto val = reinterpret_cast(getParentEdgeAt(NMS_MAX_OUTPUT_BOXES_PER_CLASS)->getMemoryPtr()->getData())[0]; + m_max_output_boxes_per_class = val <= 0l ? 
0lu : static_cast(val); + m_output_boxes_per_class = std::min(m_max_output_boxes_per_class, m_boxes_num); + max_number_of_boxes = m_output_boxes_per_class * m_batches_num * m_classes_num; + m_filtered_boxes.resize(max_number_of_boxes); } - - maxOutputBoxesPerClass = std::min(maxOutputBoxesPerClass, numBoxes); - - if (maxOutputBoxesPerClass == 0) { + if (m_max_output_boxes_per_class == 0lu) { return; } - if (inputShapes.size() > NMS_IOUTHRESHOLD) - iouThreshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->getData())[0]; - - if (inputShapes.size() > NMS_SCORETHRESHOLD) - scoreThreshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->getData())[0]; - - if (inputShapes.size() > NMS_SOFTNMSSIGMA) - softNMSSigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->getData())[0]; - scale = 0.0f; - if (softNMSSigma > 0.0) { - scale = -0.5f / softNMSSigma; + if (inputs_num > NMS_IOU_THRESHOLD) { + m_iou_threshold = reinterpret_cast(getParentEdgeAt(NMS_IOU_THRESHOLD)->getMemoryPtr()->getData())[0]; + } + if (inputs_num > NMS_SCORE_THRESHOLD) { + m_score_threshold = reinterpret_cast(getParentEdgeAt(NMS_SCORE_THRESHOLD)->getMemoryPtr()->getData())[0]; } + if (inputs_num > NMS_SOFT_NMS_SIGMA) { + m_soft_nms_sigma = reinterpret_cast(getParentEdgeAt(NMS_SOFT_NMS_SIGMA)->getMemoryPtr()->getData())[0]; + m_scale = (m_soft_nms_sigma > 0.f) ? 
(-0.5f / m_soft_nms_sigma) : 0.f; + } + + auto boxes_memory = getParentEdgeAt(NMS_BOXES)->getMemoryPtr(); + auto scores_memory = getParentEdgeAt(NMS_SCORES)->getMemoryPtr(); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().getDescWithType()->getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().getDescWithType()->getStrides(); + auto boxes = reinterpret_cast(boxes_memory->getData()); + auto scores = reinterpret_cast(scores_memory->getData()); - const auto maxNumberOfBoxes = maxOutputBoxesPerClass * numBatches * numClasses; - std::vector filtBoxes(maxNumberOfBoxes); + const auto& boxes_strides = boxes_memory->getDescWithType()->getStrides(); + const auto& scores_strides = scores_memory->getDescWithType()->getStrides(); - if (softNMSSigma == 0.0f) { - nmsWithoutSoftSigma(boxes, scores, boxesStrides, scoresStrides, filtBoxes); + if (m_rotated_boxes) { + nmsRotated(boxes, scores, boxes_strides, scores_strides, m_filtered_boxes); + } else if (m_soft_nms_sigma == 0.f) { + nmsWithoutSoftSigma(boxes, scores, boxes_strides, scores_strides, m_filtered_boxes); } else { - nmsWithSoftSigma(boxes, scores, boxesStrides, scoresStrides, filtBoxes); + nmsWithSoftSigma(boxes, scores, boxes_strides, scores_strides, m_filtered_boxes); } - size_t startOffset = numFiltBox[0][0]; - for (size_t b = 0; b < numFiltBox.size(); b++) { - size_t batchOffset = b*numClasses*maxOutputBoxesPerClass; - for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) { - size_t offset = batchOffset + c*maxOutputBoxesPerClass; - for (size_t i = 0; i < numFiltBox[b][c]; i++) { - filtBoxes[startOffset + i] = filtBoxes[offset + i]; + size_t start_offset = m_num_filtered_boxes[0][0]; + for (size_t b = 0lu; b < m_num_filtered_boxes.size(); b++) { + size_t batchOffset = b * m_classes_num * m_output_boxes_per_class; + for (size_t c = (b == 0lu ? 
1lu : 0lu); c < m_num_filtered_boxes[b].size(); c++) { + size_t offset = batchOffset + c * m_output_boxes_per_class; + for (size_t i = 0lu; i < m_num_filtered_boxes[b][c]; i++) { + m_filtered_boxes[start_offset + i] = m_filtered_boxes[offset + i]; } - startOffset += numFiltBox[b][c]; + start_offset += m_num_filtered_boxes[b][c]; } } - filtBoxes.resize(startOffset); + auto boxes_ptr = m_filtered_boxes.data(); // need more particular comparator to get deterministic behaviour // escape situation when filtred boxes with same score have different position from launch to launch - if (sortResultDescending) { - parallel_sort(filtBoxes.begin(), filtBoxes.end(), - [](const filteredBoxes& l, const filteredBoxes& r) { + if (m_sort_result_descending) { + parallel_sort(boxes_ptr, boxes_ptr + start_offset, + [](const FilteredBox& l, const FilteredBox& r) { return (l.score > r.score) || - (l.score == r.score && l.batch_index < r.batch_index) || - (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || - (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); + (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); }); } - auto indicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr(); - auto scoresMemPtr = getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr(); - const size_t validOutputs = std::min(filtBoxes.size(), maxNumberOfBoxes); + const size_t valid_outputs = std::min(start_offset, max_number_of_boxes); - if (!m_outStaticShape) { - VectorDims newDims{validOutputs, 3}; - redefineOutputMemory({newDims, newDims, {1}}); - } + if (m_defined_outputs[NMS_SELECTED_INDICES]) { + const size_t stride = 3lu; - int selectedIndicesStride = 
indicesMemPtr->getDescWithType()->getStrides()[0]; + if (!m_out_static_shape) { + redefineOutputMemory(NMS_SELECTED_INDICES, { valid_outputs, stride }); + } - int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->getData()); - float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->getData()); + auto out_ptr = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->getData()); + int32_t* boxes_ptr = &(m_filtered_boxes[0].batch_index); - size_t idx = 0lu; - for (; idx < validOutputs; idx++) { - selectedIndicesPtr[0] = filtBoxes[idx].batch_index; - selectedIndicesPtr[1] = filtBoxes[idx].class_index; - selectedIndicesPtr[2] = filtBoxes[idx].box_index; - selectedIndicesPtr += selectedIndicesStride; + size_t idx = 0lu; + for (; idx < valid_outputs; idx++) { + memcpy(out_ptr, boxes_ptr, 12); + out_ptr += stride; + boxes_ptr += 4; + } - selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); - selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); - selectedScoresPtr[2] = static_cast(filtBoxes[idx].score); - selectedScoresPtr += selectedIndicesStride; + if (m_out_static_shape) { + std::fill(out_ptr, out_ptr + (max_number_of_boxes - idx) * stride, -1); + } } - if (m_outStaticShape) { - std::fill(selectedIndicesPtr, selectedIndicesPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1); - std::fill(selectedScoresPtr, selectedScoresPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1.f); - } + if (m_defined_outputs[NMS_SELECTED_SCORES]) { + const size_t stride = 3lu; - int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->getData()); - *valid_outputs = static_cast(validOutputs); -} + if (!m_out_static_shape) { + redefineOutputMemory(NMS_SELECTED_SCORES, { valid_outputs, stride }); + } -bool NonMaxSuppression::created() const { - return getType() == Type::NonMaxSuppression; -} + auto out_ptr = 
reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_SCORES)[0]->getMemoryPtr()->getData()); -float NonMaxSuppression::intersectionOverUnion(const float *boxesI, const float *boxesJ) { - float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; - if (boxEncodingType == NMSBoxEncodeType::CENTER) { - // box format: x_center, y_center, width, height - yminI = boxesI[1] - boxesI[3] / 2.f; - xminI = boxesI[0] - boxesI[2] / 2.f; - ymaxI = boxesI[1] + boxesI[3] / 2.f; - xmaxI = boxesI[0] + boxesI[2] / 2.f; - yminJ = boxesJ[1] - boxesJ[3] / 2.f; - xminJ = boxesJ[0] - boxesJ[2] / 2.f; - ymaxJ = boxesJ[1] + boxesJ[3] / 2.f; - xmaxJ = boxesJ[0] + boxesJ[2] / 2.f; - } else { - // box format: y1, x1, y2, x2 - yminI = (std::min)(boxesI[0], boxesI[2]); - xminI = (std::min)(boxesI[1], boxesI[3]); - ymaxI = (std::max)(boxesI[0], boxesI[2]); - xmaxI = (std::max)(boxesI[1], boxesI[3]); - yminJ = (std::min)(boxesJ[0], boxesJ[2]); - xminJ = (std::min)(boxesJ[1], boxesJ[3]); - ymaxJ = (std::max)(boxesJ[0], boxesJ[2]); - xmaxJ = (std::max)(boxesJ[1], boxesJ[3]); - } + size_t idx = 0lu; + for (; idx < valid_outputs; idx++) { + out_ptr[0] = static_cast(m_filtered_boxes[idx].batch_index); + out_ptr[1] = static_cast(m_filtered_boxes[idx].class_index); + out_ptr[2] = m_filtered_boxes[idx].score; + out_ptr += stride; + } - float areaI = (ymaxI - yminI) * (xmaxI - xminI); - float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ); - if (areaI <= 0.f || areaJ <= 0.f) - return 0.f; + if (m_out_static_shape) { + std::fill(out_ptr, out_ptr + (max_number_of_boxes - idx) * stride, -1.f); + } + } - float intersection_area = - (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) * - (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f); - return intersection_area / (areaI + areaJ - intersection_area); + if (m_defined_outputs[NMS_VALID_OUTPUTS]) { + auto out_ptr = reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->getData()); + *out_ptr = 
static_cast(valid_outputs); + } } void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, - const VectorDims &scoresStrides, std::vector &filtBoxes) { + const VectorDims &scoresStrides, std::vector &filtBoxes) { auto less = [](const boxInfo& l, const boxInfo& r) { return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); }; @@ -880,23 +345,23 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores // if is_soft_suppressed_by_iou is false, apply for all iou, including iou>iou_threshold, soft suppressed when score < score_threshold // if is_soft_suppressed_by_iou is true, hard suppressed by iou_threshold, then soft suppress auto coeff = [&](float iou) { - if (isSoftSuppressedByIOU && iou > iouThreshold) + if (m_is_soft_suppressed_by_iou && iou > m_iou_threshold) return 0.0f; - return std::exp(scale * iou * iou); + return std::exp(m_scale * iou * iou); }; - parallel_for2d(numBatches, numClasses, [&](int batch_idx, int class_idx) { - std::vector selectedBoxes; + parallel_for2d(m_batches_num, m_classes_num, [&](int batch_idx, int class_idx) { + std::vector selectedBoxes; const float *boxesPtr = boxes + batch_idx * boxesStrides[0]; const float *scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; std::priority_queue, decltype(less)> sorted_boxes(less); // score, box_id, suppress_begin_index - for (int box_idx = 0; box_idx < static_cast(numBoxes); box_idx++) { - if (scoresPtr[box_idx] > scoreThreshold) + for (int box_idx = 0; box_idx < static_cast(m_boxes_num); box_idx++) { + if (scoresPtr[box_idx] > m_score_threshold) sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); } - size_t sortedBoxSize = sorted_boxes.size(); - size_t maxSeletedBoxNum = std::min(sortedBoxSize, maxOutputBoxesPerClass); + size_t sorted_boxes_size = sorted_boxes.size(); + size_t maxSeletedBoxNum = std::min(sorted_boxes_size, m_output_boxes_per_class); 
selectedBoxes.reserve(maxSeletedBoxNum); if (maxSeletedBoxNum > 0) { // include first directly @@ -904,22 +369,23 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores sorted_boxes.pop(); selectedBoxes.push_back({ candidateBox.score, batch_idx, class_idx, candidateBox.idx }); if (maxSeletedBoxNum > 1) { - if (nms_kernel) { + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) std::vector boxCoord0(maxSeletedBoxNum, 0.0f); std::vector boxCoord1(maxSeletedBoxNum, 0.0f); std::vector boxCoord2(maxSeletedBoxNum, 0.0f); std::vector boxCoord3(maxSeletedBoxNum, 0.0f); - boxCoord0[0] = boxesPtr[candidateBox.idx * 4]; - boxCoord1[0] = boxesPtr[candidateBox.idx * 4 + 1]; - boxCoord2[0] = boxesPtr[candidateBox.idx * 4 + 2]; - boxCoord3[0] = boxesPtr[candidateBox.idx * 4 + 3]; + boxCoord0[0] = boxesPtr[candidateBox.idx * m_coord_num]; + boxCoord1[0] = boxesPtr[candidateBox.idx * m_coord_num + 1]; + boxCoord2[0] = boxesPtr[candidateBox.idx * m_coord_num + 2]; + boxCoord3[0] = boxesPtr[candidateBox.idx * m_coord_num + 3]; - auto arg = jit_nms_args(); - arg.iou_threshold = static_cast(&iouThreshold); - arg.score_threshold = static_cast(&scoreThreshold); - arg.scale = static_cast(&scale); - while (selectedBoxes.size() < maxOutputBoxesPerClass && !sorted_boxes.empty()) { + auto arg = kernel::NmsCallArgs(); + arg.iou_threshold = static_cast(&m_iou_threshold); + arg.score_threshold = static_cast(&m_score_threshold); + arg.scale = static_cast(&m_scale); + while (selectedBoxes.size() < m_output_boxes_per_class && !sorted_boxes.empty()) { boxInfo candidateBox = sorted_boxes.top(); float origScore = candidateBox.score; sorted_boxes.pop(); @@ -931,9 +397,9 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores arg.selected_boxes_coord[1] = static_cast(&boxCoord1[candidateBox.suppress_begin_index]); arg.selected_boxes_coord[2] = static_cast(&boxCoord2[candidateBox.suppress_begin_index]); arg.selected_boxes_coord[3] = 
static_cast(&boxCoord3[candidateBox.suppress_begin_index]); - arg.candidate_box = static_cast(&boxesPtr[candidateBox.idx * 4]); + arg.candidate_box = static_cast(&boxesPtr[candidateBox.idx * m_coord_num]); arg.candidate_status = static_cast(&candidateStatus); - (*nms_kernel)(&arg); + (*m_jit_kernel)(&arg); if (candidateStatus == NMSCandidateStatus::SUPPRESSED) { continue; @@ -941,30 +407,32 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores if (candidateBox.score == origScore) { selectedBoxes.push_back({ candidateBox.score, batch_idx, class_idx, candidateBox.idx }); int selectedSize = selectedBoxes.size(); - boxCoord0[selectedSize - 1] = boxesPtr[candidateBox.idx * 4]; - boxCoord1[selectedSize - 1] = boxesPtr[candidateBox.idx * 4 + 1]; - boxCoord2[selectedSize - 1] = boxesPtr[candidateBox.idx * 4 + 2]; - boxCoord3[selectedSize - 1] = boxesPtr[candidateBox.idx * 4 + 3]; + boxCoord0[selectedSize - 1] = boxesPtr[candidateBox.idx * m_coord_num]; + boxCoord1[selectedSize - 1] = boxesPtr[candidateBox.idx * m_coord_num + 1]; + boxCoord2[selectedSize - 1] = boxesPtr[candidateBox.idx * m_coord_num + 2]; + boxCoord3[selectedSize - 1] = boxesPtr[candidateBox.idx * m_coord_num + 3]; } else { candidateBox.suppress_begin_index = selectedBoxes.size(); sorted_boxes.push(candidateBox); } } } +#endif // OPENVINO_ARCH_X86_64 } else { - while (selectedBoxes.size() < maxOutputBoxesPerClass && !sorted_boxes.empty()) { + while (selectedBoxes.size() < m_output_boxes_per_class && !sorted_boxes.empty()) { boxInfo candidateBox = sorted_boxes.top(); float origScore = candidateBox.score; sorted_boxes.pop(); int candidateStatus = NMSCandidateStatus::SELECTED; // 0 for suppressed, 1 for selected, 2 for updated for (int selected_idx = static_cast(selectedBoxes.size()) - 1; selected_idx >= candidateBox.suppress_begin_index; selected_idx--) { - float iou = intersectionOverUnion(&boxesPtr[candidateBox.idx * 4], &boxesPtr[selectedBoxes[selected_idx].box_index * 4]); + 
float iou = intersectionOverUnion(&boxesPtr[candidateBox.idx * m_coord_num], + &boxesPtr[selectedBoxes[selected_idx].box_index * m_coord_num]); // when is_soft_suppressed_by_iou is true, score is decayed to zero and implicitely suppressed if iou > iou_threshold. candidateBox.score *= coeff(iou); // soft suppressed - if (candidateBox.score <= scoreThreshold) { + if (candidateBox.score <= m_score_threshold) { candidateStatus = NMSCandidateStatus::SUPPRESSED; break; } @@ -984,8 +452,8 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores } } } - numFiltBox[batch_idx][class_idx] = selectedBoxes.size(); - size_t offset = batch_idx*numClasses*maxOutputBoxesPerClass + class_idx*maxOutputBoxesPerClass; + m_num_filtered_boxes[batch_idx][class_idx] = selectedBoxes.size(); + size_t offset = batch_idx * m_classes_num * m_output_boxes_per_class + class_idx * m_output_boxes_per_class; for (size_t i = 0; i < selectedBoxes.size(); i++) { filtBoxes[offset + i] = selectedBoxes[i]; } @@ -993,44 +461,47 @@ void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores } void NonMaxSuppression::nmsWithoutSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, - const VectorDims &scoresStrides, std::vector &filtBoxes) { - int max_out_box = static_cast(maxOutputBoxesPerClass); - parallel_for2d(numBatches, numClasses, [&](int batch_idx, int class_idx) { + const VectorDims &scoresStrides, std::vector &filtBoxes) { + int max_out_box = static_cast(m_output_boxes_per_class); + parallel_for2d(m_batches_num, m_classes_num, [&](int batch_idx, int class_idx) { const float *boxesPtr = boxes + batch_idx * boxesStrides[0]; const float *scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; std::vector> sorted_boxes; // score, box_idx - for (size_t box_idx = 0; box_idx < numBoxes; box_idx++) { - if (scoresPtr[box_idx] > scoreThreshold) + sorted_boxes.reserve(m_boxes_num); + for (size_t box_idx = 0; 
box_idx < m_boxes_num; box_idx++) { + if (scoresPtr[box_idx] > m_score_threshold) { sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } } int io_selection_size = 0; - size_t sortedBoxSize = sorted_boxes.size(); - if (sortedBoxSize > 0) { + const size_t sortedBoxSize = sorted_boxes.size(); + if (sortedBoxSize > 0lu) { parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair& l, const std::pair& r) { return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); }); - int offset = batch_idx*numClasses*maxOutputBoxesPerClass + class_idx*maxOutputBoxesPerClass; - filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); + int offset = batch_idx * m_classes_num * m_output_boxes_per_class + class_idx * m_output_boxes_per_class; + filtBoxes[offset + 0] = FilteredBox(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); io_selection_size++; - if (sortedBoxSize > 1) { - if (nms_kernel) { + if (sortedBoxSize > 1lu) { + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) std::vector boxCoord0(sortedBoxSize, 0.0f); std::vector boxCoord1(sortedBoxSize, 0.0f); std::vector boxCoord2(sortedBoxSize, 0.0f); std::vector boxCoord3(sortedBoxSize, 0.0f); - boxCoord0[0] = boxesPtr[sorted_boxes[0].second * 4]; - boxCoord1[0] = boxesPtr[sorted_boxes[0].second * 4 + 1]; - boxCoord2[0] = boxesPtr[sorted_boxes[0].second * 4 + 2]; - boxCoord3[0] = boxesPtr[sorted_boxes[0].second * 4 + 3]; + boxCoord0[0] = boxesPtr[sorted_boxes[0].second * m_coord_num]; + boxCoord1[0] = boxesPtr[sorted_boxes[0].second * m_coord_num + 1]; + boxCoord2[0] = boxesPtr[sorted_boxes[0].second * m_coord_num + 2]; + boxCoord3[0] = boxesPtr[sorted_boxes[0].second * m_coord_num + 3]; - auto arg = jit_nms_args(); - arg.iou_threshold = static_cast(&iouThreshold); - arg.score_threshold = static_cast(&scoreThreshold); - arg.scale = static_cast(&scale); + auto arg = kernel::NmsCallArgs(); + 
arg.iou_threshold = static_cast(&m_iou_threshold); + arg.score_threshold = static_cast(&m_score_threshold); + arg.scale = static_cast(&m_scale); // box start index do not change for hard supresion arg.selected_boxes_coord[0] = static_cast(&boxCoord0[0]); arg.selected_boxes_coord[1] = static_cast(&boxCoord1[0]); @@ -1040,26 +511,27 @@ void NonMaxSuppression::nmsWithoutSoftSigma(const float *boxes, const float *sco for (size_t candidate_idx = 1; (candidate_idx < sortedBoxSize) && (io_selection_size < max_out_box); candidate_idx++) { int candidateStatus = NMSCandidateStatus::SELECTED; // 0 for suppressed, 1 for selected arg.selected_boxes_num = io_selection_size; - arg.candidate_box = static_cast(&boxesPtr[sorted_boxes[candidate_idx].second * 4]); + arg.candidate_box = static_cast(&boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num]); arg.candidate_status = static_cast(&candidateStatus); - (*nms_kernel)(&arg); + (*m_jit_kernel)(&arg); if (candidateStatus == NMSCandidateStatus::SELECTED) { - boxCoord0[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * 4]; - boxCoord1[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * 4 + 1]; - boxCoord2[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * 4 + 2]; - boxCoord3[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * 4 + 3]; + boxCoord0[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num]; + boxCoord1[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num + 1]; + boxCoord2[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num + 2]; + boxCoord3[io_selection_size] = boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num + 3]; filtBoxes[offset + io_selection_size] = - filteredBoxes(sorted_boxes[candidate_idx].first, batch_idx, class_idx, sorted_boxes[candidate_idx].second); + FilteredBox(sorted_boxes[candidate_idx].first, batch_idx, class_idx, sorted_boxes[candidate_idx].second); 
io_selection_size++; } } +#endif // OPENVINO_ARCH_X86_64 } else { for (size_t candidate_idx = 1; (candidate_idx < sortedBoxSize) && (io_selection_size < max_out_box); candidate_idx++) { int candidateStatus = NMSCandidateStatus::SELECTED; // 0 for suppressed, 1 for selected for (int selected_idx = io_selection_size - 1; selected_idx >= 0; selected_idx--) { - float iou = intersectionOverUnion(&boxesPtr[sorted_boxes[candidate_idx].second * 4], - &boxesPtr[filtBoxes[offset + selected_idx].box_index * 4]); - if (iou >= iouThreshold) { + float iou = intersectionOverUnion(&boxesPtr[sorted_boxes[candidate_idx].second * m_coord_num], + &boxesPtr[filtBoxes[offset + selected_idx].box_index * m_coord_num]); + if (iou >= m_iou_threshold) { candidateStatus = NMSCandidateStatus::SUPPRESSED; break; } @@ -1067,7 +539,7 @@ void NonMaxSuppression::nmsWithoutSoftSigma(const float *boxes, const float *sco if (candidateStatus == NMSCandidateStatus::SELECTED) { filtBoxes[offset + io_selection_size] = - filteredBoxes(sorted_boxes[candidate_idx].first, batch_idx, class_idx, sorted_boxes[candidate_idx].second); + FilteredBox(sorted_boxes[candidate_idx].first, batch_idx, class_idx, sorted_boxes[candidate_idx].second); io_selection_size++; } } @@ -1075,35 +547,372 @@ void NonMaxSuppression::nmsWithoutSoftSigma(const float *boxes, const float *sco } } - numFiltBox[batch_idx][class_idx] = io_selection_size; + m_num_filtered_boxes[batch_idx][class_idx] = io_selection_size; }); } -void NonMaxSuppression::checkPrecision(const Precision& prec, const std::vector& precList, - const std::string& name, const std::string& type) { - if (std::find(precList.begin(), precList.end(), prec) == precList.end()) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +////////// Rotated boxes ////////// + +struct RotatedBox { + float x_ctr, y_ctr, w, h, a; +}; + +inline float dot_2d(const NonMaxSuppression::Point2D& A, const NonMaxSuppression::Point2D& B) { + return 
A.x * B.x + A.y * B.y; } -void NonMaxSuppression::check1DInput(const Shape& shape, const std::vector& precList, - const std::string& name, const size_t port) { - checkPrecision(getOriginalInputPrecisionAtPort(port), precList, name, inType); +inline float cross_2d(const NonMaxSuppression::Point2D& A, const NonMaxSuppression::Point2D& B) { + return A.x * B.y - B.x * A.y; +} + +inline void getRotatedVertices(const float* box, NonMaxSuppression::Point2D (&pts)[4], bool clockwise) { + auto theta = clockwise ? box[4] : -box[4]; + + auto cos_theta = std::cos(theta) * 0.5f; + auto sin_theta = std::sin(theta) * 0.5f; + + // y: top --> down; x: left --> right + // Left-Down + pts[0].x = box[0] - sin_theta * box[3] - cos_theta * box[2]; + pts[0].y = box[1] + cos_theta * box[3] - sin_theta * box[2]; + // Left-Top + pts[1].x = box[0] + sin_theta * box[3] - cos_theta * box[2]; + pts[1].y = box[1] - cos_theta * box[3] - sin_theta * box[2]; + // Right-Top + pts[2].x = 2 * box[0] - pts[0].x; + pts[2].y = 2 * box[1] - pts[0].y; + // Right-Down + pts[3].x = 2 * box[0] - pts[1].x; + pts[3].y = 2 * box[1] - pts[1].y; +} + +inline float polygonArea(const NonMaxSuppression::Point2D (&q)[24], const int64_t& m) { + if (m <= 2l) { + return 0.f; + } + + float area = 0.f; + size_t mlu = static_cast(m - 1l); + for (size_t i = 1lu; i < mlu; i++) { + area += std::abs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); + } + + return area / 2.f; +} + +inline size_t convexHullGraham(const NonMaxSuppression::Point2D (&p)[24], + const size_t num_in, + NonMaxSuppression::Point2D (&q)[24]) { + OPENVINO_ASSERT(num_in >= 2lu); + + // Step 1: + // Find point with minimum y + // if more than 1 points have the same minimum y, + // pick the one with the minimum x. 
+ size_t t = 0lu; + for (size_t i = 1lu; i < num_in; i++) { + if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { + t = i; + } + } + auto& start = p[t]; // starting point + + // Step 2: + // Subtract starting point from every points (for sorting in the next step) + for (size_t i = 0lu; i < num_in; i++) { + q[i] = p[i] - start; + } + + // Swap the starting point to position 0 + std::swap(q[t], q[0]); + + // Step 3: + // Sort point 1 ~ num_in according to their relative cross-product values + // (essentially sorting according to angles) + // If the angles are the same, sort according to their distance to origin + float dist[24]; + for (size_t i = 0lu; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } + + std::sort(q + 1, q + num_in, [](const NonMaxSuppression::Point2D& A, const NonMaxSuppression::Point2D& B) -> bool { + float temp = cross_2d(A, B); + if (std::abs(temp) < 1e-6f) { + return dot_2d(A, A) < dot_2d(B, B); + } else { + return temp > 0.f; + } + }); + // compute distance to origin after sort, since the points are now different. + for (size_t i = 0lu; i < num_in; i++) { + dist[i] = dot_2d(q[i], q[i]); + } + + // Step 4: + // Make sure there are at least 2 points (that don't overlap with each other) + // in the stack + size_t k = 1lu; // index of the non-overlapped second point + for (; k < num_in; k++) { + if (dist[k] > 1e-8f) { + break; + } + } + if (k == num_in) { + // We reach the end, which means the convex hull is just one point + q[0] = p[t]; + return 1lu; + } + q[1] = q[k]; + size_t m = 2lu; // 2 points in the stack + // Step 5: + // Finally we can start the scanning process. 
+ // When a non-convex relationship between the 3 points is found + // (either concave shape or duplicated points), + // we pop the previous point from the stack + // until the 3-point relationship is convex again, or + // until the stack only contains two points + for (size_t i = k + 1lu; i < num_in; i++) { + while (m > 1lu && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) { + m--; + } + q[m++] = q[i]; + } + + return m; +} + +inline size_t getIntersectionPoints(const NonMaxSuppression::Point2D (&pts1)[4], + const NonMaxSuppression::Point2D (&pts2)[4], + NonMaxSuppression::Point2D (&intersections)[24]) { + // Line vector + // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] + NonMaxSuppression::Point2D vec1[4], vec2[4]; + for (size_t i = 0lu; i < 4lu; i++) { + vec1[i] = pts1[(i + 1lu) % 4lu] - pts1[i]; + vec2[i] = pts2[(i + 1lu) % 4lu] - pts2[i]; + } + + // Line test - test all line combos for intersection + size_t num = 0lu; // number of intersections + for (size_t i = 0lu; i < 4lu; i++) { + for (size_t j = 0lu; j < 4lu; j++) { + // Solve for 2x2 Ax=b + float det = cross_2d(vec2[j], vec1[i]); + + // This takes care of parallel lines + if (std::abs(det) <= 1e-14f) { + continue; + } + + auto vec12 = pts2[j] - pts1[i]; + + auto t1 = cross_2d(vec2[j], vec12) / det; + auto t2 = cross_2d(vec1[i], vec12) / det; + + if (t1 >= 0.f && t1 <= 1.f && t2 >= 0.f && t2 <= 1.f) { + intersections[num++] = pts1[i] + vec1[i] * t1; + } + } + } + + // Check for vertices of rect1 inside rect2 + { + const auto& AB = vec2[0]; + const auto& DA = vec2[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (size_t i = 0lu; i < 4lu; i++) { + // Assume ABCD is the rectangle, and P is the point to be judged + // P is inside ABCD if P's projection on AB lies within AB + // and P's projection on AD lies within AD + + auto AP = pts1[i] - pts2[0]; + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= 
ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num++] = pts1[i]; + } + } + } + + // Reverse the check - check for vertices of rect2 inside rect1 + { + const auto& AB = vec1[0]; + const auto& DA = vec1[3]; + auto ABdotAB = dot_2d(AB, AB); + auto ADdotAD = dot_2d(DA, DA); + for (size_t i = 0lu; i < 4lu; i++) { + auto AP = pts2[i] - pts1[0]; + + auto APdotAB = dot_2d(AP, AB); + auto APdotAD = -dot_2d(AP, DA); + + if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) { + intersections[num++] = pts2[i]; + } + } + } + + return num; +} + +inline float rotatedBoxesIntersection(const NonMaxSuppression::Point2D (&vertices_0)[4], const float* box_1, const bool clockwise) { + // There are up to 4 x 4 + 4 + 4 = 24 intersections (including duplicates) returned + NonMaxSuppression::Point2D intersect_pts[24], ordered_pts[24]; + + NonMaxSuppression::Point2D vertices_1[4]; + getRotatedVertices(box_1, vertices_1, clockwise); + + auto num = getIntersectionPoints(vertices_0, vertices_1, intersect_pts); + + if (num <= 2lu) { + return 0.f; + } + + auto num_convex = convexHullGraham(intersect_pts, num, ordered_pts); + return polygonArea(ordered_pts, num_convex); +} + +inline float NonMaxSuppression::rotatedIntersectionOverUnion(const NonMaxSuppression::Point2D (&vertices_0)[4], const float area_0, const float* box_1) { + const auto area_1 = box_1[2] * box_1[3]; // W x H + if (area_1 <= 0.f) { + return 0.f; + } + + const auto intersection = rotatedBoxesIntersection(vertices_0, box_1, m_clockwise); + + return intersection / (area_0 + area_1 - intersection); +} + +void NonMaxSuppression::nmsRotated(const float* boxes, const float* scores, const VectorDims& boxes_strides, + const VectorDims& scores_strides, std::vector& filtered_boxes) { + if (m_jit_kernel) { + THROW_CPU_NODE_ERR("does not have implementation of the JIT kernel for Rotated boxes."); + } else { + parallel_for2d(m_batches_num, m_classes_num, [&](int64_t batch_idx, int64_t class_idx) { + const 
float *boxes_ptr = boxes + batch_idx * boxes_strides[0]; + const float *scores_ptr = scores + batch_idx * scores_strides[0] + class_idx * scores_strides[1]; + + std::vector> sorted_indices; // score, box_idx + sorted_indices.reserve(m_boxes_num); + for (size_t box_idx = 0lu; box_idx < m_boxes_num; box_idx++, scores_ptr++) { + if (*scores_ptr > m_score_threshold) { + sorted_indices.emplace_back(std::make_pair(*scores_ptr, box_idx)); + } + } + + size_t io_selection_size = 0lu; + const size_t sorted_boxes_size = sorted_indices.size(); + + if (sorted_boxes_size > 0lu) { + parallel_sort(sorted_indices.begin(), sorted_indices.end(), + [](const std::pair& l, const std::pair& r) { + return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); + }); + auto sorted_indices_ptr = sorted_indices.data(); + auto filtered_boxes_ptr = filtered_boxes.data() + + batch_idx * m_classes_num * m_output_boxes_per_class + class_idx * m_output_boxes_per_class; + *filtered_boxes_ptr = FilteredBox(sorted_indices[0].first, batch_idx, class_idx, sorted_indices[0].second); + io_selection_size++; + if (sorted_boxes_size > 1lu) { + sorted_indices_ptr++; + NMSCandidateStatus candidate_status; + + for (size_t candidate_idx = 1lu; (candidate_idx < sorted_boxes_size) && (io_selection_size < m_output_boxes_per_class); + candidate_idx++, sorted_indices_ptr++) { + candidate_status = NMSCandidateStatus::SELECTED; + auto box_0 = boxes_ptr + (*sorted_indices_ptr).second * m_coord_num; + const auto area_0 = box_0[2] * box_0[3]; // W x H + + if (area_0 > 0.f) { + NonMaxSuppression::Point2D vertices_0[4]; + getRotatedVertices(box_0, vertices_0, m_clockwise); + auto trg_boxes = reinterpret_cast(&((*filtered_boxes_ptr).box_index)); + for (size_t selected_idx = 0lu; selected_idx < io_selection_size; selected_idx++, trg_boxes -= 4) { + auto iou = rotatedIntersectionOverUnion(vertices_0, area_0, boxes_ptr + m_coord_num * (*trg_boxes)); + if (iou > m_iou_threshold) { + candidate_status = 
NMSCandidateStatus::SUPPRESSED; + break; + } + } + } else if (0.f > m_iou_threshold) { + candidate_status = NMSCandidateStatus::SUPPRESSED; + } + + if (candidate_status == NMSCandidateStatus::SELECTED) { + *(++filtered_boxes_ptr) = + FilteredBox((*sorted_indices_ptr).first, batch_idx, class_idx, (*sorted_indices_ptr).second); + io_selection_size++; + } + } + } + } + + m_num_filtered_boxes[batch_idx][class_idx] = io_selection_size; + }); + } +} + +/////////////// End of Rotated boxes /////////////// + +float NonMaxSuppression::intersectionOverUnion(const float *boxesI, const float *boxesJ) { + float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; + if (boxEncodingType == NMSBoxEncodeType::CENTER) { + // box format: x_center, y_center, width, height + yminI = boxesI[1] - boxesI[3] / 2.f; + xminI = boxesI[0] - boxesI[2] / 2.f; + ymaxI = boxesI[1] + boxesI[3] / 2.f; + xmaxI = boxesI[0] + boxesI[2] / 2.f; + yminJ = boxesJ[1] - boxesJ[3] / 2.f; + xminJ = boxesJ[0] - boxesJ[2] / 2.f; + ymaxJ = boxesJ[1] + boxesJ[3] / 2.f; + xmaxJ = boxesJ[0] + boxesJ[2] / 2.f; + } else { + // box format: y1, x1, y2, x2 + yminI = (std::min)(boxesI[0], boxesI[2]); + xminI = (std::min)(boxesI[1], boxesI[3]); + ymaxI = (std::max)(boxesI[0], boxesI[2]); + xmaxI = (std::max)(boxesI[1], boxesI[3]); + yminJ = (std::min)(boxesJ[0], boxesJ[2]); + xminJ = (std::min)(boxesJ[1], boxesJ[3]); + ymaxJ = (std::max)(boxesJ[0], boxesJ[2]); + xmaxJ = (std::max)(boxesJ[1], boxesJ[3]); + } + + float areaI = (ymaxI - yminI) * (xmaxI - xminI); + float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ); + if (areaI <= 0.f || areaJ <= 0.f) + return 0.f; + + float intersection_area = + (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) * + (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f); + return intersection_area / (areaI + areaJ - intersection_area); +} + +void NonMaxSuppression::check1DInput(const Shape& shape, const std::string& name, const size_t port) { if 
(shape.getRank() != 0 && shape.getRank() != 1) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' input rank: " << shape.getRank(); + THROW_CPU_NODE_ERR("has unsupported '", name, "' input rank: ", shape.getRank()); if (shape.getRank() == 1) if (shape.getDims()[0] != 1) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' input 1st dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[0]); + THROW_CPU_NODE_ERR("has unsupported '", name, "' input 1st dimension size: ", MemoryDescUtils::dim2str(shape.getDims()[0])); } -void NonMaxSuppression::checkOutput(const Shape& shape, const std::vector& precList, - const std::string& name, const size_t port) { - checkPrecision(getOriginalOutputPrecisionAtPort(port), precList, name, outType); - +void NonMaxSuppression::checkOutput(const Shape& shape, const std::string& name, const size_t port) { if (shape.getRank() != 2) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' output rank: " << shape.getRank(); + THROW_CPU_NODE_ERR("has unsupported '", name, "' output rank: ", shape.getRank()); if (shape.getDims()[1] != 3) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[1]); + THROW_CPU_NODE_ERR("has unsupported '", name, "' output 2nd dimension size: ", MemoryDescUtils::dim2str(shape.getDims()[1])); +} + +bool NonMaxSuppression::isExecutable() const { + return isDynamicNode() || Node::isExecutable(); +} + +bool NonMaxSuppression::created() const { + return getType() == Type::NonMaxSuppression; } } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h index 2599fa3843ff06..6547737ef9952c 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h @@ -4,82 +4,43 @@ #pragma once -#include -#include -#include -#include -#include +#include "node.h" 
+#include "kernels/x64/non_max_suppression.hpp" -#define BOX_COORD_NUM 4 - -using namespace InferenceEngine; namespace ov { namespace intel_cpu { namespace node { -enum class NMSBoxEncodeType { - CORNER, - CENTER -}; - enum NMSCandidateStatus { SUPPRESSED = 0, SELECTED = 1, UPDATED = 2 }; -struct jit_nms_config_params { - NMSBoxEncodeType box_encode_type; - bool is_soft_suppressed_by_iou; -}; - -struct jit_nms_args { - const void* selected_boxes_coord[BOX_COORD_NUM]; - size_t selected_boxes_num; - const void* candidate_box; - const void* iou_threshold; - void* candidate_status; - // for soft suppression, score *= scale * iou * iou; - const void* score_threshold; - const void* scale; - void* score; -}; - -struct jit_uni_nms_kernel { - void (*ker_)(const jit_nms_args *); - - void operator()(const jit_nms_args *args) { - assert(ker_); - ker_(args); - } - - explicit jit_uni_nms_kernel(jit_nms_config_params jcp_) : ker_(nullptr), jcp(jcp_) {} - virtual ~jit_uni_nms_kernel() {} - - virtual void create_ker() = 0; - - jit_nms_config_params jcp; -}; - class NonMaxSuppression : public Node { public: - NonMaxSuppression(const std::shared_ptr& op, const GraphContext::CPtr context); + NonMaxSuppression(const std::shared_ptr& op, const GraphContext::CPtr& context); void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void execute(dnnl::stream strm) override; + + void executeDynamicImpl(dnnl::stream strm) override; + bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; - struct filteredBoxes { + struct FilteredBox { float score; int batch_index; int class_index; int box_index; - filteredBoxes() = default; - filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) : + FilteredBox() = default; + FilteredBox(float _score, int _batch_index, 
int _class_index, int _box_index) : score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {} }; @@ -89,66 +50,101 @@ class NonMaxSuppression : public Node { int suppress_begin_index; }; - float intersectionOverUnion(const float *boxesI, const float *boxesJ); - - void nmsWithSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, - const SizeVector &scoresStrides, std::vector &filtBoxes); - - void nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, - const SizeVector &scoresStrides, std::vector &filtBoxes); - - void executeDynamicImpl(dnnl::stream strm) override; - bool isExecutable() const override; + bool needShapeInfer() const override { return false; } + void prepareParams() override; + struct Point2D { + float x, y; + Point2D(const float px = 0.f, const float py = 0.f) : x(px), y(py) {} + Point2D operator+(const Point2D& p) const { + return Point2D(x + p.x, y + p.y); + } + Point2D& operator+=(const Point2D& p) { + x += p.x; + y += p.y; + return *this; + } + Point2D operator-(const Point2D& p) const { + return Point2D(x - p.x, y - p.y); + } + Point2D operator*(const float coeff) const { + return Point2D(x * coeff, y * coeff); + } + }; + private: // input enum { NMS_BOXES, NMS_SCORES, - NMS_MAXOUTPUTBOXESPERCLASS, - NMS_IOUTHRESHOLD, - NMS_SCORETHRESHOLD, - NMS_SOFTNMSSIGMA, + NMS_MAX_OUTPUT_BOXES_PER_CLASS, + NMS_IOU_THRESHOLD, + NMS_SCORE_THRESHOLD, + NMS_SOFT_NMS_SIGMA, }; // output enum { - NMS_SELECTEDINDICES, - NMS_SELECTEDSCORES, - NMS_VALIDOUTPUTS + NMS_SELECTED_INDICES, + NMS_SELECTED_SCORES, + NMS_VALID_OUTPUTS }; - NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER; - bool sortResultDescending = true; + float intersectionOverUnion(const float *boxesI, const float *boxesJ); - size_t numBatches = 0; - size_t numBoxes = 0; - size_t numClasses = 0; + float rotatedIntersectionOverUnion(const Point2D (&vertices_0)[4], const float area_0, const 
float* box_1); - size_t maxOutputBoxesPerClass = 0lu; - float iouThreshold = 0.0f; - float scoreThreshold = 0.0f; - float softNMSSigma = 0.0f; - float scale = 1.f; - // control placeholder for NMS in new opset. - bool isSoftSuppressedByIOU = false; + void nmsWithSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides, + const InferenceEngine::SizeVector &scoresStrides, std::vector &filtBoxes); - bool m_outStaticShape = false; + void nmsWithoutSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides, + const InferenceEngine::SizeVector &scoresStrides, std::vector &filtBoxes); - std::string errorPrefix; + void nmsRotated(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides, + const InferenceEngine::SizeVector &scoresStrides, std::vector &filtBoxes); - std::vector> numFiltBox; - const std::string inType = "input", outType = "output"; + void check1DInput(const Shape& shape, + const std::string& name, + const size_t port); - void checkPrecision(const Precision& prec, const std::vector& precList, const std::string& name, const std::string& type); - void check1DInput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port); - void checkOutput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port); + void checkOutput(const Shape& shape, + const std::string& name, + const size_t port); void createJitKernel(); - std::shared_ptr nms_kernel = nullptr; + + + NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER; + bool m_sort_result_descending = true; + bool m_clockwise = false; + bool m_rotated_boxes = false; + size_t m_coord_num = 1lu; + + size_t m_batches_num = 0lu; + size_t m_boxes_num = 0lu; + size_t m_classes_num = 0lu; + + size_t m_max_output_boxes_per_class = 0lu; // Original value of input NMS_MAX_OUTPUT_BOXES_PER_CLASS + size_t m_output_boxes_per_class = 0lu; // Actual 
number of output boxes + float m_iou_threshold = 0.f; + float m_score_threshold = 0.f; + float m_soft_nms_sigma = 0.f; + float m_scale = 0.f; + // control placeholder for NMS in new opset. + bool m_is_soft_suppressed_by_iou = false; + + bool m_out_static_shape = false; + + std::vector> m_num_filtered_boxes; + const std::string inType = "input"; + const std::string outType = "output"; + bool m_defined_outputs[NMS_VALID_OUTPUTS + 1] = { false, false, false }; + std::vector m_filtered_boxes; + + std::shared_ptr m_jit_kernel; }; } // namespace node diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 21483175aed169..274d23ce23b527 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -197,6 +197,8 @@ std::vector disabledTestPatterns() { R"(.*RDFTLayerTest.*SignalSize=().*)", // Issue: 123815 (Tests are sensintive to available thread count on testing machines) R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)", + // Issue: 122356 + R"(.*NmsRotatedOpTest.*(SortDesc=True|Clockwise=False).*)", }; #if defined(OPENVINO_ARCH_X86) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/nms_rotated.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/nms_rotated.cpp new file mode 100644 index 00000000000000..7888a88a60221d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/nms_rotated.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_op_tests/nms_rotated.hpp" + +using namespace LayerTestsDefinitions; +using namespace ov::test; + + +static const std::vector> input_shapes = { + { + { {}, {{1, 5, 5}} }, + { {}, {{1, 7, 5}} } + }, + { + 
{ {}, {{2, 9, 5}} }, + { {}, {{2, 15, 9}} } + }, + { + { {}, {{5, 17, 5}} }, + { {}, {{5, 7, 17}} } + }, + { + { {}, {{9, 75, 5}} }, + { {}, {{9, 55, 75}} } + }, + { + { {-1, -1, 5}, {{5, 20, 5}, {3, 50, 5}, {2, 99, 5}} }, + { {-1, -1, -1}, {{5, 30, 20}, {3, 100, 50}, {2, 133, 99}} } + } +}; + +static const std::vector> input_shapes_nightly = { + { + { {}, {{3, 11, 5}} }, + { {}, {{3, 15, 11}} } + }, + { + { {}, {{15, 29, 5}} }, + { {}, {{15, 31, 29}} } + }, + { + { {}, {{21, 64, 5}} }, + { {}, {{21, 32, 64}} } + }, + { + { {-1, -1, 5}, {{7, 35, 5}, {7, 35, 5}, {7, 35, 5}} }, + { {-1, -1, -1}, {{7, 30, 35}, {7, 100, 35}, {7, 133, 35}} } + } +}; + +const ov::AnyMap empty_plugin_config{}; + +INSTANTIATE_TEST_SUITE_P(smoke_, NmsRotatedOpTest, + ::testing::Combine( + ::testing::ValuesIn(input_shapes), // Input shapes + ::testing::Values(ElementType::f32), // Boxes and scores input precisions + ::testing::Values(ElementType::i32), // Max output boxes input precisions + ::testing::Values(ElementType::f32), // Thresholds precisions + ::testing::Values(ElementType::i32), // Output type + ::testing::Values(5, 20), // Max output boxes per class + ::testing::Values(0.3f, 0.7f), // IOU threshold + ::testing::Values(0.3f, 0.7f), // Score threshold + ::testing::Values(true, false), // Sort result descending + ::testing::Values(true, false), // Clockwise + ::testing::Values(false), // Is 1st input constant + ::testing::Values(false), // Is 2nd input constant + ::testing::Values(false), // Is 3rd input constant + ::testing::Values(false), // Is 4th input constant + ::testing::Values(false), // Is 5th input constant + ::testing::Values(empty_plugin_config), // Additional plugin configuration + ::testing::Values(utils::DEVICE_CPU)), // Device name + NmsRotatedOpTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_, NmsRotatedOpTest, + ::testing::Combine( + ::testing::ValuesIn(input_shapes_nightly), + ::testing::Values(ElementType::f16, ElementType::bf16), + 
::testing::Values(ElementType::i64), + ::testing::Values(ElementType::f16, ElementType::bf16), + ::testing::Values(ElementType::i64), + ::testing::Values(10), + ::testing::Values(0.5f), + ::testing::Values(0.4f), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(empty_plugin_config), + ::testing::Values(utils::DEVICE_CPU)), + NmsRotatedOpTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp index 072b481dd3c3da..a43b208ad9754f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/non_max_suppression.cpp @@ -43,9 +43,9 @@ using NmsParams = std::tuple; // Device name class NmsLayerCPUTest : public testing::WithParamInterface, virtual public SubgraphBaseTest, public CPUTestsBase { @@ -57,9 +57,9 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p ngraph::helpers::InputLayerType maxOutBoxesType; ThresholdValues thrValues; float iouThr, scoreThr, softNmsSigma; - op::v9::NonMaxSuppression::BoxEncodingType boxEncoding; + ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding; bool sortResDescend; - element::Type outType; + ElementType outType; std::string targetDevice; std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType, targetDevice) = obj.param; @@ -115,12 +115,12 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p ThresholdValues thrValues; ngraph::helpers::InputLayerType maxOutBoxesType; float iouThr, scoreThr, softNmsSigma; - op::v9::NonMaxSuppression::BoxEncodingType boxEncoding; + 
ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding; bool sortResDescend; - element::Type outType; + ElementType outType; std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType, targetDevice) = this->GetParam(); - element::Type paramsPrec, maxBoxPrec, thrPrec; + ElementType paramsPrec, maxBoxPrec, thrPrec; std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions; std::tie(iouThr, scoreThr, softNmsSigma) = thrValues; @@ -156,7 +156,7 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) { inputDynamicShapes.push_back(ngraph::PartialShape{1}); - params.push_back(std::make_shared(element::Type_t::i32, inputDynamicShapes.back())); + params.push_back(std::make_shared(ElementType::i32, inputDynamicShapes.back())); params[1]->set_friendly_name("param_3"); maxOutBoxesPerClassNode = params.back(); } else { @@ -166,7 +166,7 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p auto iouThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector{iouThr})->output(0); auto scoreThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector{scoreThr})->output(0); auto softNmsSigmaNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector{softNmsSigma})->output(0); - auto nms = std::make_shared(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode, + auto nms = std::make_shared(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode, softNmsSigmaNode, boxEncoding, sortResDescend, outType); function = makeNgraphFunction(paramsPrec, params, nms, "NMS"); @@ -276,7 +276,7 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p expectedList.resize(selected_indices_size); - if (indeces_iter->get_element_type() == ov::element::i32) { + if (indeces_iter->get_element_type() == ElementType::i32) { auto selected_indices_data = 
indeces_iter->data(); for (size_t i = 0; i < selected_indices_size; i += 3) { @@ -296,7 +296,7 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p } } - if (scores_iter->get_element_type() == ov::element::f32) { + if (scores_iter->get_element_type() == ElementType::f32) { auto selected_scores_data = scores_iter->data(); for (size_t i = 0; i < selected_scores_size; i += 3) { expectedList[i/3].score = selected_scores_data[i+2]; @@ -319,7 +319,7 @@ class NmsLayerCPUTest : public testing::WithParamInterface, virtual p size_t selected_indices_size = indeces_iter->get_size(); const auto selected_scores_data = scores_iter->data(); - if (indeces_iter->get_element_type() == ov::element::i32) { + if (indeces_iter->get_element_type() == ElementType::i32) { const auto selected_indices_data = indeces_iter->data(); for (size_t i = 0; i < selected_indices_size; i += 3) { const int32_t batchId = selected_indices_data[i+0]; @@ -415,10 +415,10 @@ const std::vector inShapeParams = { const std::vector maxOutBoxPerClass = {5, 20}; const std::vector threshold = {0.3f, 0.7f}; const std::vector sigmaThreshold = {0.0f, 0.5f}; -const std::vector encodType = {op::v9::NonMaxSuppression::BoxEncodingType::CENTER, - op::v9::NonMaxSuppression::BoxEncodingType::CORNER}; +const std::vector encodType = {ov::op::v9::NonMaxSuppression::BoxEncodingType::CENTER, + ov::op::v9::NonMaxSuppression::BoxEncodingType::CORNER}; const std::vector sortResDesc = {true, false}; -const std::vector outType = {element::i32, element::i64}; +const std::vector outType = {ElementType::i32, ElementType::i64}; const std::vector maxBoxInputTypes = {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT}; const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams), diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/nms_rotated.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/nms_rotated.hpp new file mode 100644 index 
00000000000000..e0b83a31866381 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/nms_rotated.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/nms_rotated.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(NmsRotatedOpTest, CompareWithRefs) { + run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/nms_rotated.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/nms_rotated.hpp new file mode 100644 index 00000000000000..ec7b5a32ec3e1c --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/nms_rotated.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Input shapes + ov::test::ElementType, // Boxes and scores input precisions + ov::test::ElementType, // Max output boxes input precisions + ov::test::ElementType, // Thresholds precisions + ov::test::ElementType, // Output type + int64_t, // Max output boxes per class + float, // IOU threshold + float, // Score threshold + bool, // Sort result descending + bool, // Clockwise + bool, // Is 1st input constant + bool, // Is 2nd input constant + bool, // Is 3rd input constant + bool, // Is 4th input constant + bool, // Is 5th input constant + ov::AnyMap, // Additional configuration + std::string // Device name +> NmsRotatedParams; + +class NmsRotatedOpTest : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; + + void generate_inputs(const std::vector& target_shapes) 
override; + +private: + int64_t m_max_out_boxes_per_class; + float m_iou_threshold; + float m_score_threshold; +}; + +} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/single_op/nms_rotated.cpp b/src/tests/functional/shared_test_classes/src/single_op/nms_rotated.cpp new file mode 100644 index 00000000000000..c6c9e210633ae2 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/nms_rotated.cpp @@ -0,0 +1,207 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/nms_rotated.hpp" +#include "ov_models/builders.hpp" +#include "common_test_utils/data_utils.hpp" +#include "openvino/op/nms_rotated.hpp" + +using namespace ov::test; + +namespace LayerTestsDefinitions { + +std::string NmsRotatedOpTest::getTestCaseName(const testing::TestParamInfo& obj) { + const auto& in_shapes = std::get<0>(obj.param); + + std::ostringstream result; + + result << "IS=("; + for (size_t i = 0lu; i < in_shapes.size(); i++) { + result << utils::partialShape2str({in_shapes[i].first}) << (i < in_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < in_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < in_shapes.size(); j++) { + result << utils::vec2str(in_shapes[j].second[i]) << (j < in_shapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + result << "_BoxPrc=" << std::get<1>(obj.param); + result << "_MaxPrc=" << std::get<2>(obj.param); + result << "_ThrPrc=" << std::get<3>(obj.param); + result << "_OutPrc=" << std::get<4>(obj.param); + result << "_MaxBox=" << std::get<5>(obj.param); + result << "_IouThr=" << std::get<6>(obj.param); + result << "_ScoreThr=" << std::get<7>(obj.param); + result << "_SortDesc=" << utils::bool2str(std::get<8>(obj.param)); + result << "_Clockwise=" << utils::bool2str(std::get<9>(obj.param)); + result << "_ConstIn={" << utils::bool2str(std::get<10>(obj.param)) << "," + << utils::bool2str(std::get<11>(obj.param)) << "," + << utils::bool2str(std::get<12>(obj.param)) << "," + << utils::bool2str(std::get<13>(obj.param)) << "," + << utils::bool2str(std::get<14>(obj.param)) << "}"; + + const auto& config = std::get<15>(obj.param); + if (!config.empty()) { + result << "_Config={"; + for (const auto& conf_item : config) { + result << "_" << conf_item.first << "="; + conf_item.second.print(result); + } + result << "}"; + } + + result << "_Device=" << std::get<16>(obj.param); + + return result.str(); +} + +void NmsRotatedOpTest::SetUp() { + const auto& params = this->GetParam(); + const auto& in_shapes = std::get<0>(params); + const auto& boxes_prc = std::get<1>(params); + const auto& max_boxes_prc = std::get<2>(params); + const auto& thresholds_prc = std::get<3>(params); + const auto& out_prc = std::get<4>(params); + m_max_out_boxes_per_class = std::get<5>(params); + m_iou_threshold = std::get<6>(params); + m_score_threshold = std::get<7>(params); + const auto& sort_descending = std::get<8>(params); + const auto& clockwise = std::get<9>(params); + const auto& is_0_in_const = std::get<10>(params); + const auto& is_1_in_const = std::get<11>(params); + const auto& is_2_in_const = std::get<12>(params); + const auto& is_3_in_const = std::get<13>(params); + const auto& is_4_in_const = std::get<14>(params); + configuration = std::get<15>(params); + 
targetDevice = std::get<16>(params); + + std::vector actual_shapes; + ov::ParameterVector in_params; + std::vector> inputs; + const auto in_shape_1d = InputShape{{1}, {{1}}}; + +#define CONST_CASE(P, S, H, L) \ + case P: \ + inputs.push_back(ngraph::builder::makeConstant(P, S, std::vector::value_type>{}, true, \ + ov::element_type_traits

::value_type(H), ov::element_type_traits

::value_type(L))); \ + break; + +#define CREATE_INPUT(C, P, S, N, H, L) \ + if (C) { \ + switch (P) { \ + CONST_CASE(ElementType::f32, S.second[0], H, L) \ + CONST_CASE(ElementType::f16, S.second[0], H, L) \ + CONST_CASE(ElementType::bf16, S.second[0], H, L) \ + CONST_CASE(ElementType::i32, S.second[0], H, L) \ + CONST_CASE(ElementType::i64, S.second[0], H, L) \ + default: OPENVINO_THROW("NmsRotated does not support precision ", P, " for the ", N, " input."); \ + } \ + } else { \ + actual_shapes.push_back(S); \ + if (S.first.rank() == 0) { \ + in_params.push_back(std::make_shared(P, S.second.front())); \ + } else { \ + in_params.push_back(std::make_shared(P, S.first)); \ + } \ + in_params.back()->set_friendly_name(N); \ + inputs.push_back(in_params.back()); \ + } + + CREATE_INPUT(is_0_in_const, boxes_prc, in_shapes[0], "Boxes", 30, 10) + CREATE_INPUT(is_1_in_const, boxes_prc, in_shapes[1], "Scores", 1, 0) + CREATE_INPUT(is_2_in_const, max_boxes_prc, in_shape_1d, "MaxOutputBoxesPerClass", m_max_out_boxes_per_class, m_max_out_boxes_per_class) + CREATE_INPUT(is_3_in_const, thresholds_prc, in_shape_1d, "IouThreshold", m_iou_threshold, m_iou_threshold) + CREATE_INPUT(is_4_in_const, thresholds_prc, in_shape_1d, "ScoreThreshold", m_score_threshold, m_score_threshold) + +#undef CONST_CASE +#undef CREATE_INPUT + + init_input_shapes(actual_shapes); + + const auto nms_op = std::make_shared(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], + sort_descending, out_prc, clockwise); + ov::ResultVector results; + for (size_t i = 0lu; i < nms_op->get_output_size(); i++) { + results.push_back(std::make_shared(nms_op->output(i))); + } + + function = std::make_shared(results, in_params, "NMSRotated"); +} + +template +void fill_data(TD* dst, const TS* src, size_t len) { + for (size_t i = 0llu; i < len; i++) { + dst[i] = static_cast

- - - + + + - - - + + +
OperationOperation ({OpName}-{FirstOpsetNumberContainsOp}){{ d }}(src[i]); + } +} + +void NmsRotatedOpTest::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& func_inputs = function->inputs(); + + for (size_t i = 0llu; i < func_inputs.size(); ++i) { + const auto& func_input = func_inputs[i]; + const auto& name = func_input.get_node()->get_friendly_name(); + const auto& in_prc = func_input.get_element_type(); + auto tensor = ov::Tensor(in_prc, targetInputStaticShapes[i]); + +#define FILL_DATA(P, S, L) \ +case P : \ +fill_data(tensor.data::value_type>(), S, L); break; + +#define GEN_DATA(P, R, S, K) \ +case P : \ +utils::fill_data_random(tensor.data::value_type>(), shape_size(targetInputStaticShapes[i]), R, S, K); break; + + if (name == "Boxes") { + switch (in_prc) { + GEN_DATA(ElementType::f32, 30, 20, 1) + GEN_DATA(ElementType::f16, 30, 20, 1) + GEN_DATA(ElementType::bf16, 30, 20, 1) + default: + OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Scores input."); + } + } else if (name == "Scores") { + switch (in_prc) { + GEN_DATA(ElementType::f32, 1, 0, 100) + GEN_DATA(ElementType::f16, 1, 0, 100) + GEN_DATA(ElementType::bf16, 1, 0, 100) + default: + OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Scores input."); + } + } else if (name == "MaxOutputBoxesPerClass") { + switch (in_prc) { + FILL_DATA(ElementType::i64, &m_max_out_boxes_per_class, 1) + FILL_DATA(ElementType::i32, &m_max_out_boxes_per_class, 1) + default: + OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the MaxOutputBoxesPerClass input."); + } + } else if (name == "IouThreshold") { + switch (in_prc) { + FILL_DATA(ElementType::f32, &m_iou_threshold, 1) + FILL_DATA(ElementType::f16, &m_iou_threshold, 1) + FILL_DATA(ElementType::bf16, &m_iou_threshold, 1) + default: + OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the IouThreshold input."); + } + } else if (name == 
"ScoreThreshold") { + switch (in_prc) { + FILL_DATA(ElementType::f32, &m_score_threshold, 1) + FILL_DATA(ElementType::f16, &m_score_threshold, 1) + FILL_DATA(ElementType::bf16, &m_score_threshold, 1) + default: + OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the ScoreThreshold input."); + } + } + +#undef GEN_DATA +#undef FILL_DATA + + inputs.insert({func_input.get_node_shared_ptr(), tensor}); + } +} + +} // namespace LayerTestsDefinitions diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv index fa91f28719a834..51b03e9f335714 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/skip_configs/CPU/expected_failures_OP.csv @@ -1131,5 +1131,4 @@ conformance_RegionYolo/ReadIRTest.ImportExport/Op=RegionYolo.1_Type=f32_IR=Regio conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07 conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281 conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1 -conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=(),1 conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1 From 48c9598892c76b6906ad0eb2e9c63c427fc1e864 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Tue, 31 Oct 2023 14:05:21 +0100 Subject: [PATCH 162/275] Handle Reshape in SplitSqueezeConcatFusion (#20345) * Handle Reshape in SplitSqueezeConcatFusion Ticket: 
CVS-122455 * move check for squeeze/reshape * add some comments * review comments * add use_shapes flag to SplitSqueezeConcatFusion --- .../split_squeeze_concat_fusion.hpp | 2 +- .../moc_transformations.cpp | 2 +- .../split_squeeze_concat_fusion.cpp | 145 ++++++++++++------ .../split_squeeze_concat_fusion_test.cpp | 78 +++++++++- 4 files changed, 172 insertions(+), 55 deletions(-) diff --git a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp index 733e6c66a5f5b8..28f94637523f79 100644 --- a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp @@ -27,5 +27,5 @@ class TRANSFORMATIONS_API SplitSqueezeConcatFusion; class ov::pass::SplitSqueezeConcatFusion : public ov::pass::MatcherPass { public: OPENVINO_RTTI("SplitSqueezeConcatFusion", "0"); - SplitSqueezeConcatFusion(); + SplitSqueezeConcatFusion(bool use_shapes); }; diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 9a3446f2386161..5c768be324e334 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -170,7 +170,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, // because it replaces pattern that may contain Transposes which must be optimized before // the transformation and it also inserts Transpose that can be optimized by TransposeSinking - ADD_MATCHER(transpose_sinking, 
SplitSqueezeConcatFusion) + ADD_MATCHER(transpose_sinking, SplitSqueezeConcatFusion, m_use_shapes) REGISTER_PASS(manager, TransposeMatMul) diff --git a/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp index b4bc2567e77d43..0baac07d14e434 100644 --- a/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp @@ -18,7 +18,9 @@ #include "openvino/op/transpose.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" -ov::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { +static bool is_axis_squeezed_by_node(const std::shared_ptr& squeeze_node, int64_t axis, bool use_shapes); + +ov::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion(bool use_shapes) { MATCHER_SCOPE(SplitSqueezeConcatFusion); // Detect only concat, because we don't know how many inputs will go into concat auto concat_pattern = ov::pass::pattern::wrap_type(); @@ -32,66 +34,51 @@ ov::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { NodeVector nodes_to_delete{concat}; - int64_t axis_value = 0; std::shared_ptr split; + int64_t split_axis = 0; - const auto& concat_inputs = concat->input_values(); - if (concat_inputs.empty()) - return false; - for (size_t i = 0; i < concat_inputs.size(); i++) { - auto squeeze = std::dynamic_pointer_cast(concat_inputs[i].get_node_shared_ptr()); - if (!squeeze) + for (size_t i = 0; i < concat->get_input_size(); i++) { + auto squeeze_node = concat->get_input_node_shared_ptr(i); + if (!ov::is_type(squeeze_node) && !ov::is_type(squeeze_node)) return false; - - nodes_to_delete.push_back(squeeze); - - auto split_to_check = std::dynamic_pointer_cast(squeeze->get_input_node_shared_ptr(0)); + auto split_to_check = + 
std::dynamic_pointer_cast(squeeze_node->get_input_node_shared_ptr(0)); if (!split_to_check) return false; - std::vector squeeze_axes_vec; - if (squeeze->get_input_size() < 2) { - const auto& shape = squeeze->get_input_partial_shape(0); - if (shape.is_dynamic()) { - return false; - } - for (size_t i = 0; i < shape.size(); i++) { - if (shape[i].get_length() == 1) - squeeze_axes_vec.push_back(static_cast(i)); - } - - } else { - auto squeeze_axes = - std::dynamic_pointer_cast(squeeze->get_input_node_shared_ptr(1)); - if (!squeeze_axes) - return false; - squeeze_axes_vec = squeeze_axes->cast_vector(); - } - - if (squeeze_axes_vec.size() != 1) - return false; if (i == 0) { - axis_value = squeeze_axes_vec[0]; nodes_to_delete.push_back(split_to_check); split = split_to_check; - } else if (axis_value != squeeze_axes_vec[0] || split_to_check != split) { + auto split_axis_node = + std::dynamic_pointer_cast(split->get_input_node_shared_ptr(1)); + if (!split_axis_node) + return false; + auto axis_vec = split_axis_node->cast_vector(); + if (axis_vec.size() != 1) + return false; + split_axis = axis_vec[0]; + if (split_axis < 0) { + auto rank = split->get_output_partial_shape(0).rank(); + if (rank.is_dynamic()) + return false; + split_axis += rank.get_length(); + } + } else if (split_to_check != split) { return false; } - auto split_output = squeeze->input_value(0); - if (split_output.get_target_inputs().size() != 1 || split_output.get_index() != i) + if (!is_axis_squeezed_by_node(squeeze_node, split_axis, use_shapes)) { return false; - } + } - if (split->get_num_splits() != concat_inputs.size()) - return false; + nodes_to_delete.push_back(squeeze_node); - auto split_axis = std::dynamic_pointer_cast(split->input_value(1).get_node_shared_ptr()); - if (!split_axis) - return false; + auto split_output = squeeze_node->input_value(0); + if (split_output.get_target_inputs().size() != 1 || split_output.get_index() != i) + return false; + } - auto axis_vec = split_axis->cast_vector(); - 
if (axis_vec.size() != 1 || axis_value != axis_vec[0]) + if (split->get_num_splits() != concat->get_input_size()) return false; auto input = split->input_value(0); @@ -102,8 +89,8 @@ ov::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { return false; std::vector order(rank.get_length()); std::iota(order.begin(), order.end(), 0); - order.erase(order.begin() + axis_value); - order.insert(order.begin() + concat_axis, axis_value); + order.erase(order.begin() + split_axis); + order.insert(order.begin() + concat_axis, split_axis); auto transpose_order = ov::op::v0::Constant::create(element::i64, {(size_t)rank.get_length()}, order); auto transpose = register_new_node(input, transpose_order); @@ -120,3 +107,67 @@ ov::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { auto m = std::make_shared(concat_pattern, matcher_name); register_matcher(m, callback); } + +bool is_axis_squeezed_by_node(const std::shared_ptr& squeeze_node, int64_t axis, bool use_shapes) { + const auto& input_shape = squeeze_node->get_input_partial_shape(0); + const auto& output_shape = squeeze_node->get_output_partial_shape(0); + if (input_shape.rank().is_dynamic() || output_shape.rank().is_dynamic()) + return false; + + auto input_rank = input_shape.rank().get_length(); + auto output_rank = output_shape.rank().get_length(); + // check if output_rank == input_rank - 1 + // to make sure the node actually squeezes a dimension + if (input_rank != output_rank + 1) + return false; + + // check if squeezed dimension equals to 1 + if (input_shape[axis].is_dynamic() || input_shape[axis] != 1) + return false; + + if (ov::is_type(squeeze_node)) { + if (!use_shapes) + return false; + // clang-format off + // check if the dimensions surrounding squeezed axis match + // function returns false if input_shape[:axis] != output_shape[:axis] or input_shape[(axis + 1):] != output_shape[axis:] + // clang-format on + if (input_shape.is_dynamic() || output_shape.is_dynamic()) + return false; + + if 
(!std::equal(input_shape.begin(), input_shape.begin() + axis, output_shape.begin())) + return false; + + if (!std::equal(input_shape.begin() + axis + 1, input_shape.end(), output_shape.begin() + axis)) + return false; + } else { + if (squeeze_node->get_input_size() == 1) { + // The case when Squeeze has only one input so every dimension == 1 is squeezed + if (input_shape.is_dynamic()) + return false; + size_t num_squeezed_axes = 0; + for (size_t i = 0; i < input_shape.size(); i++) { + if (input_shape[i].get_length() == 1) { + num_squeezed_axes++; + if (num_squeezed_axes > 1) + return false; + if (static_cast(i) != axis) + return false; + } + } + } else { + // The second Squeeze input has explicit axes + auto constant = ov::as_type_ptr(squeeze_node->get_input_node_shared_ptr(1)); + if (!constant) + return false; + if (ov::shape_size(constant->get_shape()) != 1) + return false; + auto squeezed_axis = constant->cast_vector()[0]; + squeezed_axis = squeezed_axis < 0 ? squeezed_axis + input_rank : squeezed_axis; + if (axis != squeezed_axis) + return false; + } + } + + return true; +} diff --git a/src/common/transformations/tests/common_optimizations/split_squeeze_concat_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/split_squeeze_concat_fusion_test.cpp index 8d72aba9b0c6a6..3bea132c205fcf 100644 --- a/src/common/transformations/tests/common_optimizations/split_squeeze_concat_fusion_test.cpp +++ b/src/common/transformations/tests/common_optimizations/split_squeeze_concat_fusion_test.cpp @@ -37,7 +37,7 @@ TEST_F(TransformationTestsF, SplitSqueezeConcatFusion) { model = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } { @@ -69,7 +69,7 @@ TEST_F(TransformationTestsF, SplitSqueezeConcatFusionSqueezeWithoutAxesInput) { model = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } { @@ -103,7 +103,7 @@ 
TEST_F(TransformationTestsF, SplitSqueezeConcatFusionNegativeCaseNotAllSplitOutp model = std::make_shared(NodeVector{concat}, ParameterVector{input}); model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } { @@ -144,7 +144,7 @@ TEST_F(TransformationTestsF, SplitSqueezeConcatFusionNegativeCaseSplitOutputsGoI model = std::make_shared(NodeVector{concat}, ParameterVector{input}); model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } { @@ -185,7 +185,7 @@ TEST_F(TransformationTestsF, SplitSqueezeConcatFusionNegativeCaseSplitAxisDiffer model = std::make_shared(NodeVector{concat}, ParameterVector{input}); model_ref = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } { @@ -222,6 +222,72 @@ TEST_F(TransformationTestsF, SplitSqueezeConcatFusionNegativeSqueezeWithoutAxesI model = std::make_shared(NodeVector{concat}, ParameterVector{input}); - manager.register_pass(); + manager.register_pass(false); } } + +struct SplitReshapeConcatFusionParam { + int num_splits; + int split_axis; + Shape input_shape; + std::vector reshaped_shape; + int concat_axis; + std::vector transpose_order; + bool can_fuse; +}; + +class SplitReshapeConcatFusion : public TransformationTestsF, + public testing::WithParamInterface {}; + +TEST_P(SplitReshapeConcatFusion, SplitSqueezeConcatFusion) { + auto params = GetParam(); + ASSERT_EQ(0, params.input_shape[params.split_axis] % params.num_splits); + + { + auto input = std::make_shared(element::f32, params.input_shape); + auto split_axis_node = opset7::Constant::create(element::i64, Shape{}, {params.split_axis}); + auto split = std::make_shared(input, split_axis_node, params.num_splits); + OutputVector squeeze_vec; + squeeze_vec.reserve(params.num_splits); + auto reshaped_shape_node = + opset7::Constant::create(element::i32, 
Shape{params.reshaped_shape.size()}, params.reshaped_shape); + for (int i = 0; i < params.num_splits; i++) { + squeeze_vec.push_back(std::make_shared(split->output(i), reshaped_shape_node, true)); + } + auto concat = std::make_shared(squeeze_vec, params.concat_axis); + model = std::make_shared(NodeVector{concat}, ParameterVector{input}); + manager.register_pass(true); + } + + if (!params.can_fuse) { + model_ref = model->clone(); + } else { + auto input = std::make_shared(element::f32, params.input_shape); + auto transpose_order_node = + opset7::Constant::create(element::i64, Shape{params.transpose_order.size()}, params.transpose_order); + auto transpose = std::make_shared(input, transpose_order_node); + auto reshape_shape = params.input_shape; + reshape_shape.erase(reshape_shape.begin() + params.split_axis); + reshape_shape[params.concat_axis] *= params.num_splits; + auto reshape_shape_node = opset7::Constant::create(element::i64, Shape{reshape_shape.size()}, reshape_shape); + auto reshape = std::make_shared(transpose, reshape_shape_node, false); + + model_ref = std::make_shared(NodeVector{reshape}, ParameterVector{input}); + } + + comparator.enable(FunctionsComparator::CmpValues::ACCURACY); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +static std::vector split_reshape_concat_fusion_params{ + {4, 2, Shape{3, 1, 4, 1, 5}, {3, 1, 1, 5}, 1, {0, 2, 1, 3, 4}, true}, + {4, 0, Shape{4, 6, 5}, {6, 5}, 1, {1, 0, 2}, true}, + {5, 2, Shape{4, 6, 5}, {4, 6}, 0, {2, 0, 1}, true}, + {2, 2, Shape{3, 1, 4, 5}, {3, 2, 5}, 1, {0, 2, 1, 3}, false}, + {2, 1, Shape{3, 2, 3, 4, 5}, {3, 3, 5, 4}, 1, {0, 2, 1, 3, 4}, false}, + {4, 2, Shape{3, 1, 4, 1, 5}, {3, 1, 5}, 1, {0, 2, 1, 3, 4}, false}, +}; + +INSTANTIATE_TEST_SUITE_P(TransformationTests, + SplitReshapeConcatFusion, + testing::ValuesIn(split_reshape_concat_fusion_params)); From 8d6f56dd1214253e14e55b035e4e22970ff26565 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 31 Oct 2023 17:22:09 +0400 
Subject: [PATCH 163/275] [TF FE] Fix translators for multiple output operations (#20787) Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow/src/op/sparse_reshape.cpp | 9 +++++++-- src/frontends/tensorflow/src/op_table.cpp | 4 ++-- .../tensorflow_common/include/common_op_table.hpp | 1 + src/frontends/tensorflow_common/src/op/max_pool.cpp | 7 +++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/frontends/tensorflow/src/op/sparse_reshape.cpp b/src/frontends/tensorflow/src/op/sparse_reshape.cpp index 1def5f4192f155..0d2e9de81e68c2 100644 --- a/src/frontends/tensorflow/src/op/sparse_reshape.cpp +++ b/src/frontends/tensorflow/src/op/sparse_reshape.cpp @@ -16,7 +16,7 @@ namespace ov { namespace frontend { namespace tensorflow { namespace op { -OutputVector translate_sparse_reshape_op(const ov::frontend::tensorflow::NodeContext& node) { +NamedOutputVector translate_sparse_reshape_op(const ov::frontend::tensorflow::NodeContext& node) { // Currently, the translation for SparseReshape is possible only if new shape value is the same as the input shape // value or it is different just by one dynamic dimension of the new shape that can be replace with the // corresponding static dimension of the input shape. @@ -67,7 +67,12 @@ OutputVector translate_sparse_reshape_op(const ov::frontend::tensorflow::NodeCon "This case with SparseReshape is not possible to translate to OpenVINO opset. 
The number " "of dynamic shapes in new shape must be 1 at most."); */ - return {input_indices, input_shape}; + auto output_indices = input_indices; + auto output_shape = input_shape; + set_out_name(node.get_name() + ":0", output_indices); + set_out_name(node.get_name() + ":1", output_shape); + + return {{"output_indices", output_indices}, {"output_shape", output_shape}}; } NamedOutputVector translate_sparse_fill_empty_rows_op(const ov::frontend::tensorflow::NodeContext& node) { diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 4926ac159cecbb..e5f25dad31270a 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -43,7 +43,7 @@ TF_OP_CONVERTER(translate_queue_dequeue_many_op); TF_OP_CONVERTER(translate_readvariable_op); TF_OP_CONVERTER(translate_restorev2_op); TF_OP_CONVERTER_NAMED(translate_sparse_fill_empty_rows_op); -TF_OP_CONVERTER(translate_sparse_reshape_op); +TF_OP_CONVERTER_NAMED(translate_sparse_reshape_op); TF_OP_CONVERTER(translate_sparse_segment_sum_op); TF_OP_CONVERTER(translate_staticregexfullmatch_op); TF_OP_CONVERTER(translate_stringjoin_op); @@ -216,7 +216,7 @@ const std::map get_supported_ops() { {"MaxPool", CreatorFunction(translate_max_pool_op)}, {"MaxPoolV2", CreatorFunction(translate_max_pool_op)}, {"MaxPool3D", CreatorFunction(translate_max_pool_op)}, - {"MaxPoolWithArgmax", CreatorFunction(translate_max_pool_op)}, + {"MaxPoolWithArgmax", CreatorFunction(translate_max_pool_with_argmax)}, {"Merge", CreatorFunction(translate_merge_op)}, {"MirrorPad", CreatorFunction(translate_mirror_pad_op)}, {"MutableHashTable", CreatorFunction(translate_hash_table_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 3601a07f6c45d0..6befa470761a45 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ 
b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -93,6 +93,7 @@ OP_CONVERTER(translate_lrn_op); OP_CONVERTER(translate_mat_mul_op); OP_CONVERTER(translate_matrix_diag_op); OP_CONVERTER(translate_max_pool_op); +OP_CONVERTER_NAMED(translate_max_pool_with_argmax); OP_CONVERTER(translate_mirror_pad_op); OP_CONVERTER_NAMED(translate_non_max_suppression_op); OP_CONVERTER(translate_parallel_dynamic_stitch_op); diff --git a/src/frontends/tensorflow_common/src/op/max_pool.cpp b/src/frontends/tensorflow_common/src/op/max_pool.cpp index d64ac1a17fbafe..c693f1e7533554 100644 --- a/src/frontends/tensorflow_common/src/op/max_pool.cpp +++ b/src/frontends/tensorflow_common/src/op/max_pool.cpp @@ -128,7 +128,7 @@ OutputVector translate_max_pool_v2(const NodeContext& node) { return translate_max_pool_util(node, 2, ksize_vector, strides_vector); } -OutputVector translate_max_pool_with_argmax(const NodeContext& node) { +NamedOutputVector translate_max_pool_with_argmax(const NodeContext& node) { // MaxPoolWithArgmax has just one input. 
ksize and strides are attributes TENSORFLOW_OP_VALIDATION(node, node.get_input_size() > 0, @@ -199,8 +199,9 @@ OutputVector translate_max_pool_with_argmax(const NodeContext& node) { convert_nchw_to_nhwc(true, output_indices, 4); } + set_out_name(node_name + ":0", max_pool); set_out_name(node_name + ":1", output_indices); - return {max_pool, output_indices}; + return {{"output", max_pool}, {"argmax", output_indices}}; } OutputVector translate_max_pool_op(const NodeContext& node) { @@ -210,8 +211,6 @@ OutputVector translate_max_pool_op(const NodeContext& node) { return translate_max_pool_v2(node); } else if (node.get_op_type() == "MaxPool3D") { return translate_max_pool(node, 3); - } else if (node.get_op_type() == "MaxPoolWithArgmax") { - return translate_max_pool_with_argmax(node); } else { TENSORFLOW_OP_VALIDATION(node, false, "Only MaxPool2D, MaxPoolV2 and MaxPool3D are supported."); } From 8eee1b52ed828fa3f4dfaf78267ea9d58fe6a759 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 31 Oct 2023 19:00:36 +0400 Subject: [PATCH 164/275] [IE TESTS] ImportExport reporting + extension of base class (#20765) * [IE TESTS] ImportExport reporting + extension of base class * reuse dynamic check --- .../include/utils/dynamism.hpp | 43 ++++++ .../subgraphs_dumper/include/utils/model.hpp | 2 +- .../subgraphs_dumper/include/utils/node.hpp | 17 +-- .../subgraphs_dumper/src/utils/model.cpp | 14 -- .../op_conformance_runner/CMakeLists.txt | 1 + .../include/read_ir_test/read_ir.hpp | 2 - .../src/read_ir/read_ir.cpp | 110 +--------------- .../shared_test_classes/base/ov_subgraph.hpp | 1 + .../src/base/ov_subgraph.cpp | 124 ++++++++++++++++-- 9 files changed, 164 insertions(+), 150 deletions(-) create mode 100644 src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/dynamism.hpp diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/dynamism.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/dynamism.hpp new file 
mode 100644 index 00000000000000..a43f759e41a956 --- /dev/null +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/dynamism.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/model.hpp" + +namespace ov { +namespace tools { +namespace subgraph_dumper { + +inline bool is_dynamic_node(const std::shared_ptr& node) { + for (size_t i = 0; i < node->get_input_size(); ++i) { + if (node->get_input_partial_shape(i).is_dynamic()) { + return true; + } + } + for (size_t i = 0; i < node->get_output_size(); ++i) { + if (node->get_output_partial_shape(i).is_dynamic()) { + return true; + } + } + return false; +} + +inline bool is_dynamic_model(const std::shared_ptr& model) { + for (const auto& parameter : model->get_parameters()) { + if (is_dynamic_node(parameter)) { + return true; + } + } + for (const auto& result : model->get_results()) { + if (is_dynamic_node(result)) { + return true; + } + } + return false; +} + +} // namespace subgraph_dumper +} // namespace tools +} // namespace ov diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp index bfae8ea874b2c0..786e67fca8d3a8 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp @@ -18,6 +18,7 @@ #include "cache/cache.hpp" #include "utils/node.hpp" +#include "utils/dynamism.hpp" namespace ov { namespace tools { @@ -75,7 +76,6 @@ std::map> cache_models( void save_model_status_to_file(const std::map>& caching_status, const std::string& output_dir); -bool is_dynamic_model(const std::shared_ptr& model); std::string get_model_type(const std::shared_ptr& model); std::map diff --git 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp index 928ebd36935345..147b73807c0ae6 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp @@ -6,8 +6,11 @@ #include #include "cache/meta/input_info.hpp" +#include "utils/dynamism.hpp" + #include "functional_test_utils/node_utils.hpp" #include "functional_test_utils/summary/op_info.hpp" + #include "openvino/openvino.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/constant_folding.hpp" @@ -45,20 +48,6 @@ std::shared_ptr convert_const_to_param(const std::shared_ // all inputs are defined as parameters and contains detailed info in meta std::shared_ptr generate_model_by_node(const std::shared_ptr& node); -inline bool is_dynamic_node(const std::shared_ptr& node) { - for (size_t i = 0; i < node->get_input_size(); ++i) { - if (node->get_input_partial_shape(i).is_dynamic()) { - return true; - } - } - for (size_t i = 0; i < node->get_output_size(); ++i) { - if (node->get_output_partial_shape(i).is_dynamic()) { - return true; - } - } - return false; -} - inline std::string get_node_type(const std::shared_ptr& node) { if (is_dynamic_node(node)) { return "dynamic"; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp index ef0dd60d0f771b..2a9c585bcdee2d 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/utils/model.cpp @@ -68,20 +68,6 @@ find_models(const std::vector &dirs, const std::string& regexp) { return { models, { ModelCacheStatus::NOT_READ, not_read_model } }; } -bool is_dynamic_model(const std::shared_ptr& model) { - for (const auto& parameter : 
model->get_parameters()) { - if (is_dynamic_node(parameter)) { - return true; - } - } - for (const auto& result : model->get_results()) { - if (is_dynamic_node(result)) { - return true; - } - } - return false; -} - std::string get_model_type(const std::shared_ptr& model) { if (is_dynamic_model(model)) { return "dynamic"; diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/CMakeLists.txt b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/CMakeLists.txt index a9eb6cec8f3006..8fbeea2859299a 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/CMakeLists.txt +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/CMakeLists.txt @@ -13,6 +13,7 @@ ov_add_test_target( PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include" "${OpenVINO_SOURCE_DIR}/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/meta/" + "${OpenVINO_SOURCE_DIR}/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/" ADD_CPPLINT LINK_LIBRARIES PUBLIC diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/include/read_ir_test/read_ir.hpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/include/read_ir_test/read_ir.hpp index d8a49d5754bc67..c62c7943d968f4 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/include/read_ir_test/read_ir.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/include/read_ir_test/read_ir.hpp @@ -27,8 +27,6 @@ class ReadIRTest : public testing::WithParamInterface, virtual public ov::test::SubgraphBaseTest { public: static std::string getTestCaseName(const testing::TestParamInfo &obj); - void query_model() override; - void import_export(); std::vector calculate_refs() override; protected: diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp 
b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp index 308d733574a38f..954d0fcc0b0329 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp @@ -21,6 +21,7 @@ #include "functional_test_utils/summary/op_info.hpp" #include "functional_test_utils/skip_tests_config.hpp" +#include "dynamism.hpp" #include "input_info.hpp" #include "conformance.hpp" #include "read_ir_test/read_ir.hpp" @@ -116,99 +117,6 @@ std::string ReadIRTest::getTestCaseName(const testing::TestParamInfo(new ov::test::utils::CrashHandler()); - auto &s = ov::test::utils::OpSummary::getInstance(); - - // place to jump in case of a crash - int jmpRes = 0; -#ifdef _WIN32 - jmpRes = setjmp(ov::test::utils::env); -#else - jmpRes = sigsetjmp(ov::test::utils::env, 1); -#endif - if (jmpRes == ov::test::utils::JMP_STATUS::ok) { - crashHandler->StartTimer(); - if (functionRefs == nullptr) { - functionRefs = ngraph::clone_function(*function); - functionRefs->set_friendly_name("refFunction"); - } - s.setDeviceName(targetDevice); - - if (ov::test::utils::current_test_is_disabled()) { - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::SKIPPED, rel_influence_coef); - GTEST_SKIP() << "Disabled test due to configuration" << std::endl; - } else { - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::CRASHED, rel_influence_coef); - } - try { - SubgraphBaseTest::query_model(); - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::PASSED, rel_influence_coef); - } catch (std::exception& err) { - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::FAILED, rel_influence_coef); - GTEST_FAIL() << err.what(); - } catch (...) 
{ - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::FAILED, rel_influence_coef); - GTEST_FAIL() << "Something is wrong in Query model! Please check"; - } - } else if (jmpRes == ov::test::utils::JMP_STATUS::alarmErr) { - s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::HANGED, rel_influence_coef); - IE_THROW() << "Crash happens"; - } else if (jmpRes == ov::test::utils::JMP_STATUS::anyError) { - IE_THROW() << "Crash happens"; - } -} - -void ReadIRTest::import_export() { - // in case of crash jump will be made and work will be continued - auto crashHandler = std::unique_ptr(new ov::test::utils::CrashHandler()); - auto &summary = ov::test::utils::OpSummary::getInstance(); - - // place to jump in case of a crash - int jmpRes = 0; -#ifdef _WIN32 - jmpRes = setjmp(ov::test::utils::env); -#else - jmpRes = sigsetjmp(ov::test::utils::env, 1); -#endif - if (jmpRes == ov::test::utils::JMP_STATUS::ok) { - crashHandler->StartTimer(); - summary.setDeviceName(targetDevice); - try { - ov::CompiledModel model = core->compile_model(function, targetDevice, configuration); - - std::stringstream strm; - model.export_model(strm); - - ov::CompiledModel importedModel = core->import_model(strm, targetDevice, configuration); - - auto comparator = FunctionsComparator::with_default() - .enable(FunctionsComparator::ATTRIBUTES) - .enable(FunctionsComparator::NAMES) - .enable(FunctionsComparator::CONST_VALUES); - - auto importedFunction = importedModel.get_runtime_model()->clone(); - auto res = comparator.compare(importedFunction, function); - EXPECT_TRUE(res.valid) << res.message; - - summary.updateOPsImplStatus(function, true); - } catch (const std::exception &e) { - summary.updateOPsImplStatus(function, false); - GTEST_FAIL() << "Exception in the Core::compile_model() method call: " << e.what(); - } catch (...) 
{ - summary.updateOPsImplStatus(function, false); - GTEST_FAIL() << "Error in the Core::query_model() method call!"; - } - } else if (jmpRes == ov::test::utils::JMP_STATUS::anyError) { - summary.updateOPsImplStatus(function, false); - GTEST_FAIL() << "Crash happens"; - } else if (jmpRes == ov::test::utils::JMP_STATUS::alarmErr) { - summary.updateOPsImplStatus(function, false); - GTEST_FAIL() << "Hang happens"; - } -} - uint64_t clip(uint64_t n, uint64_t lower, uint64_t upper) { return std::max(lower, std::min(n, upper)); } @@ -267,21 +175,7 @@ void ReadIRTest::SetUp() { } } - bool hasDynamic = false; - for (const auto& param : function->get_parameters()) { - if (param->get_partial_shape().is_dynamic()) { - hasDynamic = true; - break; - } - } - if (!hasDynamic) { - for (const auto& result : function->get_results()) { - if (result->get_output_partial_shape(0).is_dynamic()) { - hasDynamic = true; - break; - } - } - } + bool hasDynamic = tools::subgraph_dumper::is_dynamic_model(function); #ifdef ENABLE_CONFORMANCE_PGQL // Updating data in runtime. 
Should be set before possible call of a first GTEST status diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index c76cd8fbc1bc72..1653f6ffd4298c 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -31,6 +31,7 @@ class SubgraphBaseTest : public ov::test::TestsCommon { virtual void run(); virtual void serialize(); virtual void query_model(); + virtual void import_export(); protected: virtual void compare(const std::vector& expected, const std::vector& actual); diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index aca76c4e9e65ab..b1c5eb9e9832ac 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -128,20 +128,122 @@ void SubgraphBaseTest::serialize() { } void SubgraphBaseTest::query_model() { - SKIP_IF_CURRENT_TEST_IS_DISABLED(); + bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); - auto queryNetworkResult = core->query_model(function, targetDevice); - std::set expected; - for (auto&& node : function->get_ops()) { - expected.insert(node->get_friendly_name()); - } + ov::test::utils::PassRate::Statuses status = isCurrentTestDisabled ? 
+ ov::test::utils::PassRate::Statuses::SKIPPED : + ov::test::utils::PassRate::Statuses::CRASHED; + summary.setDeviceName(targetDevice); + summary.updateOPsStats(function, status, rel_influence_coef); + + if (isCurrentTestDisabled) + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new ov::test::utils::CrashHandler()); - std::set actual; - for (auto&& res : queryNetworkResult) { - actual.insert(res.first); + // place to jump in case of a crash + int jmpRes = 0; +#ifdef _WIN32 + jmpRes = setjmp(ov::test::utils::env); +#else + jmpRes = sigsetjmp(ov::test::utils::env, 1); +#endif + if (jmpRes == ov::test::utils::JMP_STATUS::ok) { + crashHandler->StartTimer(); + std::string errorMessage; + try { + auto queryNetworkResult = core->query_model(function, targetDevice); + std::set expected; + for (auto&& node : function->get_ops()) { + expected.insert(node->get_friendly_name()); + } + + std::set actual; + for (auto&& res : queryNetworkResult) { + actual.insert(res.first); + } + if (expected != actual) { + IE_THROW() << "Expected and actual are different"; + } + status = ov::test::utils::PassRate::Statuses::PASSED; + } catch (const std::exception& ex) { + status = ov::test::utils::PassRate::Statuses::FAILED; + errorMessage = ex.what(); + } catch (...) 
{ + status = ov::test::utils::PassRate::Statuses::FAILED; + errorMessage = "Unknown failure occurred."; + } + summary.updateOPsStats(function, status, rel_influence_coef); + if (status != ov::test::utils::PassRate::Statuses::PASSED) { + GTEST_FATAL_FAILURE_(errorMessage.c_str()); + } + } else if (jmpRes == ov::test::utils::JMP_STATUS::anyError) { + IE_THROW() << "Crash happens"; + } else if (jmpRes == ov::test::utils::JMP_STATUS::alarmErr) { + summary.updateOPsStats(function, ov::test::utils::PassRate::Statuses::HANGED, rel_influence_coef); + IE_THROW() << "Crash happens"; } - if (expected != actual) { - IE_THROW() << "Expected and actual are different"; +} + +void SubgraphBaseTest::import_export() { + bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); + + ov::test::utils::PassRate::Statuses status = isCurrentTestDisabled ? + ov::test::utils::PassRate::Statuses::SKIPPED : + ov::test::utils::PassRate::Statuses::CRASHED; + summary.setDeviceName(targetDevice); + summary.updateOPsStats(function, status, rel_influence_coef); + + if (isCurrentTestDisabled) + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new ov::test::utils::CrashHandler()); + + // place to jump in case of a crash + int jmpRes = 0; +#ifdef _WIN32 + jmpRes = setjmp(ov::test::utils::env); +#else + jmpRes = sigsetjmp(ov::test::utils::env, 1); +#endif + if (jmpRes == ov::test::utils::JMP_STATUS::ok) { + crashHandler->StartTimer(); + std::string errorMessage; + try { + compile_model(); + + std::stringstream strm; + compiledModel.export_model(strm); + ov::CompiledModel importedModel = core->import_model(strm, targetDevice, configuration); + auto importedFunction = importedModel.get_runtime_model()->clone(); + + auto comparator = FunctionsComparator::with_default() + .enable(FunctionsComparator::ATTRIBUTES) + .enable(FunctionsComparator::NAMES) + 
.enable(FunctionsComparator::CONST_VALUES); + auto res = comparator.compare(importedFunction, function); + if (!res.valid) { + throw std::runtime_error(res.message); + } + status = ov::test::utils::PassRate::Statuses::PASSED; + } catch (const std::exception& ex) { + status = ov::test::utils::PassRate::Statuses::FAILED; + errorMessage = ex.what(); + } catch (...) { + status = ov::test::utils::PassRate::Statuses::FAILED; + errorMessage = "Unknown failure occurred."; + } + summary.updateOPsStats(function, status, rel_influence_coef); + if (status != ov::test::utils::PassRate::Statuses::PASSED) { + GTEST_FATAL_FAILURE_(errorMessage.c_str()); + } + } else if (jmpRes == ov::test::utils::JMP_STATUS::anyError) { + IE_THROW() << "Crash happens"; + } else if (jmpRes == ov::test::utils::JMP_STATUS::alarmErr) { + summary.updateOPsStats(function, ov::test::utils::PassRate::Statuses::HANGED, rel_influence_coef); + IE_THROW() << "Crash happens"; } } From 65f6950f5695fe479ef554ae19621007f717b142 Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Tue, 31 Oct 2023 15:14:36 +0000 Subject: [PATCH 165/275] [apiConformance][TEMPLATE] Add too get_profiling_info info about ops (#20697) --- src/plugins/template/src/sync_infer_request.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index e788142dfb3991..fa87f4e9bd4fe5 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -17,6 +17,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/profiling_info.hpp" #include "openvino/runtime/tensor.hpp" +#include "perf_counter.hpp" #include "plugin.hpp" #include "remote_tensor.hpp" #include "template/remote_tensor.hpp" @@ -268,9 +269,19 @@ std::vector ov::template_plugin::InferRequest::get_profiling_ p_info.cpu_time = p_info.real_time = std::chrono::duration_cast(time); return p_info; }; + 
info.emplace_back(fill_profiling_info("input preprocessing", m_durations[Preprocess])); info.emplace_back(fill_profiling_info("execution time", m_durations[StartPipeline])); + auto template_model = get_template_model(); + for (const auto& op : template_model->get_runtime_model()->get_ops()) { + auto rt_info = op->get_rt_info(); + const auto& it = rt_info.find(ov::runtime::interpreter::PERF_COUNTER_NAME); + OPENVINO_ASSERT(it != rt_info.end(), "Operation ", op, " doesn't contain performance counter"); + auto counter = it->second.as>(); + info.emplace_back(fill_profiling_info(op->get_friendly_name(), counter->duration())); + } info.emplace_back(fill_profiling_info("output postprocessing", m_durations[Postprocess])); + return info; } // ! [infer_request:get_profiling_info] From dd10a520e3f1481b684f519576e9b719cd98447c Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 31 Oct 2023 19:57:41 +0400 Subject: [PATCH 166/275] [DO NOT REVIEW OR MERGE] LLM in SubgraphsDumper (#20756) --- .../subgraphs_dumper/include/cache/cache.hpp | 4 +++- .../subgraphs_dumper/include/gflag_config.hpp | 2 +- .../subgraphs_dumper/src/cache/graph_cache.cpp | 18 ++++++++++++------ .../src/matchers/subgraph/fused_names.cpp | 8 ++++++++ .../tests/matchers/subgraph/fused_names.cpp | 2 +- .../find_models_for_subgraphs_dumper.py | 3 +++ 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp index a35eca0e7ad619..8cd67dea3d7d85 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/cache/cache.hpp @@ -45,7 +45,9 @@ class ICache { bool is_model_large_to_store_const(const std::shared_ptr& model) { auto model_bytesize = model->get_graph_size(); - if (mem_size < model_bytesize * 4) { + size_t gb_8 = 1; + gb_8 <<= 33; + if 
(mem_size <= model_bytesize * 4 || model_bytesize >= gb_8) { return true; } return false; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp index 298397c433ecf9..d8f76ae44c9a6e 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp @@ -25,7 +25,7 @@ DEFINE_bool(h, false, help_message); DEFINE_string(input_folders, "", local_cache_message); DEFINE_string(local_cache, "", input_folders_message); DEFINE_string(output_folder, "output", output_folder_message); -DEFINE_string(device, "CPU", device_message); +DEFINE_string(device, "TEMPLATE", device_message); DEFINE_string(path_regex, ".*", output_folder_message); DEFINE_bool(extract_body, true, extract_body_message); DEFINE_string(cache_type, "", cache_type_message); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp index 824f611a6e8808..74ec925c9f8c09 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/cache/graph_cache.cpp @@ -37,11 +37,16 @@ void GraphCache::update_cache(const std::shared_ptr& model, auto model_bytesize = model->get_graph_size(); // check that Free RAM memory is enough. Serialize in other case // serialize graph cache in case graph cache bytesize > 4GB to avoid long search the same graphs - if (m_graph_cache_bytesize + 2 * model_bytesize > mem_size || m_graph_cache_bytesize >> 20 != 0) { + if (m_graph_cache_bytesize + 2 * model_bytesize >= mem_size) { std::cout << "[ GRAPH CACHE ][ WARNING ] There are not enought RAM memory! 
Serialize graph cache" << std::endl; serialize_cache(); m_graph_cache_bytesize = 0; } + if (m_graph_cache_bytesize * 4 >= mem_size) { + std::cout << "[ GRAPH CACHE ][ WARNING ] 25% of RAM is used by cache! Serialize graph cache" << std::endl; + serialize_cache(); + m_graph_cache_bytesize = 0; + } auto is_large_model = is_model_large_to_store_const(model); if (is_large_model) { auto model_bytesize_gb = model_bytesize; @@ -49,7 +54,7 @@ void GraphCache::update_cache(const std::shared_ptr& model, auto mem_size_gb = mem_size; mem_size_gb >>= 30; std::cout << "[ GRAPH CACHE ][ WARNING ] Model bytesize is " << model_bytesize_gb << - "GB. It is larger than 25% RAM size: " << mem_size_gb << ". Constants won't be copied!" << std::endl; + "GB. It is larger than 25% RAM size or >= 8GB: " << mem_size_gb << ". Constants won't be copied!" << std::endl; } auto extracted_patterns = m_manager.extract(model, extract_body, !is_large_model); if (extracted_patterns.empty()) { @@ -169,11 +174,12 @@ void GraphCache::update_cache(const std::shared_ptr& extracted_model, } void GraphCache::serialize_cache() { - for (const auto& cache_item : m_graph_cache) { - auto rel_dir = ov::util::path_join({ m_cache_subdir, get_model_type(cache_item.first), cache_item.second.get_any_extractor() }); - serialize_model(cache_item, rel_dir); + while (!m_graph_cache.empty()) { + auto cache_item = m_graph_cache.begin(); + auto rel_dir = ov::util::path_join({ m_cache_subdir, get_model_type(cache_item->first), cache_item->second.get_any_extractor() }); + serialize_model(*cache_item, rel_dir); + m_graph_cache.erase(cache_item); } - m_graph_cache.clear(); } } // namespace subgraph_dumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp index b8c6408329ac48..b3b5c58a84e86b 100644 --- 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/subgraph/fused_names.cpp @@ -6,6 +6,7 @@ #include "openvino/op/tensor_iterator.hpp" #include "openvino/op/if.hpp" #include "openvino/op/loop.hpp" +#include "openvino/util/file_util.hpp" #include "common_test_utils/common_utils.hpp" @@ -16,6 +17,13 @@ using namespace ov::tools::subgraph_dumper; void FusedNamesExtractor::set_target_device(const std::string& _device) { auto available_devices = core->get_available_devices(); + if (_device == std::string(ov::test::utils::DEVICE_TEMPLATE) && + std::find(available_devices.begin(), available_devices.end(), _device) == available_devices.end()) { + auto plugin_path = ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), + std::string(ov::test::utils::TEMPLATE_LIB) + OV_BUILD_POSTFIX); + core->register_plugin(plugin_path, _device); + available_devices = core->get_available_devices(); + } if (_device.empty() && !available_devices.empty()) { device = available_devices.front(); std::cout << "[ WARNING ][ GRAPH CACHE ] " << device << diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp index 6a287a8e364c64..afe63aa8c9f8ec 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/subgraph/fused_names.cpp @@ -22,7 +22,7 @@ using namespace ov::tools::subgraph_dumper; // ======================= ExtractorsManagerTest Unit tests ======================= class FusedNamesExtractorTest : public SubgraphsDumperBaseTest { - FusedNamesExtractor extractor; + FusedNamesExtractor extractor = FusedNamesExtractor("TEMPLATE"); protected: void is_match(const std::shared_ptr& 
model) { diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py index 3016f8c48c80aa..aa91d68160d4c0 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/conformance_helper_tools/find_models_for_subgraphs_dumper.py @@ -90,6 +90,9 @@ def generate_model_list_file(input_str: str, re_exp_file_path: str, output_file_ except: pass for line in model_list: + str_line = str(line) + if "tfhub_module.pb" in str_line or "_metadata.pb" in str_line: + continue output_file.write(f"{line}\n") output_file.close() From 703e911321797bbfdb73d329d5a6eccbf22afd98 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Tue, 31 Oct 2023 16:46:52 +0000 Subject: [PATCH 167/275] [CI] [GHA] Increase job timeouts (#20797) * increase job timouts * increase further --- .github/workflows/android_arm64.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 8c1106fdb2457f..7f9f892cc82263 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -27,7 +27,7 @@ concurrency: jobs: Build: - timeout-minutes: 15 + timeout-minutes: 40 defaults: run: shell: bash diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 6b4328f77dc3fe..356b1afb8d9d9b 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -313,7 +313,7 @@ jobs: Samples: needs: Build - timeout-minutes: 10 + timeout-minutes: 20 defaults: run: shell: bash diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 
4984826bfba7fd..d57d9f58b59786 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -188,7 +188,7 @@ jobs: Samples: needs: Build - timeout-minutes: 10 + timeout-minutes: 20 defaults: run: shell: pwsh From 26c9c41b8eab9b5e4dc7070c7620262b778998f8 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 31 Oct 2023 22:53:35 +0400 Subject: [PATCH 168/275] Revert "[TF FE] Switch off TF1 While support totally (#20774)" (#20800) This reverts commit 38b60921203aee035903d40dfdc2d7db0dbed03f. --- src/frontends/tensorflow/src/op/enter.cpp | 3 --- src/frontends/tensorflow/src/op/loop_cond.cpp | 3 --- src/frontends/tensorflow/tests/convert_unsupported.cpp | 2 +- tests/layer_tests/tensorflow_tests/test_tf_While.py | 4 ++-- tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py | 1 - 5 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/frontends/tensorflow/src/op/enter.cpp b/src/frontends/tensorflow/src/op/enter.cpp index 190f6adebec6c7..c0719f83e36ccb 100644 --- a/src/frontends/tensorflow/src/op/enter.cpp +++ b/src/frontends/tensorflow/src/op/enter.cpp @@ -22,9 +22,6 @@ OutputVector translate_enter_op(const NodeContext& node) { auto data = node.get_input(0); auto frame_name = node.get_attribute("frame_name"); - // TODO 123651: remove this fallback to the legacy FE once GPU fixes dynamism for Loop operation - TENSORFLOW_OP_VALIDATION(node, false, "Fallback to legacy FE: Switch off TF1 While support due to GPU limitation"); - auto enter_node = make_shared(data, frame_name, node.get_decoder()); set_node_name(node.get_name(), enter_node); diff --git a/src/frontends/tensorflow/src/op/loop_cond.cpp b/src/frontends/tensorflow/src/op/loop_cond.cpp index 3b16518456a849..286192a017f283 100644 --- a/src/frontends/tensorflow/src/op/loop_cond.cpp +++ b/src/frontends/tensorflow/src/op/loop_cond.cpp @@ -22,9 +22,6 @@ OutputVector translate_loop_cond_op(const NodeContext& node) { default_op_checks(node, 1, {"LoopCond"}); auto input = 
node.get_input(0); - // TODO 123651: remove this fallback to the legacy FE once GPU fixes dynamism for Loop operation - TENSORFLOW_OP_VALIDATION(node, false, "Fallback to legacy FE: Switch off TF1 While support due to GPU limitation"); - auto loop_cond_node = make_shared(input, node.get_decoder()); set_node_name(node.get_name(), loop_cond_node); diff --git a/src/frontends/tensorflow/tests/convert_unsupported.cpp b/src/frontends/tensorflow/tests/convert_unsupported.cpp index aef4a0bea9d115..7d9a83045b5606 100644 --- a/src/frontends/tensorflow/tests/convert_unsupported.cpp +++ b/src/frontends/tensorflow/tests/convert_unsupported.cpp @@ -154,7 +154,7 @@ TEST(FrontEndConvertModelTest, test_unsupported_tf1_while_and_incorrect_less_tra } } -TEST(FrontEndConvertModelTest, DISABLED_conversion_with_unknown_exception) { +TEST(FrontEndConvertModelTest, conversion_with_unknown_exception) { shared_ptr model = nullptr; try { auto conv_ext = diff --git a/tests/layer_tests/tensorflow_tests/test_tf_While.py b/tests/layer_tests/tensorflow_tests/test_tf_While.py index cd66024d1e8294..2a112700f30ad5 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_While.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_While.py @@ -51,7 +51,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=False), dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=False), - pytest.param(dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True), marks=pytest.mark.xfail(reason="123651")) + dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True) ] @pytest.mark.parametrize("params", test_data_basic) @@ -110,7 +110,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], lower_control_flow=False), dict(y_shape=[2, 1, 4], lower_control_flow=False), - pytest.param(dict(y_shape=[2, 1, 4], lower_control_flow=True), marks=pytest.mark.xfail(reason="123651")) + dict(y_shape=[2, 1, 4], lower_control_flow=True) ] 
@pytest.mark.parametrize("params", test_data_basic) diff --git a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py index 9902f18ad3d874..26ea01b77d6722 100644 --- a/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py +++ b/tools/mo/unit_tests/moc_tf_fe/conversion_basic_models_test.py @@ -239,7 +239,6 @@ def test_conversion_tf1_while_default(self): self.basic("ctc_model_based.pbtxt", None, None, None, None, None, None, True, True, False, False) - @unittest.skip("123651: enable when GPU fixes dynamism in Loop operation") def test_conversion_tf1_while_use_new_frontend(self): self.basic("ctc_model_based.pbtxt", None, None, None, None, None, None, True, True, True, False) From bb0e4f8ecf8352a0592bff100dbfb236fd0ab61f Mon Sep 17 00:00:00 2001 From: Surya Siddharth Pemmaraju Date: Wed, 1 Nov 2023 00:36:19 -0700 Subject: [PATCH 169/275] Fixed issue with cat in fx backend (#20744) * Added fix for cat in torchfx * Added batch_norm_legit_no_training op * Fixed coding style * Fixed clang format * Addressed PR comments --- .../pytorch/torchdynamo/op_support.py | 2 ++ src/frontends/pytorch/src/op/batch_norm.cpp | 34 +++++++++++++++---- src/frontends/pytorch/src/op/cat.cpp | 13 +++---- src/frontends/pytorch/src/op_table.cpp | 7 ++-- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py index 4a76d90b160553..a6fb4de094d3eb 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py @@ -82,6 +82,8 @@ def __init__(self): "torch.ops.aten.mul.Scalar": None, "torch.ops.aten.mul.Tensor": None, "torch.ops.aten.native_batch_norm.default": None, + "torch.ops.aten._native_batch_norm_legit.default": None, + 
"torch.ops.aten._native_batch_norm_legit_no_training.default": None, "torch.ops.aten.native_group_norm.default": None, "torch.ops.aten.native_layer_norm.default": None, "torch.ops.aten.neg.default": None, diff --git a/src/frontends/pytorch/src/op/batch_norm.cpp b/src/frontends/pytorch/src/op/batch_norm.cpp index 1c7528e8ed3ad8..126588eb952a6b 100644 --- a/src/frontends/pytorch/src/op/batch_norm.cpp +++ b/src/frontends/pytorch/src/op/batch_norm.cpp @@ -39,10 +39,13 @@ Output broadcast_const_to_channel_dim(const NodeContext& context, } } // namespace -OutputVector translate_batch_norm(const NodeContext& context) { +OutputVector translate_batch_norm_common(const NodeContext& context, bool training) { // Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, // bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor - num_inputs_check(context, 8, 9); + + // batch_norm_legit_no_training Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? + // running_mean, Tensor? 
running_var, float momentum, float eps) -> Tensor + auto input = context.get_input(0); Output weight; Output bias; @@ -63,7 +66,6 @@ OutputVector translate_batch_norm(const NodeContext& context) { bias = broadcast_const_to_channel_dim(context, input, zero_f); } // index 3 running_mean and index 4 running_var can be none for training case only, check that not training before - auto training = context.const_input(5); // if training for batch norm activated, but model in eval mode, it uses current statistics instead of running if (training) { auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); @@ -92,14 +94,34 @@ OutputVector translate_batch_norm(const NodeContext& context) { running_var = current_var; } // Input with index 6 is momentum, it is used only for updating running_mean accumulation during training - auto epsilon = context.const_input(7); + // In batch_norm_legit_no_training, momentum is index 5 and epsilon is 6 + float epsilon; + if (context.get_input_size() == 7) { + epsilon = context.const_input(6); + } else { + epsilon = context.const_input(7); + } // Input with index 8 is flag "cudnn_enabled" we can ignore it return {context.mark_node( std::make_shared(input, weight, bias, running_mean, running_var, epsilon))}; }; -OutputVector translate_batch_norm_fx(const NodeContext& context) { - auto output = translate_batch_norm(context); +OutputVector translate_batch_norm(const NodeContext& context) { + num_inputs_check(context, 7, 9); + auto training = context.const_input(5); + return translate_batch_norm_common(context, training); +} + +OutputVector translate_batch_norm_legit_fx(const NodeContext& context) { + num_inputs_check(context, 7, 9); + auto training = context.const_input(5); + auto output = translate_batch_norm_common(context, training); + return {context.mark_node(make_list_construct(output))}; +} + +OutputVector translate_batch_norm_legit_no_training_fx(const NodeContext& context) { + num_inputs_check(context, 7, 9); + 
auto output = translate_batch_norm_common(context, false); return {context.mark_node(make_list_construct(output))}; } diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index 63e61734544333..9476979a118bd7 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ b/src/frontends/pytorch/src/op/cat.cpp @@ -22,7 +22,8 @@ using namespace ov::op; OutputVector translate_cat_common(const NodeContext& context, const std::deque>& list_elems, - int64_t axis) { + int64_t axis, + bool is_fx) { if (list_elems.empty()) { // couldn't get list elements auto fw_node = std::make_shared(context.get_decoder(), OutputVector{context.get_input(0)}, 1); @@ -39,8 +40,8 @@ OutputVector translate_cat_common(const NodeContext& context, "::cat is located inside body while inputs are located outside of the body. " "This case is not supported."); if (list_elems.size() == 1 && - !std::dynamic_pointer_cast(context.get_input(0).get_node_shared_ptr())) { - // Case when list was merged into tensor + !std::dynamic_pointer_cast(context.get_input(0).get_node_shared_ptr()) && !is_fx) { + // Case when list was merged into tensor. 
// This case doesn't work with torchfx auto tensor = list_elems[0]; auto shape = context.mark_node(std::make_shared(tensor, element::i32)); auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); @@ -63,7 +64,7 @@ OutputVector translate_cat(const NodeContext& context) { num_inputs_check(context, 2, 3); const auto&& list_elems = get_list_as_outputs(context.get_input(0)); auto axis = context.const_input(1); - auto out = translate_cat_common(context, list_elems, axis); + auto out = translate_cat_common(context, list_elems, axis, false); if (!context.input_is_none(2)) { context.mutate_input(2, out[0]); } @@ -78,7 +79,7 @@ OutputVector translate_cat_fx(const NodeContext& context) { list_elems.push_back(context.get_input(static_cast(i))); } auto axis = context.const_input(context.get_input_size() - 1); - return translate_cat_common(context, list_elems, axis); + return translate_cat_common(context, list_elems, axis, true); }; OutputVector translate_quantized_cat(const NodeContext& context) { @@ -87,7 +88,7 @@ OutputVector translate_quantized_cat(const NodeContext& context) { auto axis = context.const_input(1); FRONT_END_OP_CONVERSION_CHECK(!list_elems.empty(), "Couldn't find quantized input for quantized::cat operation."); return {quantize(context, - translate_cat_common(context, list_elems, axis)[0], + translate_cat_common(context, list_elems, axis, false)[0], context.get_input(2), context.get_input(3), list_elems.front())}; diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 3f71d22e428c5f..933f9a48eeb389 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -213,7 +213,8 @@ OP_CONVERTER(translate_quantized_linear); OP_CONVERTER(translate_xor); // Torch FX Translations OP_CONVERTER(translate_arange_fx); -OP_CONVERTER(translate_batch_norm_fx); +OP_CONVERTER(translate_batch_norm_legit_fx); +OP_CONVERTER(translate_batch_norm_legit_no_training_fx); 
OP_CONVERTER(translate_cat_fx); OP_CONVERTER(translate_chunk_fx); OP_CONVERTER(translate_expand_fx); @@ -612,7 +613,9 @@ const std::map get_supported_ops_fx() { {"aten.mm.default", op::translate_1to1_match_2_inputs}, {"aten.mul.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.mul.Scalar", op::translate_1to1_match_2_inputs_align_types}, - {"aten.native_batch_norm.default", op::translate_batch_norm_fx}, + {"aten.native_batch_norm.default", op::translate_batch_norm_legit_fx}, + {"aten._native_batch_norm_legit.default", op::translate_batch_norm_legit_fx}, + {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx}, {"aten.native_group_norm.default", op::translate_group_norm_fx}, {"aten.native_layer_norm.default", op::translate_layer_norm_fx}, {"aten.neg.default", op::translate_neg}, From 03f23ae57ad9661f0366bcb3246baa7a3de2782d Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Wed, 1 Nov 2023 08:07:40 +0000 Subject: [PATCH 170/275] [CI] [GHA] Fix `openvino.test_utils` imports for Mac and Win Python unittests (#20786) * add platform-agnostic setup python action * use specific version * rm debug message, checkout action * correct path * add checkout of the action, correct paths * correct path; enclose into brackets * transfer linux pipelines to local setup-python action * transfer pipelines * use newer version * account for fedora, add missing cache path * correct name * use 3.9 for fedora * rm python install from fedora * mv fetch and setup together, set pip_cache_dir * correct order * rm triggers * add missing pythonpaths * correct path * add one more pythonpath * add paths to ov package libs * Revert "add paths to ov package libs" This reverts commit a775881f3e097fd3ef58806e78b3943df794a746. 
--- .github/actions/setup_python/action.yml | 63 ++++++ .github/workflows/linux.yml | 197 +++++++++--------- .../linux_conditional_compilation.yml | 14 +- .github/workflows/mac.yml | 42 +++- .github/workflows/windows.yml | 19 +- .../windows_conditional_compilation.yml | 28 ++- 6 files changed, 241 insertions(+), 122 deletions(-) create mode 100644 .github/actions/setup_python/action.yml diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml new file mode 100644 index 00000000000000..5c26561cac3ca2 --- /dev/null +++ b/.github/actions/setup_python/action.yml @@ -0,0 +1,63 @@ +name: 'Setup Python and pip cache' +description: 'Setups Python with the provided version and sets up the pip cache' +inputs: + version: + description: 'Python version to install' + required: true + pip-cache-path: + description: 'Path on share where the pip cache is stored' + required: false + should-setup-pip-paths: + description: 'If the action should setup `PIP_CACHE_DIR` & `PIP_INSTALL_PATH` env variables' + required: false + default: 'false' + self-hosted-runner: + description: 'If the runner is self-hosted' + required: false + default: 'true' +runs: + using: 'composite' + steps: + + - if: ${{ runner.os == 'Linux' && inputs.self-hosted-runner == 'true' }} + name: Install 'actions/setup-python@v4' dependencies + shell: bash + run: apt-get update && apt-get install -y ca-certificates + + - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' }} + name: Setup sudo + shell: bash + run: apt-get update && apt-get install -y sudo # Needed for the deadsnakes action + + - if: ${{ runner.os == 'Linux' && runner.arch == 'ARM64' }} + name: Setup Python ${{ inputs.version }} + uses: deadsnakes/action@v3.0.1 + with: + python-version: ${{ inputs.version }} + + - if: ${{ runner.os == 'macOS' || runner.os == 'Windows' || (runner.os == 'Linux' && runner.arch != 'ARM64') }} + name: Setup Python ${{ inputs.version }} + uses: actions/setup-python@v4 + with: + 
python-version: ${{ inputs.version }} + env: + PIP_CACHE_DIR: ${{ inputs.self-hosted-runner == 'true' && inputs.pip-cache-path || '' }} + + - if: ${{ inputs.should-setup-pip-paths == 'true' }} + name: Setup pip variables (cache and install path) + shell: bash + run: | + PIP_VER=$(python3 -c "import pip; print(pip.__version__)") + echo "Using pip version: ${PIP_VER}" + echo "PIP_CACHE_DIR=${{ inputs.pip-cache-path }}/${PIP_VER}" >> $GITHUB_ENV + echo "PIP_INSTALL_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" >> $GITHUB_ENV + + - if: ${{ inputs.should-setup-pip-paths == 'true' }} + name: Get pip cache info + shell: bash + run: | + echo "Cache size: " + du -h -d2 ${{ env.PIP_CACHE_DIR }} + echo "Cache info: " + python3 -m pip cache info + continue-on-error: true diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 356b1afb8d9d9b..83b27ef6ab0e45 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -96,30 +96,20 @@ jobs: run: | bash ${OPENVINO_REPO}/install_build_dependencies.sh # default-jdk - Java API - # libssl1.1 - 'python3 -m pip' in self-hosted runner - apt install --assume-yes --no-install-recommends default-jdk libssl1.1 + apt install --assume-yes --no-install-recommends default-jdk - name: Install sccache uses: mozilla-actions/sccache-action@v0.0.3 with: version: "v0.5.4" - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV - - name: Get pip cache info - run: | - echo "Cache size: " - du -h -d2 ${PIP_CACHE_DIR} - echo "Cache info: " - python -m pip cache info - continue-on-error: true + version: ${{ env.PYTHON_VERSION }} + pip-cache-path: ${{ 
env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' - name: Install python dependencies run: | @@ -348,21 +338,23 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - - name: Install 'actions/setup-python@v4' dependencies - run: apt-get update && apt-get install -y libssl1.1 + - name: Install OpenVINO dependencies + run: ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' - - name: Install OpenVINO dependencies - run: ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Build cpp samples - GCC run: ${INSTALL_DIR}/samples/cpp/build_samples.sh -i ${INSTALL_DIR} -b ${BUILD_DIR}/cpp_samples @@ -427,7 +419,6 @@ jobs: - TEST_TYPE: 'OP' - TEST_TYPE: 'API' env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests CONFORMANCE_TOOLS_DIR: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary @@ -435,7 +426,6 @@ jobs: TEST_DEVICE: 'CPU' steps: - - name: Create Directories run: | mkdir -p ${CONFORMANCE_ARTIFACTS_DIR} @@ -465,17 +455,25 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - - uses: actions/setup-python@v4 + - name: 
Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install Dependencies run: | sudo -E ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -y - # Needed for downloading IRs from storage.openvinotoolkit with Python urllib - sudo apt-get update && sudo apt-get install --assume-yes --no-install-recommends ca-certificates - python3 -m pip install -r ${CONFORMANCE_TOOLS_DIR}/requirements.txt # @@ -533,23 +531,25 @@ jobs: ONNX_RUNTIME_BUILD_DIR: /__w/openvino/openvino/onnxruntime/build steps: - - name: Fetch install_build_dependencies.sh + - name: Fetch install_build_dependencies.sh and setup_python action uses: actions/checkout@v4 with: sparse-checkout: | install_build_dependencies.sh + .github/actions/setup_python/action.yml sparse-checkout-cone-mode: false path: ${{ env.OPENVINO_REPO }} - ref: 'master' - name: Install git run: | apt-get update apt-get install --assume-yes --no-install-recommends git ca-certificates - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' # # Initialize OpenVINO @@ -910,19 +910,20 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - - name: Install 'actions/setup-python@v4' dependencies - run: apt-get update && apt-get install -y libssl1.1 ca-certificates - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 with: - python-version: ${{ env.PYTHON_VERSION }} - env: - PIP_CACHE_DIR: ${{ 
env.PIP_CACHE_PATH }} - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV - echo "PIP_INSTALL_PATH=${Python_ROOT_DIR}/lib/python${PYTHON_VERSION}/site-packages" >> $GITHUB_ENV + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: ${{ env.OPENVINO_REPO }} + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' - name: Install OpenVINO Python wheels run: | @@ -1118,6 +1119,7 @@ jobs: container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 env: + OPENVINO_REPO: /__w/openvino/openvino/openvino INSTALL_DIR: /__w/openvino/openvino/install INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests PARALLEL_TEST_SCRIPT: /__w/openvino/openvino/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py @@ -1148,12 +1150,19 @@ jobs: - name: Install OpenVINO dependencies run: bash ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -y - - name: Install 'actions/setup-python@v4' dependencies - run: apt-get update && apt-get install -y libssl1.1 + - name: Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: ${{ env.OPENVINO_REPO }} - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' - name: Install python dependencies for run_parallel.py run: python3 -m pip install -r ${INSTALL_TEST_DIR}/functional_test_utils/layer_tests_summary/requirements.txt @@ 
-1209,6 +1218,7 @@ jobs: # volumes: # - /mount/caches:/mount/caches env: + OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests @@ -1217,19 +1227,6 @@ jobs: - name: Check sudo run: if [ "$(id -u)" -eq 0 ]; then apt update && apt --assume-yes install sudo; fi - - name: Install 'actions/setup-python@v4' dependencies - run: sudo apt-get update && sudo apt-get install -y libssl1.1 ca-certificates - - - uses: actions/setup-python@v4 - with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV - - name: Download OpenVINO package uses: actions/download-artifact@v3 with: @@ -1252,6 +1249,21 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd + - name: Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + - name: Install OpenVINO Python wheels run: python3 -m pip install ${INSTALL_DIR}/tools/openvino-* @@ -1290,6 +1302,7 @@ jobs: # volumes: # - /mount/caches:/mount/caches env: + OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests @@ -1300,8 +1313,6 @@ jobs: - name: Install dependencies run: | - # libssl1.1 - 'python3 -m pip' in self-hosted runner - sudo apt install --assume-yes 
--no-install-recommends libssl1.1 # install git (required to build pip deps from the sources) # install 'g++' to build 'detectron2' and 'natten' wheels sudo apt-get install --assume-yes --no-install-recommends g++ git ca-certificates @@ -1327,14 +1338,20 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${INSTALL_DIR} popd - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 with: - python-version: ${{ env.PYTHON_VERSION }} - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install OpenVINO Python wheels run: python3 -m pip install ${INSTALL_DIR}/tools/openvino-* @@ -1398,6 +1415,15 @@ jobs: SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release steps: + + - name: Fetch install_build_dependencies.sh + uses: actions/checkout@v4 + with: + sparse-checkout: | + install_build_dependencies.sh + sparse-checkout-cone-mode: false + path: ${{ env.OPENVINO_REPO }} + - name: Install Prerequisites run: apt update && apt install -y git ca-certificates @@ -1423,21 +1449,6 @@ jobs: tar -xzf openvino_developer_package.tar.gz -C ${INSTALL_DIR} popd - # TODO: replace with sparse checkout below - - name: Clone OpenVINO - uses: actions/checkout@v4 - with: - path: ${{ env.OPENVINO_REPO }} - - - name: Fetch install_build_dependencies.sh - if: ${{ 'false' }} - uses: actions/checkout@v4 - with: - sparse-checkout: | - install_build_dependencies.sh - sparse-checkout-cone-mode: false - path: ${{ env.OPENVINO_REPO }} - - name: Clone OpenVINO Contrib uses: actions/checkout@v4 with: 
diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 7fac5d9a1bde98..a264bea8436ee3 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -100,15 +100,13 @@ jobs: with: version: "v0.5.4" - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Setup pip cache dir - run: | - PIP_VER=$(python3 -c "import pip; print(pip.__version__)") - echo "Using pip version: ${PIP_VER}" - echo "PIP_CACHE_DIR=${PIP_CACHE_PATH}/${PIP_VER}" >> $GITHUB_ENV + version: ${{ env.PYTHON_VERSION }} + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' - name: Install python dependencies run: | diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 86bee3e82e8df5..883be21828b6f6 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -88,9 +88,12 @@ jobs: - name: Install build dependencies run: brew install coreutils ninja scons - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install python dependencies run: | @@ -210,11 +213,13 @@ jobs: machine: 'macos-13-xlarge' runs-on: ${{ matrix.machine }} env: + OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests BUILD_DIR: ${{ github.workspace }}/build steps: + # # Initialize OpenVINO # @@ -244,9 +249,20 @@ jobs: - name: Install dependencies run: brew install coreutils - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: 
actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Build cpp samples run: ${INSTALL_DIR}/samples/cpp/build_samples.sh -i ${INSTALL_DIR} -b ${BUILD_DIR}/cpp_samples @@ -551,9 +567,12 @@ jobs: # Dependencies # - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Download OpenVINO package uses: actions/download-artifact@v3 @@ -623,9 +642,9 @@ jobs: - name: MO Python API Tests run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH - # TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' - source ${INSTALL_DIR}/setupvars.sh + + # Used for 'test_utils' installed in '/python/openvino/test_utils' + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/python/openvino/test_utils:${{ env.INSTALL_TEST_DIR }}/python:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests/ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml env: @@ -635,8 +654,9 @@ jobs: - name: OVC Python API Tests run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - # TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' - source ${{ env.INSTALL_DIR }}/setupvars.sh + + # Used for 'test_utils' installed in '/python/openvino/test_utils' + export PYTHONPATH=${{ 
env.INSTALL_TEST_DIR }}/python/openvino/test_utils:${{ env.INSTALL_TEST_DIR }}/python:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml env: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index d57d9f58b59786..7e0d840ac8c7af 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -73,9 +73,12 @@ jobs: # Dependencies # - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install python dependencies run: | @@ -418,8 +421,10 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino' - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml + :: Used for 'test_utils' installed in '\python\openvino\test_utils' + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\python\openvino\test_utils;${{ env.INSTALL_TEST_DIR }}\python;%PYTHONPATH% + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 @@ -429,8 +434,10 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - :: TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml 
+ :: Used for 'test_utils' installed in '\python\openvino\test_utils' + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\python\openvino\test_utils;${{ env.INSTALL_TEST_DIR }}\python;%PYTHONPATH% + + python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 976daa18272796..0304cbfe92bf7e 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -75,13 +75,30 @@ jobs: # Dependencies # - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install build dependencies run: choco install --no-progress ninja + - name: Install python dependencies + run: | + # For running ONNX frontend unit tests + python3 -m pip install --force-reinstall -r ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/requirements.txt + + # For running TensorFlow frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/tensorflow/tests/requirements.txt + + # For running TensorFlow Lite frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/tensorflow_lite/tests/requirements.txt + + # For running Paddle frontend unit tests + python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/frontends/paddle/tests/requirements.txt + # # Build # @@ -285,9 +302,12 @@ jobs: - name: Extract OpenVINO tests package run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: 
./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install python dependencies for run_parallel.py run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt From e6ff5edc3dcbc6821550e73eb24585fab96ec12a Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Wed, 1 Nov 2023 15:42:25 +0400 Subject: [PATCH 171/275] [CPU] Empty tensor used with a custom op leads to CPU plugin exception. (#19733) --- src/plugins/intel_cpu/src/node.h | 1 + src/plugins/intel_cpu/src/nodes/reference.cpp | 20 +++++++++++++++---- src/plugins/intel_cpu/src/nodes/reference.h | 1 + .../subgraph_tests/src/custom_op_scalar.cpp | 1 + 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 4b6fa3a87f72dd..dba25591386bb2 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -42,6 +42,7 @@ #include "nodes/executors/executor.hpp" #define THROW_CPU_NODE_ERR(...) OPENVINO_THROW(getTypeStr(), " node with name '", getName(), "' ", __VA_ARGS__) +#define CPU_NODE_ASSERT(condition, ...) 
OPENVINO_ASSERT(condition, getTypeStr(), " node with name '", getName(), "' ", __VA_ARGS__) namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 091e31813125cf..0ba82bacec5769 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -108,22 +108,34 @@ bool Reference::needShapeInfer() const { ov::TensorVector Reference::prepareInputs() const { ov::TensorVector inputs; - for (size_t i = 0; i < inputShapes.size(); i++) { + for (size_t i = 0lu; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().getData(); ov::Shape shape = ovCoreNode->get_input_partial_shape(i).rank().get_length() == 0 ? ov::Shape{} : getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(); - inputs.push_back(ov::Tensor(ovCoreNode->get_input_element_type(i), shape, srcDataPtr)); + + if (std::any_of(shape.begin(), shape.end(), [](const size_t dim) { return dim == 0lu; } )) { + inputs.push_back(ov::Tensor(ovCoreNode->get_input_element_type(i), shape)); + } else { + CPU_NODE_ASSERT(srcDataPtr, "has empty input data on port ", i); + inputs.push_back(ov::Tensor(ovCoreNode->get_input_element_type(i), shape, srcDataPtr)); + } } return inputs; } ov::TensorVector Reference::prepareOutputs() const { ov::TensorVector outputs; - for (size_t i = 0; i < outputShapes.size(); i++) { + for (size_t i = 0lu; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().getData(); ov::Shape shape = ovCoreNode->get_output_partial_shape(i).rank().get_length() == 0 ? 
ov::Shape{} : getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(); - outputs.push_back(ov::Tensor(ovCoreNode->get_output_element_type(i), shape, dstDataPtr)); + + if (std::any_of(shape.begin(), shape.end(), [](const size_t dim) { return dim == 0lu; } )) { + outputs.push_back(ov::Tensor(ovCoreNode->get_output_element_type(i), shape)); + } else { + CPU_NODE_ASSERT(dstDataPtr, "has empty output data on port ", i); + outputs.push_back(ov::Tensor(ovCoreNode->get_output_element_type(i), shape, dstDataPtr)); + } } return outputs; } diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h index c2453835229138..9f35f0398e5ce0 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.h +++ b/src/plugins/intel_cpu/src/nodes/reference.h @@ -22,6 +22,7 @@ class Reference : public Node { bool needShapeInfer() const override; bool needPrepareParams() const override { return false; } + bool isExecutable() const override { return true; } void executeDynamicImpl(dnnl::stream strm) override; private: diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp index 15eef344b6ffd2..c108d067a47311 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp @@ -140,6 +140,7 @@ TEST_P(CustomOpScalarCPUTest, CompareWithRefs) { const std::vector inputShapes = { {{}, {{2, 3, 16}}}, + {{2, 3, -1}, {{2, 3, 0}}}, {{}, {{}}} }; From 4084e9acc04973c6090c2033e5b46a79bdc84105 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Wed, 1 Nov 2023 15:02:36 +0100 Subject: [PATCH 172/275] Handle Reshape's special zero in SimplifySecondInputOfReshape (#20785) * Handle Reshape's special zero in SimplifySecondInputOfReshape SimplifySecondInputOfReshape detects ShapeOf->Gather->Concat subgraphs on Reshape's second input and replaces 
ShapeOf->Gather with a Constant with zero(s). Currently it works only with Reshapes that have special_zero set to true, but it can work for Reshapes with special_zero == false if non-Gather inputs to Concat are Constants and don't contain any zero. Ticket: CVS-123434 * fix no default output --- .../simplify_shape_of_sub_graph.cpp | 31 ++++++++---- .../simplify_second_input_of_reshape_test.cpp | 50 +++++++++++++++++++ 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index 5ef33a33326e00..7facf950ee7bd4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -200,7 +200,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { matcher_pass_callback callback = [=](Matcher& m) { auto node = m.get_match_root(); const auto reshape = as_type_ptr(node); - if (!reshape || reshape->get_special_zero() == false) { + if (!reshape) { return false; } @@ -219,7 +219,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { auto check_shape_of_gather = [&](const std::shared_ptr& gather) { auto shape_of = gather->get_input_node_shared_ptr(0); - if (!is_type(shape_of) && !is_type(shape_of)) { + if (!is_type(shape_of)) { return false; } return shape_of->input_value(0) == data; @@ -237,16 +237,15 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { gather_dims_expected_location += concat_input_shape[0]; }; + bool special_zero = reshape->get_special_zero(); + // We need this check to avoid sequences shapeOf -> gather -> concat // that change the arrangement of dimensions in the reshape pattern for (auto& concat_input : new_concat_inputs) { - if (const auto 
gather = as_type_ptr(concat_input.get_node_shared_ptr())) { - auto indices_constant = as_type_ptr(gather->get_input_node_shared_ptr(1)); - if (!indices_constant || !check_shape_of_gather(gather)) { - update_expected_gather_location(gather); - continue; - } - + auto node = concat_input.get_node_shared_ptr(); + if (ov::is_type(node) && + ov::is_type(node->get_input_node_shared_ptr(1)) && check_shape_of_gather(node)) { + auto indices_constant = as_type_ptr(node->get_input_node_shared_ptr(1)); bool gather_can_be_fused = true; const auto indices = indices_constant->cast_vector(); for (size_t i = 0; i < indices.size(); ++i) { @@ -258,11 +257,21 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { if (gather_can_be_fused) { const size_t num_of_unchanged_dimensions = indices.size(); - const auto subgraph_et = gather->get_input_element_type(0); + const auto subgraph_et = node->get_input_element_type(0); concat_input = v0::Constant::create(subgraph_et, Shape{num_of_unchanged_dimensions}, {0}); gather_folded = true; } } else { + if (!special_zero) { + // If special zero is false - check if other inputs to Concat are Constants. + // If any of those Constants contain zero - return false. 
+ auto constant = as_type_ptr(node); + if (!constant) + return false; + auto values = constant->cast_vector(); + if (std::find(values.begin(), values.end(), 0) != values.end()) + return false; + } update_expected_gather_location(concat_input); } } @@ -275,7 +284,7 @@ pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { new_concat->set_friendly_name(concat->get_friendly_name()); copy_runtime_info(concat, new_concat); - const auto new_reshape = reshape->clone_with_new_inputs({reshape->input_value(0), new_concat}); + const auto new_reshape = std::make_shared(reshape->input_value(0), new_concat, true); new_reshape->set_friendly_name(reshape->get_friendly_name()); copy_runtime_info(reshape, new_reshape); diff --git a/src/common/transformations/tests/common_optimizations/simplify_second_input_of_reshape_test.cpp b/src/common/transformations/tests/common_optimizations/simplify_second_input_of_reshape_test.cpp index 7431174daaa0ae..cd8ca2f1f0a640 100644 --- a/src/common/transformations/tests/common_optimizations/simplify_second_input_of_reshape_test.cpp +++ b/src/common/transformations/tests/common_optimizations/simplify_second_input_of_reshape_test.cpp @@ -611,3 +611,53 @@ TEST_F(TransformationTestsF, SimplifySecondInputOfReshapeTest21) { } comparator.enable(FunctionsComparator::CONST_VALUES); } + +TEST_F(TransformationTestsF, SimplifySecondInputOfReshapeTestFalseSpecialZero) { + PartialShape data_shape{1, 128, 12, 64}; + { + auto data = std::make_shared(element::f32, data_shape); + + auto shape_of = std::make_shared(data); + auto gather_op = gather(shape_of, std::vector{0, 1}); + auto constant = opset7::Constant::create(element::i64, Shape{1}, {768}); + auto concat = std::make_shared(OutputVector{gather_op, constant}, -1); + + auto reshape = std::make_shared(data, concat, false); + model = std::make_shared(NodeVector{reshape}, ParameterVector{data}); + + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, data_shape); + auto 
reshape_pattern = opset7::Constant::create(element::i64, Shape{3}, {0, 0, 768}); + auto reshape = std::make_shared(data, reshape_pattern, true); + model_ref = std::make_shared(NodeVector{reshape}, ParameterVector{data}); + } + comparator.enable(FunctionsComparator::ATTRIBUTES); + comparator.enable(FunctionsComparator::CONST_VALUES); +} + +TEST_F(TransformationTestsF, SimplifySecondInputOfReshapeTestFalseSpecialZeroZeroDim) { + PartialShape data_shape{1, 0, 12, 64}; + { + auto data = std::make_shared(element::f32, data_shape); + + auto shape_of = std::make_shared(data); + auto gather_op = gather(shape_of, std::vector{0, 1}); + auto constant = opset7::Constant::create(element::i64, Shape{1}, {768}); + auto concat = std::make_shared(OutputVector{gather_op, constant}, -1); + + auto reshape = std::make_shared(data, concat, false); + model = std::make_shared(NodeVector{reshape}, ParameterVector{data}); + + manager.register_pass(); + } + { + auto data = std::make_shared(element::f32, data_shape); + auto reshape_pattern = opset7::Constant::create(element::i64, Shape{3}, {0, 0, 768}); + auto reshape = std::make_shared(data, reshape_pattern, true); + model_ref = std::make_shared(NodeVector{reshape}, ParameterVector{data}); + } + comparator.enable(FunctionsComparator::ATTRIBUTES); + comparator.enable(FunctionsComparator::CONST_VALUES); +} From eea49f3c9e6bba5463460fdc126c2df38a4a5215 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Wed, 1 Nov 2023 18:20:22 +0000 Subject: [PATCH 173/275] [CI] [GHA] Remove full cloning of the OV repository in the Python unittests job in the Win and Mac workflows (#20810) * rm full cloning of the OV repo in win and mac workflows * skip tests on mac * increase timeout for win C++ tests; skip more mac tests * rm triggers --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 56 +++++++++++-------- .github/workflows/windows.yml | 36 +++++------- tests/layer_tests/pytorch_tests/test_loop.py | 4 +- .../test_tf_NonMaxSupression.py | 
4 ++ 5 files changed, 55 insertions(+), 47 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 83b27ef6ab0e45..0e3d5feba1a8af 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -978,7 +978,7 @@ jobs: # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately python3 -m pytest -sv ${INSTALL_TEST_DIR}/onnx -k 'not cuda' \ --junitxml=${INSTALL_TEST_DIR}/TEST-onnx_frontend.xml \ - --ignore=${INSTALL_TEST_DIR}/onnx/test_python/test_zoo_models.py \ + --ignore=${INSTALL_TEST_DIR}/onnx/test_python/test_zoo_models.py - name: OVC unit tests run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 883be21828b6f6..a69f460bcee976 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -557,15 +557,13 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Clone OpenVINO + - name: Fetch setup_python action uses: actions/checkout@v4 with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false path: 'openvino' - submodules: 'true' - - # - # Dependencies - # - name: Setup Python ${{ env.PYTHON_VERSION }} uses: ./openvino/.github/actions/setup_python @@ -574,6 +572,10 @@ jobs: should-setup-pip-paths: 'false' self-hosted-runner: 'false' + # + # Dependencies + # + - name: Download OpenVINO package uses: actions/download-artifact@v3 with: @@ -596,14 +598,6 @@ jobs: tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} popd - - name: Install Python API tests dependencies - run: | - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt 
- - # TODO: replace with Python API tests requirements - python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_dev.txt - - name: Install OpenVINO Python wheels run: | # Install the core OV wheel @@ -621,6 +615,14 @@ jobs: python3 -m pip install $ov_dev_wheel_name[$extras_to_install] popd + - name: Install Python API tests dependencies + run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/python/preprocess/torchvision/requirements.txt + + # TODO: replace with Python API tests requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/mo/requirements_dev.txt + - name: Python API 1.0 Tests run: | python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/pyngraph \ @@ -684,7 +686,7 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx.xml env: @@ -695,7 +697,7 @@ jobs: run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml env: @@ -705,7 +707,8 @@ jobs: if: ${{ 'false' }} # Ticket: 123322 run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ 
env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml env: @@ -714,14 +717,16 @@ jobs: - name: TensorFlow 1 Layer Tests - Legacy FE run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_Roll.xml - name: TensorFlow 2 Layer Tests - Legacy FE run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/test_tf2_keras_activation.py \ --ir_version=11 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml -k "sigmoid" @@ -732,7 +737,8 @@ jobs: - name: TensorFlow Lite Layer Tests - TFL FE run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_lite_tests/ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tfl_fe.xml env: @@ -743,14 +749,16 @@ jobs: if: ${{ 'false' }} # Ticket: 123325 run: | # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately - python3 -m pytest -sv ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k 'not cuda' \ + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/onnx -k 'not cuda' \ --junitxml=${{ env.INSTALL_TEST_DIR 
}}/TEST-onnx_frontend.xml \ - --ignore=${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/test_python/test_zoo_models.py + --ignore=${{ env.INSTALL_TEST_DIR }}/onnx/test_python/test_zoo_models.py - name: Python Frontend tests run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - export PYTHONPATH=${{ env.OPENVINO_REPO }}/tools/mo/:${{ env.LAYER_TESTS_INSTALL_DIR }}:$PYTHONPATH + + export PYTHONPATH=${{ env.INSTALL_TEST_DIR }}/mo:$PYTHONPATH + # to allow 'libtest_builtin_extensions.so' to find 'libopenvino_onnx_frontend.so' source ${{ env.INSTALL_DIR }}/setupvars.sh @@ -758,7 +766,7 @@ jobs: # TODO: install to 'tests' component via cpack - name: OVC unit tests - run: python3 -m pytest -s ${{ env.OPENVINO_REPO }}/tools/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml + run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 7e0d840ac8c7af..e36f2f0204a489 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -295,23 +295,10 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" popd - - name: Clone OpenVINO - uses: actions/checkout@v4 - with: - path: 'openvino' - - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - - name: Install Python API tests dependencies - run: | - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt - - # TODO: replace with Python API tests requirements - python3 -m pip install -r ${{ env.OPENVINO_REPO }}/tools/mo/requirements_dev.txt - - name: Install OpenVINO Python wheels run: | # Find and install the core OV wheel @@ -322,6 +309,14 @@ jobs: 
$ovDevWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino_dev*.whl | % { $_.FullName } python3 -m pip install "$ovDevWheelPath[mxnet,caffe,kaldi,onnx,tensorflow2,pytorch]" + - name: Install Python API tests dependencies + run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/python/preprocess/torchvision/requirements.txt + + # TODO: replace with Python API tests requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/mo/requirements_dev.txt + - name: Python API 1.0 Tests shell: cmd run: | @@ -354,7 +349,7 @@ jobs: python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml env: TEST_DEVICE: CPU @@ -366,7 +361,7 @@ jobs: python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/ --use_new_frontend -m precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_fe.xml env: TEST_DEVICE: CPU @@ -378,7 +373,7 @@ jobs: python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/ --use_new_frontend -m 
precommit_tf_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_fe.xml env: @@ -412,9 +407,9 @@ jobs: shell: cmd run: | :: Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately - python3 -m pytest ${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests -k "not cuda" ^ + python3 -m pytest ${{ env.INSTALL_TEST_DIR }}/onnx -k "not cuda" ^ --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx_frontend.xml ^ - --ignore=${{ env.OPENVINO_REPO }}/src/frontends/onnx/tests/test_python/test_zoo_models.py + --ignore=${{ env.INSTALL_TEST_DIR }}/onnx/test_python/test_zoo_models.py - name: MO Python API Tests shell: cmd @@ -449,10 +444,9 @@ jobs: call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml - # TODO: install to 'tests' component via cpack - name: OVC unit tests shell: cmd - run: python3 -m pytest -s ${{ env.OPENVINO_REPO }}/tools/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml + run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results uses: actions/upload-artifact@v3 @@ -465,7 +459,7 @@ jobs: CXX_Unit_Tests: name: C++ unit tests needs: Build - timeout-minutes: 15 + timeout-minutes: 25 defaults: run: shell: pwsh diff --git a/tests/layer_tests/pytorch_tests/test_loop.py b/tests/layer_tests/pytorch_tests/test_loop.py index 8c91833f437b18..c413d24854cae8 100644 --- a/tests/layer_tests/pytorch_tests/test_loop.py +++ b/tests/layer_tests/pytorch_tests/test_loop.py @@ -1,6 +1,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os +import platform import pytest import numpy as np @@ -39,6 +39,8 @@ def forward(self, x): @pytest.mark.parametrize("s,n", [([1, 1024, 3], 512), ([1, 512, 3], 128)]) 
@pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122715') def test_loop_alias(self, s, n, ie_device, precision, ir_version): self.shape = s self._test(*self.create_model(n), ie_device, precision, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_NonMaxSupression.py b/tests/layer_tests/tensorflow_tests/test_tf_NonMaxSupression.py index 35c8eacda3e03a..32f6d19aea048c 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_NonMaxSupression.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_NonMaxSupression.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -83,6 +85,8 @@ def create_nms_net(self, test_params: dict, with_scores: bool = False): @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_tf_fe + @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', + reason='Ticket - 122716') def test_NonMaxSuppression(self, test_params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): if ie_device == 'GPU': From 88c31717428f2187578dac57d0fb59b9260dd59b Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 2 Nov 2023 13:46:03 +0800 Subject: [PATCH 174/275] [CPU] Fix avx2 gather of bfloat16 (#20683) --- .../src/nodes/kernels/x64/gather_uni_kernel.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp index dbc7aa08b79770..51a94c60a4faf6 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/gather_uni_kernel.cpp @@ -744,8 +744,6 @@ void jitUniGatherKernel::process16b(bool isShortIdx, bool blocked) { mov(regAux1, 
reinterpret_cast(shufMask16bitUni)); uni_vmovups(vShufMask, ptr[regAux1]); - mov(regAux1, reinterpret_cast(permMask16bitUni)); - uni_vmovups(vPermMask, ptr[regAux1]); // First iteration shiftIdxAndGather(vmmAuxContainer, isShortIdx, false, blocked); @@ -755,6 +753,9 @@ void jitUniGatherKernel::process16b(bool isShortIdx, bool blocked) { vpshufb(vmmAuxContainer[0], vmmAuxContainer[2], vShufMask); vshufps(vmmAuxContainer[0], vBuff0, vmmAuxContainer[0], 0x44); + // vPermMask(vmm1) is override in shiftIdxAndGather, load the mask here for correctness + mov(regAux1, reinterpret_cast(permMask16bitUni)); + uni_vmovups(vPermMask, ptr[regAux1]); vpermd(vmmAuxContainer[0], vPermMask, vmmAuxContainer[0]); uni_vmovups(ptr[regDst], vmmAuxContainer[0]); @@ -774,6 +775,11 @@ void jitUniGatherKernel::process16b(bool isShortIdx, bool blocked) { vpshufb(vmmAuxContainer[0], vmmAuxContainer[2], vShufMask); vshufps(vmmAuxContainer[0], vBuff0, vmmAuxContainer[0], 0x44); + if (isa == x64::avx2) { + // Register vPermMask is invalidated by shiftIdxAndGather and must be initialized again. 
+ mov(regAux1, reinterpret_cast(permMask16bitUni)); + uni_vmovups(vPermMask, ptr[regAux1]); + } vpermd(vmmAuxContainer[0], vPermMask, vmmAuxContainer[0]); uni_vmovups(ptr[regDst], vmmAuxContainer[0]); From 2afb64b9eabee4a01fbd11c61428c1fbf0503238 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 2 Nov 2023 09:54:19 +0400 Subject: [PATCH 175/275] [GPU] Refactor KV cache test (#20802) --- .../subgraph_tests/dynamic/kv_cache.cpp | 56 +++++++++---------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp index a32e97d8e8e0fc..a1d90e0e239582 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -125,12 +125,13 @@ TEST(KVCacheTest, smoke_multipleIterations) { auto output0 = model->get_results().at(0); auto output1 = model->get_results().at(1); - auto infer_request = compiled_model.create_infer_request(); - auto input0_tensor_remote_io = infer_request.get_tensor(input0); - auto input1_tensor_remote_io = infer_request.get_tensor(input1); - auto input2_tensor_remote_io = infer_request.get_tensor(input2); - auto output0_tensor_remote_io = infer_request.get_tensor(output0); - auto output1_tensor_remote_io = infer_request.get_tensor(output1); + auto get_ref_results = [&model, &input0, &input1, &input2](const ov::Tensor& kv_cache, const ov::Tensor& new_token_data, const ov::Tensor& matmul_data) { + auto ref_model = model->clone(); + ov::Tensor kv_cache_copy(kv_cache.get_element_type(), kv_cache.get_shape()); + kv_cache.copy_to(kv_cache_copy); + ngraph::helpers::resize_function(ref_model, {kv_cache_copy.get_shape(), new_token_data.get_shape(), matmul_data.get_shape()}); + return ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_copy}, {input1, new_token_data}, {input2, 
matmul_data}}); + }; auto compare_tensors = [&model](const std::vector expected, const std::vector& actual) { ASSERT_EQ(expected.size(), actual.size()); @@ -154,28 +155,36 @@ TEST(KVCacheTest, smoke_multipleIterations) { } }; + auto infer_request = compiled_model.create_infer_request(); + auto kv_cache_input = infer_request.get_tensor(output0); + auto matmul_out = infer_request.get_tensor(output1); + auto new_token_input = infer_request.get_tensor(input1); + auto matmul_input = infer_request.get_tensor(input2); + + infer_request.set_tensor(input0, kv_cache_input); + infer_request.set_tensor(input1, new_token_input); + infer_request.set_tensor(input2, matmul_input); + { - const ov::Shape kv_cache_size_initial = {batch, n_heads, cache_size, n_features}; const ov::Shape new_token_size_initial = {batch, context_size, n_heads, n_features}; + const ov::Shape kv_cache_size_initial = {batch, n_heads, cache_size, n_features}; const ov::Shape matmul_in_size_initial = {batch, n_heads, context_size, context_size}; auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size_initial); auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_initial); - auto kv_cache_input = infer_request.get_tensor(input0); kv_cache_input.set_shape(kv_cache_size_initial); + new_token_input.set_shape(new_token_data.get_shape()); + matmul_input.set_shape(matmul_data.get_shape()); - auto ref_model = model->clone(); - ngraph::helpers::resize_function(ref_model, {kv_cache_input.get_shape(), new_token_data.get_shape(), matmul_data.get_shape()}); - auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); + new_token_data.copy_to(new_token_input); + matmul_data.copy_to(matmul_input); - infer_request.set_tensor(input0, kv_cache_input); - infer_request.set_tensor(input1, new_token_data); - infer_request.set_tensor(input2, matmul_data); + auto ref_results = 
get_ref_results(kv_cache_input, new_token_data, matmul_data); infer_request.infer(); - compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + compare_tensors(ref_results, {kv_cache_input, matmul_out}); cache_size += context_size; } @@ -188,29 +197,16 @@ TEST(KVCacheTest, smoke_multipleIterations) { ov::Shape matmul_in_size_loop = {batch, n_heads, input_tokens, context_length}; auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size); auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_loop); + auto ref_results = get_ref_results(kv_cache_input, new_token_data, matmul_data); - auto kv_cache_input = infer_request.get_tensor(output0); - auto kv_shape = kv_cache_input.get_shape(); - - auto ref_model = model->clone(); - ngraph::helpers::resize_function(ref_model, {kv_shape, new_token_data.get_shape(), matmul_data.get_shape()}); - auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); - - auto new_token_input = infer_request.get_tensor(input1); new_token_input.set_shape(new_token_data.get_shape()); - auto matmul_input = infer_request.get_tensor(input2); matmul_input.set_shape(matmul_data.get_shape()); - new_token_data.copy_to(new_token_input); matmul_data.copy_to(matmul_input); - infer_request.set_tensor(input0, kv_cache_input); - infer_request.set_tensor(input1, new_token_input); - infer_request.set_tensor(input2, matmul_input); - infer_request.infer(); - compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + compare_tensors(ref_results, {kv_cache_input, matmul_out}); } } From 3ff85ae70a1fddafc1e31a640a09a0a42083f9ff Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Thu, 2 Nov 2023 11:33:37 +0400 Subject: [PATCH 176/275] [CPU] Fixed port mismatch in Eltwise fusion graph optimization (#20782) --- 
src/plugins/intel_cpu/src/graph_optimizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index cf79a5b79f299d..fe8dae30e84405 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -2081,7 +2081,9 @@ void GraphOptimizer::FuseEltwiseAndSimple(Graph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentNode->inputShapes.push_back(parent->getOutputShapeAtPort(inNum)); + if (parentNode->inputShapes.size() < static_cast(outNum + 1)) + parentNode->inputShapes.resize(outNum + 1); + parentNode->inputShapes[outNum] = parent->getOutputShapeAtPort(inNum); } } From 4a6e6e64c552fe85b874863c3b8223ebb801aba7 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Thu, 2 Nov 2023 12:46:57 +0400 Subject: [PATCH 177/275] [TF FE] Fix conversion of FILM of keras.Model format (#20825) Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow/src/input_model.hpp | 4 +- .../tensorflow/src/op/partitioned_call.cpp | 15 +++++- src/frontends/tensorflow/src/tf_utils.cpp | 23 +++++++-- src/frontends/tensorflow/src/tf_utils.hpp | 3 +- .../tensorflow/src/translate_session.cpp | 47 ++++++++++++++----- .../tf_hub_tests/test_tf_hub_api_notebooks.py | 11 ++++- 6 files changed, 82 insertions(+), 21 deletions(-) diff --git a/src/frontends/tensorflow/src/input_model.hpp b/src/frontends/tensorflow/src/input_model.hpp index a95a4447cc03c3..64d60b48fb8ec2 100644 --- a/src/frontends/tensorflow/src/input_model.hpp +++ b/src/frontends/tensorflow/src/input_model.hpp @@ -24,11 +24,9 @@ class InputModel : public ov::frontend::InputModel { class InputModelTFImpl; std::shared_ptr _impl; - std::vector get_input_names() const; std::vector get_output_names() const; std::vector> get_op_places() const; std::map> get_tensor_values() const; - std::shared_ptr get_body_input_model(const std::string& body_input_model_name) 
const; public: explicit InputModel(const GraphIterator::Ptr& graph_iterator, @@ -58,6 +56,8 @@ class InputModel : public ov::frontend::InputModel { std::shared_ptr get_checkpoint_v1_reader() const; std::map> get_tensor_places() const; + std::shared_ptr get_body_input_model(const std::string& body_input_model_name) const; + std::vector get_input_names() const; }; } // namespace tensorflow diff --git a/src/frontends/tensorflow/src/op/partitioned_call.cpp b/src/frontends/tensorflow/src/op/partitioned_call.cpp index 635ea1a802d9b5..70b9557c911176 100644 --- a/src/frontends/tensorflow/src/op/partitioned_call.cpp +++ b/src/frontends/tensorflow/src/op/partitioned_call.cpp @@ -40,9 +40,22 @@ OutputVector translate_partitioned_call_op(const NodeContext& node) { "[TensorFlow Frontend] Internal error or incorrect input model: body graph is not found for " + operation_type + "."); + // retrieve input_names of the body graph + auto input_model = dynamic_pointer_cast(translate_session->get_input_model()); + TENSORFLOW_OP_VALIDATION( + node, + input_model, + "[TensorFlow Frontend] internal error: input_model must be of tensorflow::InputModel type"); + auto body_input_model = input_model->get_body_input_model(operation_type); + TENSORFLOW_OP_VALIDATION(node, + body_input_model, + "[TensorFlow Frontend] internal error or inconsistent model: body graph " + + operation_type + " is not found in the graph"); + auto body_input_names = body_input_model->get_input_names(); + // inject the body graph into the parent graph OutputVector ov_outputs; - inject_body_model(body_model, operation_type, ov_inputs, ov_outputs); + inject_body_model(body_model, operation_type, ov_inputs, ov_outputs, body_input_names); // set output tensor names for (size_t idx = 0; idx < ov_outputs.size(); ++idx) { diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index e298f49f92889f..68ddf3677a6189 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ 
b/src/frontends/tensorflow/src/tf_utils.cpp @@ -456,19 +456,34 @@ shared_ptr create_loop_for_tf_while(const std::string& while_node_name void inject_body_model(std::shared_ptr ov_model_to_inject, const std::string& operation_type, const ov::OutputVector& ov_inputs, - ov::OutputVector& ov_outputs) { + ov::OutputVector& ov_outputs, + const std::vector& ov_input_names) { ov_outputs.clear(); auto body_parameters = ov_model_to_inject->get_parameters(); - FRONT_END_GENERAL_CHECK(body_parameters.size() == ov_inputs.size(), + // some external inputs can be skipped if some body graph inputs turn to be Constant nodes + FRONT_END_GENERAL_CHECK(body_parameters.size() <= ov_inputs.size(), "[TensorFlow Error] Internal error or incorrect input models: number of " "inputs and arguments to the function " + operation_type + " do not match."); for (size_t param_ind = 0; param_ind < body_parameters.size(); ++param_ind) { + auto param_name = body_parameters[param_ind]->get_friendly_name(); + // find suitable index of external input + size_t ext_found_ind = param_ind; + if (ov_input_names.size() > 0) { + // only used for PartitionedCall translator + for (size_t ext_input_ind = 0; ext_input_ind < ov_input_names.size(); ++ext_input_ind) { + if (ov_input_names[ext_input_ind] == param_name) { + ext_found_ind = ext_input_ind; + break; + } + } + } + auto orig_type = body_parameters[param_ind]->get_element_type(); // avoid not needed tensor names from body graph Parameter node after replacing body_parameters[param_ind]->output(0).set_names({}); - body_parameters[param_ind]->output(0).replace(ov_inputs[param_ind]); - if (auto ext_parameter = as_type_ptr(ov_inputs[param_ind].get_node_shared_ptr())) { + body_parameters[param_ind]->output(0).replace(ov_inputs[ext_found_ind]); + if (auto ext_parameter = as_type_ptr(ov_inputs[ext_found_ind].get_node_shared_ptr())) { // save type of a Parameter as converted in the body // this is important if the external conversion extension is applied to body graph 
node // with setting its own type diff --git a/src/frontends/tensorflow/src/tf_utils.hpp b/src/frontends/tensorflow/src/tf_utils.hpp index 861fb56f552685..82c86c51c9ebc3 100644 --- a/src/frontends/tensorflow/src/tf_utils.hpp +++ b/src/frontends/tensorflow/src/tf_utils.hpp @@ -114,7 +114,8 @@ std::shared_ptr create_loop_for_tf_while(const std::string& wh void inject_body_model(std::shared_ptr ov_model_to_inject, const std::string& operation_type, const ov::OutputVector& ov_inputs, - ov::OutputVector& ov_outputs); + ov::OutputVector& ov_outputs, + const std::vector& ov_input_names = {}); } // namespace tensorflow } // namespace frontend } // namespace ov diff --git a/src/frontends/tensorflow/src/translate_session.cpp b/src/frontends/tensorflow/src/translate_session.cpp index 4038995c6cb693..ba88b0be30b4e2 100644 --- a/src/frontends/tensorflow/src/translate_session.cpp +++ b/src/frontends/tensorflow/src/translate_session.cpp @@ -26,7 +26,8 @@ std::vector reorder_ops_by_names(const std::vector& names, const // in case unspecified names, return the initial order of operations return ops; } - FRONT_END_GENERAL_CHECK(names.size() == ops.size(), + // some body graph input can turn to be a constant node + FRONT_END_GENERAL_CHECK(names.size() >= ops.size(), "[TensorFlow Frontend] Internal error: cannot perform reordering of operations. 
The number " "of names mismatches the number of operations."); std::vector resulted_ops(ops.size(), nullptr); @@ -700,18 +701,40 @@ std::shared_ptr TranslateSession::get_body_ov_model(const std::string // set input shapes and types for InputModel of the body graph // it allows to get more optimized model after the conversion, // for example, to get less sub-graphs with ShapeOf and Convert operations - auto inputs = body_input_model->get_inputs(); - size_t num_inputs = inputs.size(); - FRONT_END_GENERAL_CHECK(num_inputs == ov_inputs.size(), - "[TensorFlow Frontend] internal error: a number of external and internal inputs for a " - "body graph mismatch"); - for (size_t input_ind = 0; input_ind < num_inputs; ++input_ind) { - auto input_place = inputs[input_ind]; - if (input_types[input_ind].is_static()) { - body_input_model->set_element_type(input_place, input_types[input_ind]); + // input names set an order of body graph inputs + auto input_names = body_input_model->get_input_names(); + auto body_inputs = body_input_model->get_inputs(); + size_t int_num_inputs = body_inputs.size(); + size_t ext_num_inputs = ov_inputs.size(); + FRONT_END_GENERAL_CHECK(int_num_inputs <= ext_num_inputs, + "[TensorFlow Frontend] internal error: a number of external and " + "internal inputs for a body graph mismatch"); + FRONT_END_GENERAL_CHECK(input_names.size() == ext_num_inputs, + "[TensorFlow Frontend] internal error: a number of body graph names and external " + "inputs to body must match"); + for (size_t input_ind = 0; input_ind < ext_num_inputs; ++input_ind) { + auto required_input_name = input_names[input_ind]; + bool is_found_body_input = false; + size_t body_found_ind = 0; + for (size_t internal_ind = 0; internal_ind < int_num_inputs; ++internal_ind) { + auto body_input_place = body_inputs[internal_ind]; + auto body_input_names = body_input_place->get_names(); + if (std::find(body_input_names.begin(), body_input_names.end(), required_input_name) != + body_input_names.end()) { + 
is_found_body_input = true; + body_found_ind = internal_ind; + break; + } } - if (input_shapes[input_ind].rank().is_static()) { - body_input_model->set_partial_shape(input_place, input_shapes[input_ind]); + if (is_found_body_input) { + auto body_input_place = body_inputs[body_found_ind]; + // if body input with required name is found, set its type + if (input_types[input_ind].is_static()) { + body_input_model->set_element_type(body_input_place, input_types[input_ind]); + } + if (input_shapes[input_ind].rank().is_static()) { + body_input_model->set_partial_shape(body_input_place, input_shapes[input_ind]); + } } } diff --git a/tests/model_hub_tests/tf_hub_tests/test_tf_hub_api_notebooks.py b/tests/model_hub_tests/tf_hub_tests/test_tf_hub_api_notebooks.py index fe006d3860f56f..50c6f54f7c6c40 100644 --- a/tests/model_hub_tests/tf_hub_tests/test_tf_hub_api_notebooks.py +++ b/tests/model_hub_tests/tf_hub_tests/test_tf_hub_api_notebooks.py @@ -24,6 +24,15 @@ def load_model(self, model_name, model_link): softmax = tf.keras.layers.Dense(20, activation='softmax')(feature_vector) classification_model = tf.keras.Model(inputs=[image], outputs=[softmax]) return classification_model + elif model_name == 'film': + inputs = dict( + x0=tf.keras.layers.Input(shape=(200, 200, 3)), + x1=tf.keras.layers.Input(shape=(200, 200, 3)), + time=tf.keras.layers.Input(shape=(1)), + ) + film_layer = hub.KerasLayer("https://tfhub.dev/google/film/1")(inputs) + film_model = tf.keras.Model(inputs=inputs, outputs=list(film_layer.values())[0]) + return film_model else: raise "Unknown input model: {}".format(model_name) @@ -58,6 +67,6 @@ def infer_fw_model(self, model_obj, inputs): return post_outputs @pytest.mark.precommit - @pytest.mark.parametrize("model_name", ['mobilenet_v2_100_224_dict', 'mobilenet_v2_100_224_list']) + @pytest.mark.parametrize("model_name", ['mobilenet_v2_100_224_dict', 'mobilenet_v2_100_224_list', 'film']) def test_tf_hub_api_notebook1(self, model_name, ie_device): 
self.run(model_name, '', ie_device) From 4e70b3b33bc7bc3bbdff7fd94904bb3c9dd3e98a Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Thu, 2 Nov 2023 09:57:24 +0100 Subject: [PATCH 178/275] Remove WA with double loading of FE. (#20794) --- tools/ovc/openvino/tools/ovc/convert_impl.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index 3a746f646d8730..31d7482a1cdb40 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -133,15 +133,12 @@ def get_moc_frontends(argv: argparse.Namespace): available_moc_front_ends = get_available_front_ends(fem) if argv.framework: - moc_front_end = fem.load_by_framework(argv.framework) # WA to prevent process hanging. Need to remove when 115994 fixed. moc_front_end = fem.load_by_framework(argv.framework) return moc_front_end, available_moc_front_ends if argv.input_model: if isinstance(argv.input_model, (tuple, list)) and len(argv.input_model) == 2: - moc_front_end = fem.load_by_model([argv.input_model[0], argv.input_model[1]]) # WA to prevent process hanging. Need to remove when 115994 fixed. moc_front_end = fem.load_by_model([argv.input_model[0], argv.input_model[1]]) # TODO: Pass all input model parts else: - moc_front_end = fem.load_by_model(argv.input_model) # WA to prevent process hanging. Need to remove when 115994 fixed. 
moc_front_end = fem.load_by_model(argv.input_model) if not moc_front_end: return None, available_moc_front_ends From f8cd53bb77f4792a60e1ef45a97941536244b6ee Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 2 Nov 2023 13:35:05 +0100 Subject: [PATCH 179/275] [core]Migrate Floor operator to new API (#20830) * Migrate Floor operator to new API * Remove `visit_attributes` is same as base class --- src/core/include/openvino/op/floor.hpp | 5 +- .../include/openvino/reference/floor.hpp | 33 ++++- src/core/src/op/floor.cpp | 118 +++++++----------- 3 files changed, 77 insertions(+), 79 deletions(-) diff --git a/src/core/include/openvino/op/floor.hpp b/src/core/include/openvino/op/floor.hpp index 9321a911b36493..a88cb4a8ae4933 100644 --- a/src/core/include/openvino/op/floor.hpp +++ b/src/core/include/openvino/op/floor.hpp @@ -21,11 +21,8 @@ class OPENVINO_API Floor : public util::UnaryElementwiseArithmetic { /// \param arg Node that produces the input tensor. Floor(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/floor.hpp b/src/core/reference/include/openvino/reference/floor.hpp index d7fca06c5f3393..223463e46f91af 100644 --- a/src/core/reference/include/openvino/reference/floor.hpp +++ b/src/core/reference/include/openvino/reference/floor.hpp @@ -7,13 +7,36 @@ #include #include +#include "openvino/reference/copy.hpp" +#include "openvino/reference/utils/type_util.hpp" + namespace ov { namespace reference { -template -void floor(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < 
count; i++) { - out[i] = std::floor(arg[i]); - } + +/** + * @brief Reference implementation of Floor operator (integral types). + * + * @param arg Input pointer to data. + * @param out Output pointer to results. + * @param count Number of elements in input buffer. + */ +template ::value>::type* = nullptr> +void floor(const T* arg, T* out, const size_t count) { + reference::copy(arg, out, count); +} + +/** + * @brief Reference implementation of Floor operator (floating point types). + * + * @param arg Input pointer to data. + * @param out Output pointer to results. + * @param count Number of elements in input buffer. + */ +template ()>::type* = nullptr> +void floor(const T* arg, T* out, const size_t count) { + std::transform(arg, arg + count, out, [](const T v) { + return std::floor(v); + }); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/floor.cpp b/src/core/src/op/floor.cpp index 864b7dd8188448..c884dac18cab36 100644 --- a/src/core/src/op/floor.cpp +++ b/src/core/src/op/floor.cpp @@ -2,95 +2,73 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/floor.hpp" +#include "openvino/op/floor.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/util/eval_copy.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "openvino/reference/copy.hpp" #include "openvino/reference/floor.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace floor { -op::Floor::Floor(const Output& arg) : UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool ngraph::op::v0::Floor::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Floor_visit_attributes); - return true; -} + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::floor(arg.data(), out.data(), count); + return true; + } +}; +} // namespace floor -shared_ptr 
op::Floor::clone_with_new_inputs(const OutputVector& new_args) const { - OV_OP_SCOPE(v0_Floor_clone_with_new_inputs); - check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} +namespace v0 { -OPENVINO_SUPPRESS_DEPRECATED_START -namespace floorop { -namespace { -// function used by TYPE_CASE -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::floor(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; +Floor::Floor(const Output& arg) : UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -// function used by COPY_TENSOR -template -inline bool copy_tensor(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - ov::reference::copy(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; +std::shared_ptr Floor::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v0_Floor_clone_with_new_inputs); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); } -bool evaluate_floor(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool Floor::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Floor_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_COPY_TENSOR(evaluate_floor, i8, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, i16, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, i32, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, i64, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, u8, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, u16, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, u32, arg0, out, count); - OPENVINO_COPY_TENSOR(evaluate_floor, u64, arg0, out, count); - 
OPENVINO_TYPE_CASE(evaluate_floor, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_floor, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace floorop + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); -bool op::Floor::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Floor_evaluate); - return floorop::evaluate_floor(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Floor::has_evaluate() const { +bool Floor::has_evaluate() const { OV_OP_SCOPE(v0_Floor_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From 3f5f923a70091a7191e2009c2e4f17839336ada5 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Thu, 2 Nov 2023 14:31:08 +0100 Subject: [PATCH 180/275] [DOC] Update list of TF formats imported from memory. (#20834) * Update list of TF formats. * Minor correction. * Added comment. * Update docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md Co-authored-by: Roman Kazantsev * Model changed. 
* Update docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md Co-authored-by: Roman Kazantsev --------- Co-authored-by: Roman Kazantsev --- .../Convert_Model_From_TensorFlow.md | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md b/docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md index e74b45cbc82a91..5dca9f0775fd36 100644 --- a/docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md +++ b/docs/articles_en/openvino_workflow/model_preparation/Convert_Model_From_TensorFlow.md @@ -196,6 +196,31 @@ Converting TensorFlow Models from Memory Using Python API Model conversion API supports passing TensorFlow/TensorFlow2 models directly from memory. +* ``Trackable``. The object returned by ``hub.load()`` can be converted to ``ov.Model`` with ``convert_model()``. + + .. code-block:: py + :force: + + import tensorflow_hub as hub + import openvino as ov + + model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4") + ov_model = ov.convert_model(model) + +* ``tf.function`` + + .. code-block:: py + :force: + + @tf.function( + input_signature=[tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32), + tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32)]) + def func(x, y): + return tf.nn.sigmoid(tf.nn.relu(x + y)) + + import openvino as ov + ov_model = ov.convert_model(func) + * ``tf.keras.Model`` .. code-block:: py @@ -205,7 +230,7 @@ Model conversion API supports passing TensorFlow/TensorFlow2 models directly fro model = tf.keras.applications.ResNet50(weights="imagenet") ov_model = ov.convert_model(model) -* ``tf.keras.layers.Layer``. Requires saving model to TensorFlow ``saved_model`` file format and then loading to ``openvino.convert_model``. 
Saving to the file and then restoring is required due to a known bug in ``openvino.convert_model`` that ignores model signature. +* ``tf.keras.layers.Layer``. The ``ov.Model`` converted from ``tf.keras.layers.Layer`` does not contain original input and output names. So it is recommended to convert the model to ``tf.keras.Model`` before conversion or use ``hub.load()`` for TensorFlow Hub models. .. code-block:: py :force: @@ -214,10 +239,8 @@ Model conversion API supports passing TensorFlow/TensorFlow2 models directly fro import openvino as ov model = hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/5") - model.build([None, 224, 224, 3]) - model.save('mobilenet_v1_100_224') # use a temporary directory + ov_model = ov.convert_model(model) - ov_model = ov.convert_model('mobilenet_v1_100_224') * ``tf.Module``. Requires setting shapes in ``input`` parameter. @@ -270,20 +293,6 @@ Model conversion API supports passing TensorFlow/TensorFlow2 models directly fro import openvino as ov ov_model = ov.convert_model(model) -* ``tf.function`` - - .. code-block:: py - :force: - - @tf.function( - input_signature=[tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32), - tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32)]) - def func(x, y): - return tf.nn.sigmoid(tf.nn.relu(x + y)) - - import openvino as ov - ov_model = ov.convert_model(func) - * ``tf.compat.v1.session`` .. 
code-block:: py From e8f21eefae3037d209e446b4a27720f9f2d70afa Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Thu, 2 Nov 2023 15:40:32 +0100 Subject: [PATCH 181/275] [CPU] Add FP16 support to MatrixNms (#20804) --- src/plugins/intel_cpu/src/nodes/matrix_nms.cpp | 2 +- src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index 9a3221e7261310..639ded3c7d510f 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -115,7 +115,7 @@ void MatrixNms::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - const std::vector supportedFloatPrecision = {Precision::FP32}; + const std::vector supportedFloatPrecision = {Precision::FP32, Precision::FP16}; const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType); diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index 72a252b9b45bad..1ce9c11bc9e371 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -107,7 +107,7 @@ void MultiClassNms::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector supportedFloatPrecision = {Precision::FP32, Precision::FP16, Precision::BF16}; const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType); From 8e4c4c3510b068c4e6847d571c6ef0446279c16b Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 2 Nov 2023 17:15:52 +0100 
Subject: [PATCH 182/275] [core]Drop host tensor support in TensorAccessor (#20831) * Remove functions`get_tensor_data_as for HostTensor * Remove HostTensor support in TA * Update doxy comments Co-authored-by: Tomasz Jankowski --------- Co-authored-by: Tomasz Jankowski --- .../include/tensor_data_accessor.hpp | 11 +---- src/core/shape_inference/include/utils.hpp | 25 ---------- .../src/tensor_data_accessor.cpp | 46 +++---------------- src/core/tests/type_prop/eye.cpp | 9 ++-- .../src/subgraph/mul_conv_fusion.cpp | 1 + 5 files changed, 13 insertions(+), 79 deletions(-) diff --git a/src/core/shape_inference/include/tensor_data_accessor.hpp b/src/core/shape_inference/include/tensor_data_accessor.hpp index 8f017e05f1be42..36d29d934c21d5 100644 --- a/src/core/shape_inference/include/tensor_data_accessor.hpp +++ b/src/core/shape_inference/include/tensor_data_accessor.hpp @@ -27,11 +27,10 @@ class ITensorAccessor { * @brief Tensor data accessor functor. * * Creates the ov::Tensor found in tensors container. - * This accessor not take ownership of tensors container. + * This accessor does not take ownership of tensors container. * Supports following containers: * - ov::TensorVector - * - ngraph::HostTensorVector - * - std::map + * - std::unordered_map * * @tparam TContainer Type of tensor container. 
*/ @@ -61,15 +60,9 @@ class TensorAccessor final : public ITensorAccessor { template <> Tensor TensorAccessor::operator()(size_t port) const; -template <> -Tensor TensorAccessor::operator()(size_t port) const; - template <> Tensor TensorAccessor>::operator()(size_t port) const; -template <> -Tensor TensorAccessor>::operator()(size_t port) const; - template <> Tensor TensorAccessor::operator()(size_t port) const; diff --git a/src/core/shape_inference/include/utils.hpp b/src/core/shape_inference/include/utils.hpp index 308a7f84594eca..4b302b6618b494 100644 --- a/src/core/shape_inference/include/utils.hpp +++ b/src/core/shape_inference/include/utils.hpp @@ -62,31 +62,6 @@ TResult get_raw_data_as(const element::Type_t et, const void* const ptr, const s return out; } -OPENVINO_SUPPRESS_DEPRECATED_START -/** - * \brief Get data from Host tensor as object TResult. - * - * \tparam T TResult data type. - * \tparam TResult Type of return object, must support creation of std::inserter. Default std::vector. - * \tparam UnaryOperation Unary function object applied on data with signature (T f(const U u)). - * - * \param tv Input host tensor. - * \param func Unary operation function object. - * - * \return Object of TResult with data from host tensor. - */ -template , class UnaryOperation> -TResult get_tensor_data_as(ngraph::HostTensor& tv, UnaryOperation&& func) { - auto t = Tensor(tv.get_element_type(), tv.get_shape(), tv.get_data_ptr()); - return get_tensor_data_as(t, std::forward(func)); -} - -template , class UnaryOperation> -TResult get_tensor_data_as(ngraph::HostTensor* tv, UnaryOperation&& func) { - return get_tensor_data_as(*tv, std::forward(func)); -} -OPENVINO_SUPPRESS_DEPRECATED_END - /** * \brief Get data from ov:tensor as object TResult. 
* diff --git a/src/core/shape_inference/src/tensor_data_accessor.cpp b/src/core/shape_inference/src/tensor_data_accessor.cpp index 1eeb4e6c949e68..9b8af21dba6624 100644 --- a/src/core/shape_inference/src/tensor_data_accessor.cpp +++ b/src/core/shape_inference/src/tensor_data_accessor.cpp @@ -3,55 +3,21 @@ // #include "tensor_data_accessor.hpp" - -#include "ngraph/runtime/host_tensor.hpp" - -OPENVINO_SUPPRESS_DEPRECATED_START namespace ov { template <> -Tensor TensorAccessor::operator()(size_t port) const { - if (port < m_tensors->size()) { - return (*m_tensors)[port]; - } else { - return make_tensor_accessor()(port); - } -} - -template <> -Tensor TensorAccessor::operator()(size_t port) const { - if (port < m_tensors->size()) { - auto ptr = (*m_tensors)[port]; - return {ptr->get_element_type(), ptr->get_shape(), ptr->get_data_ptr()}; - } else { - return make_tensor_accessor()(port); - } -} - -template <> -Tensor TensorAccessor>::operator()(size_t port) const { - const auto t_iter = m_tensors->find(port); - if (t_iter != m_tensors->cend()) { - return t_iter->second; - } else { - return make_tensor_accessor()(port); - } +Tensor TensorAccessor::operator()(const size_t port) const { + return (port < m_tensors->size()) ? (*m_tensors)[port] : Tensor{}; } template <> -Tensor TensorAccessor>::operator()(size_t port) const { +Tensor TensorAccessor>::operator()(const size_t port) const { const auto t_iter = m_tensors->find(port); - if (t_iter != m_tensors->cend()) { - auto ptr = t_iter->second.get(); - return {ptr->get_element_type(), ptr->get_shape(), ptr->get_data_ptr()}; - } else { - return make_tensor_accessor()(port); - } + return (t_iter != m_tensors->cend()) ? 
t_iter->second : Tensor{}; } template <> -Tensor TensorAccessor::operator()(size_t) const { - static const auto empty = Tensor(); - return empty; +Tensor TensorAccessor::operator()(const size_t) const { + return {}; } auto make_tensor_accessor() -> const TensorAccessor& { diff --git a/src/core/tests/type_prop/eye.cpp b/src/core/tests/type_prop/eye.cpp index b3325d5f12735e..b82dddda653317 100644 --- a/src/core/tests/type_prop/eye.cpp +++ b/src/core/tests/type_prop/eye.cpp @@ -356,17 +356,16 @@ TEST_F(TypePropEyeV9Test, default_ctor) { EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(no_label)); } -OPENVINO_SUPPRESS_DEPRECATED_START TEST_F(TypePropEyeV9Test, default_ctor_no_arguments) { auto op = make_op(); op->set_out_type(element::i32); int64_t rows = 8, cols = 5; auto batch = std::array{2, 4, 1}; - const auto constant_map = std::map{ - {0, std::make_shared(element::i64, Shape{}, &rows)}, - {1, std::make_shared(element::i64, Shape{}, &cols)}, - {3, std::make_shared(element::i32, Shape{batch.size()}, batch.data())}}; + const auto constant_map = + std::unordered_map{{0, {element::i64, Shape{}, &rows}}, + {1, {element::i64, Shape{}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; const auto output_shapes = op::v9::shape_infer(op.get(), PartialShapes{{}, {}, {}, {3}}, make_tensor_accessor(constant_map)); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp index 411cff4a46ab21..4fcc4c562d42da 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp @@ -5,6 +5,7 @@ #include "shared_test_classes/subgraph/mul_conv_fusion.hpp" #include "common_test_utils/graph_comparator.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/pass/manager.hpp" #include "ov_models/builders.hpp" #include 
"transformations/common_optimizations/mul_conv_fusion.hpp" From ff7b49c14d1dab15373e7f0883ff786b6ed769a6 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Thu, 2 Nov 2023 16:40:47 +0000 Subject: [PATCH 183/275] add a few tests (#20824) --- .../single_layer_tests/reduce_ops.cpp | 102 +++++++----------- .../single_layer_tests/region_yolo.cpp | 18 ++-- .../single_layer_tests/reorg_yolo.cpp | 22 ++-- .../single_layer_tests/reshape.cpp | 37 +++---- .../single_layer_tests/reverse_sequence.cpp | 29 +++-- 5 files changed, 80 insertions(+), 128 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp index 48cdf0ce25bd26..f3b280170755b6 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp @@ -2,19 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/reduce_ops.hpp" +#include "single_op_tests/reduce_ops.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I8, +using ov::test::ReduceOpsLayerTest; +using ov::test::ReduceOpsLayerWithSpecificInputTest; + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::i32, + ov::element::u8, + ov::element::i8, }; const std::vector keepDims = { @@ -58,19 +57,19 @@ std::vector opTypes = { ov::test::utils::OpType::VECTOR, }; -const std::vector reductionTypes = { - ngraph::helpers::ReductionType::Mean, - ngraph::helpers::ReductionType::Min, - ngraph::helpers::ReductionType::Max, - ngraph::helpers::ReductionType::Sum, - 
ngraph::helpers::ReductionType::Prod, - ngraph::helpers::ReductionType::L1, - ngraph::helpers::ReductionType::L2, +const std::vector reductionTypes = { + ov::test::utils::ReductionType::Mean, + ov::test::utils::ReductionType::Min, + ov::test::utils::ReductionType::Max, + ov::test::utils::ReductionType::Sum, + ov::test::utils::ReductionType::Prod, + ov::test::utils::ReductionType::L1, + ov::test::utils::ReductionType::L2, }; -const std::vector reductionLogicalTypes = { - ngraph::helpers::ReductionType::LogicalOr, - ngraph::helpers::ReductionType::LogicalAnd +const std::vector reductionLogicalTypes = { + ov::test::utils::ReductionType::LogicalOr, + ov::test::utils::ReductionType::LogicalAnd }; INSTANTIATE_TEST_SUITE_P(smoke_ReduceOneAxis, @@ -79,10 +78,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ReduceOneAxis, testing::ValuesIn(opTypes), testing::Values(true, false), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::element::f32), testing::ValuesIn(inputShapesOneAxis), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -93,10 +89,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ReduceLogicalOneAxis, testing::ValuesIn(opTypes), testing::Values(true, false), testing::ValuesIn(reductionLogicalTypes), - testing::Values(InferenceEngine::Precision::BOOL), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::element::boolean), testing::ValuesIn(inputShapes), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -106,12 +99,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce_Precisions, testing::Combine(testing::Values(std::vector{1, 3}), testing::Values(opTypes[1]), 
testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Sum), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Sum), + testing::Values(ov::element::f32, + ov::element::i32), testing::Values(std::vector{2, 2, 2, 2}), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -122,12 +112,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce_Ranks, std::vector{-1, -2}), testing::Values(opTypes[1]), testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Sum), - testing::Values(InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Sum), + testing::Values(ov::element::f32, + ov::element::i32), testing::Values(std::vector{2, 3, 4, 5, 4, 3, 2, 3}, std::vector{2, 3, 4, 5, 4, 3, 2}), testing::Values(ov::test::utils::DEVICE_GPU)), @@ -138,11 +125,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce_InputShapes, testing::Combine(testing::Values(std::vector{0}), testing::Values(opTypes[1]), testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Mean), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Mean), + testing::Values(ov::element::f32), testing::Values(std::vector{3}, std::vector{3, 5}, std::vector{2, 4, 6}, @@ -157,11 +141,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce_Axes, 
testing::Combine(testing::ValuesIn(axes), testing::Values(opTypes[1]), testing::ValuesIn(keepDims), - testing::Values(ngraph::helpers::ReductionType::Mean), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Mean), + testing::Values(ov::element::f32), testing::ValuesIn(inputShapes), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -172,10 +153,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce_ReductionTypes, testing::Values(opTypes[1]), testing::ValuesIn(keepDims), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::element::f32), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -186,10 +164,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ReduceLogical_ReductionTypes, testing::Values(opTypes[1]), testing::ValuesIn(keepDims), testing::ValuesIn(reductionLogicalTypes), - testing::Values(InferenceEngine::Precision::BOOL), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::element::boolean), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerTest::getTestCaseName); @@ -199,12 +174,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Reduce, testing::Combine(testing::ValuesIn(decltype(axes){{0}, {1}}), testing::Values(opTypes[1]), testing::Values(true), - testing::Values(ngraph::helpers::ReductionType::Sum), - testing::Values(InferenceEngine::Precision::FP32, - 
InferenceEngine::Precision::I32), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), + testing::Values(ov::test::utils::ReductionType::Sum), + testing::Values(ov::element::f32, + ov::element::i32), testing::Values(std::vector{2, 10}), testing::Values(ov::test::utils::DEVICE_GPU)), ReduceOpsLayerWithSpecificInputTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp index 75736727d8f5cd..3ded3b08bb5184 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/region_yolo.cpp @@ -2,18 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/region_yolo.hpp" +#include "single_op_tests/region_yolo.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::RegionYoloLayerTest; -const std::vector inShapes_caffe = { +const std::vector> inShapes_caffe = { {1, 125, 13, 13} }; -const std::vector inShapes_mxnet = { +const std::vector> inShapes_mxnet = { {1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}, @@ -22,7 +20,7 @@ const std::vector inShapes_mxnet = { {1, 75, 8, 8} }; -const std::vector inShapes_v3 = { +const std::vector> inShapes_v3 = { {1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13} @@ -51,7 +49,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3, ::testing::Values(masks[2]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), RegionYoloLayerTest::getTestCaseName); 
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnet, @@ -64,7 +62,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnet, ::testing::Values(masks[1]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), RegionYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffe, @@ -77,6 +75,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffe, ::testing::Values(masks[0]), ::testing::Values(start_axis), ::testing::Values(end_axis), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), RegionYoloLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp index 63a5ca087943df..6f14ee52b786eb 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reorg_yolo.cpp @@ -2,18 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/reorg_yolo.hpp" +#include "single_op_tests/reorg_yolo.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReorgYoloLayerTest; -const std::vector inShapes_caffe_yolov2 = { +const std::vector> inShapes_caffe_yolov2 = { {1, 64, 26, 26}, }; -const std::vector inShapes = { +const std::vector> inShapes = { {1, 4, 4, 4}, {1, 8, 4, 4}, {1, 9, 3, 3}, @@ -29,41 +27,41 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_caffe_YoloV2, ReorgYoloLayerTest, ::testing::Combine(::testing::ValuesIn(inShapes_caffe_yolov2), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + 
::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2_smallest, ReorgYoloLayerTest, ::testing::Combine(::testing::Values(inShapes[0]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_2, ReorgYoloLayerTest, ::testing::Combine(::testing::Values(inShapes[1]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_stride_3, ReorgYoloLayerTest, ::testing::Combine(::testing::Values(inShapes[2]), ::testing::Values(strides[1]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_smaller_h, ReorgYoloLayerTest, ::testing::Combine(::testing::Values(inShapes[4]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsReorgYolo_batch_2, ReorgYoloLayerTest, ::testing::Combine(::testing::Values(inShapes[3]), ::testing::Values(strides[0]), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_GPU)), ReorgYoloLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp index 1a5e7c84e6ea6f..f4dd2c84c604dd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp @@ -2,45 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "single_layer_tests/reshape.hpp" +#include "single_op_tests/reshape.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64 +using ov::test::ReshapeLayerTest; +using ov::test::reshapeParams; + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i64 }; INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheck, ReshapeLayerTest, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({10, 10, 10, 10})), ::testing::Values(std::vector({10, 0, 100})), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), ReshapeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheckNegative, ReshapeLayerTest, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({10, 10, 10, 10})), 
::testing::Values(std::vector({10, -1, 100})), - ::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_GPU)), ReshapeLayerTest::getTestCaseName); static std::vector generate_tests() { @@ -62,11 +52,8 @@ static std::vector generate_tests() { {{2, 2, 3, 1, 3, 2, 4, 2}, {2, 2, 3, 1, 6, 8}}, }; for (auto& p : params) { - reshapeParams test_case = std::make_tuple(false, InferenceEngine::Precision::FP16, - InferenceEngine::Precision::UNSPECIFIED, InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, InferenceEngine::Layout::ANY, - p.first, p.second, - ov::test::utils::DEVICE_GPU, std::map({})); + reshapeParams test_case = std::make_tuple(false, ov::element::f16, + p.first, p.second, ov::test::utils::DEVICE_GPU); res.push_back(test_case); } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp index c36ebaf2c61b16..9b20ce833249ec 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/reverse_sequence.cpp @@ -2,22 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/reverse_sequence.hpp" +#include "single_op_tests/reverse_sequence.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { - -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U16, - InferenceEngine::Precision::I32 +using ov::test::ReverseSequenceLayerTest; + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::u8, + ov::element::i8, 
+ ov::element::u16, + ov::element::i32 }; const std::vector batchAxisIndices = { 0L }; @@ -28,9 +25,9 @@ const std::vector> inputShapes = { {3, 10} }; //, 10, 20 const std::vector> reversSeqLengthsVecShapes = { {3} }; -const std::vector secondaryInputTypes = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER +const std::vector secondaryInputTypes = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER }; INSTANTIATE_TEST_SUITE_P(Basic_smoke, ReverseSequenceLayerTest, From caa81a0b3c393b0f28b5ad519a26332492a9c220 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Fri, 3 Nov 2023 09:09:49 +0400 Subject: [PATCH 184/275] Remove use of legacy ng/runtime/shared_buffer.hpp (#20840) --- src/frontends/ir/src/ir_deserializer.cpp | 1 - src/frontends/tensorflow/src/op/var_handle.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index d245301633e4e3..e4744c6b5a8c40 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -7,7 +7,6 @@ #include #include -#include "ngraph/runtime/shared_buffer.hpp" #include "openvino/core/except.hpp" #include "openvino/core/meta_data.hpp" #include "openvino/core/type/element_type.hpp" diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp index 501df1c504309b..e9eb0eaa1816e9 100644 --- a/src/frontends/tensorflow/src/op/var_handle.cpp +++ b/src/frontends/tensorflow/src/op/var_handle.cpp @@ -7,7 +7,6 @@ #include "helper_ops/string_constant.hpp" #include "helper_ops/unsupported_constant.hpp" #include "input_model.hpp" -#include "ngraph/runtime/shared_buffer.hpp" #include "openvino/opsets/opset8.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" From 0955faef93eccb91226186ba73311166c00deb4a Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij 
Date: Fri, 3 Nov 2023 11:00:33 +0400 Subject: [PATCH 185/275] Remove use of `convertOps2Nodes()` & `convert2OutVect()` (#20837) --- .../functional/single_layer_tests/broadcast.cpp | 10 ++++------ .../functional/single_layer_tests/classes/mvn.cpp | 10 ++++------ .../single_layer_tests/classes/reduce.cpp | 7 ++----- .../single_layer_tests/classes/softmax.cpp | 7 ++----- .../functional/single_layer_tests/convolution.cpp | 7 ++----- .../single_layer_tests/fake_quantize.cpp | 6 ++---- .../functional/single_layer_tests/gather.cpp | 8 +++----- .../functional/single_layer_tests/grid_sample.cpp | 3 +-- .../tests/functional/single_layer_tests/grn.cpp | 7 +++---- .../single_layer_tests/group_convolution.cpp | 8 +++----- .../functional/single_layer_tests/log_softmax.cpp | 3 +-- .../functional/single_layer_tests/matmul.cpp | 3 +-- .../single_layer_tests/matmul_sparse.cpp | 3 +-- .../functional/single_layer_tests/one_hot.cpp | 3 +-- .../functional/single_layer_tests/proposal.cpp | 3 +-- .../functional/single_layer_tests/roi_pooling.cpp | 7 ++----- .../functional/single_layer_tests/select.cpp | 3 +-- .../functional/single_layer_tests/shapeof.cpp | 7 +++---- .../single_layer_tests/space_to_batch.cpp | 5 ++--- .../tests/functional/single_layer_tests/split.cpp | 8 +++----- .../tests/functional/single_layer_tests/tile.cpp | 5 ++--- .../functional/single_layer_tests/unique.cpp | 5 ++--- .../subgraph_tests/src/add_convert_to_reorder.cpp | 4 +--- .../src/align_matmul_input_ranks.cpp | 4 +--- .../subgraph_tests/src/arm/convert_group_conv.cpp | 3 +-- .../src/arm/convert_group_conv1d.cpp | 5 ++--- .../src/arm/convert_reduce_multi_axis.cpp | 3 +-- .../subgraph_tests/src/conv3d_reshape.cpp | 5 ++--- .../functional/subgraph_tests/src/conv_concat.cpp | 12 ++++++------ .../subgraph_tests/src/conv_maxpool_activ.cpp | 3 +-- .../src/conv_with_zero_point_fuse.cpp | 2 -- .../src/convert_fq_rnn_to_quantized_rnn.cpp | 11 ++++------- .../subgraph_tests/src/convs_and_sums.cpp | 9 ++++----- 
.../subgraph_tests/src/fq_fused_with_ss.cpp | 3 +-- .../src/fullyconnected_strided_inputs_outputs.cpp | 3 +-- .../src/matmul_decompress_convert.cpp | 8 +++----- .../src/matmul_strided_inputs_outputs.cpp | 6 ++---- .../src/not_fused_conv_simple_op.cpp | 5 ++--- .../subgraph_tests/src/split_matmul_concat.cpp | 3 +-- .../src/tile_with_two_output_edges.cpp | 3 +-- .../functional/test_utils/fusing_test_utils.hpp | 8 ++------ .../single_layer_tests/convolution.cpp | 3 +-- .../single_layer_tests/gru_cell.cpp | 5 ++--- .../single_layer_tests/gru_sequence.cpp | 9 ++++----- .../single_layer_tests/lstm_sequence.cpp | 11 +++++------ .../single_layer_tests/topk.cpp | 3 +-- .../functional/single_layer_tests/convolution.cpp | 6 ++---- .../single_layer_tests/dynamic/broadcast.cpp | 10 ++++------ .../single_layer_tests/dynamic/convolution.cpp | 6 ++---- .../dynamic/detection_output.cpp | 13 ++++++------- .../single_layer_tests/dynamic/gather.cpp | 11 ++++------- .../single_layer_tests/dynamic/gather_nd.cpp | 7 ++----- .../single_layer_tests/dynamic/grid_sample.cpp | 3 +-- .../dynamic/groupconvolution.cpp | 6 ++---- .../single_layer_tests/dynamic/matmul.cpp | 3 +-- .../functional/single_layer_tests/dynamic/mvn.cpp | 7 +++---- .../single_layer_tests/dynamic/normalize_l2.cpp | 7 +++---- .../functional/single_layer_tests/dynamic/pad.cpp | 3 +-- .../single_layer_tests/dynamic/prior_box.cpp | 8 +++----- .../single_layer_tests/dynamic/reduce.cpp | 7 ++----- .../single_layer_tests/dynamic/roi_pooling.cpp | 7 ++----- .../single_layer_tests/dynamic/shapeof.cpp | 7 +++---- .../single_layer_tests/dynamic/softmax.cpp | 8 ++------ .../single_layer_tests/dynamic/split.cpp | 7 ++----- .../single_layer_tests/dynamic/tile.cpp | 5 ++--- .../single_layer_tests/dynamic/unique.cpp | 6 ++---- .../dynamic/dynamic_model_static_split_layer.cpp | 8 +++----- .../dynamic/dynamic_smoke_test_gen_impl_key.cpp | 8 +++----- ...mic_smoke_test_reduce_deconvolution_concat.cpp | 8 +++----- 
.../dynamic_smoke_test_shape_of_activation.cpp | 6 ++---- ...dynamic_smoke_test_shape_of_reduce_reshape.cpp | 8 +++----- .../dynamic_smoke_test_with_empty_tensor.cpp | 14 ++++++-------- .../src/op_impl_check/single_op_graph.cpp | 15 ++++----------- .../include/behavior/plugin/preprocessing.hpp | 7 +++---- .../infer_request/set_io_blob_precision.cpp | 3 +-- .../single_layer_tests/invalid_cases/proposal.cpp | 3 +-- .../shared_test_classes/src/subgraph/clamp_fq.cpp | 3 +-- .../src/subgraph/convolution_relu_sequence.cpp | 2 +- .../src/subgraph/multiply_add.cpp | 4 +--- .../src/subgraph/mvn_multiply_add.cpp | 4 +--- .../quantized_convolution_backprop_data.cpp | 3 +-- .../src/subgraph/quantized_group_convolution.cpp | 3 +-- .../quantized_group_convolution_backprop_data.cpp | 3 +-- .../src/subgraph/quantized_mat_mul.cpp | 6 ++---- .../src/subgraph/reduce_eltwise.cpp | 4 +--- .../src/subgraph/strided_slice.cpp | 4 +--- .../two_fake_quantize_to_fullyconnected.cpp | 3 +-- 87 files changed, 185 insertions(+), 330 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp index a22f29bba6c45f..cd5f2bae07f85f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/broadcast.cpp @@ -117,8 +117,6 @@ class BroadcastLayerCPUTest : public testing::WithParamInterfaceset_friendly_name("data"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); - std::shared_ptr broadcastOp; if (mode == ov::op::BroadcastType::EXPLICIT) { std::shared_ptr targetShapeOp; @@ -133,19 +131,19 @@ class BroadcastLayerCPUTest : public testing::WithParamInterface 2 ? 
functionParams[2] : functionParams[1]; } - broadcastOp = std::make_shared(paramOuts[0], + broadcastOp = std::make_shared(functionParams[0], targetShapeOp, axesMappingOp, mode); } else if (mode == ov::op::BroadcastType::NUMPY) { if (isTargetShapeConst) { auto targetShapeConst = ov::op::v0::Constant::create(ov::element::i64, {targetShapeRank}, targetShape); - broadcastOp = std::make_shared(paramOuts[0], + broadcastOp = std::make_shared(functionParams[0], targetShapeConst, mode); } else { - broadcastOp = std::make_shared(paramOuts[0], - paramOuts[1], + broadcastOp = std::make_shared(functionParams[0], + functionParams[1], mode); } } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp index c9234bfd32b747..b2b82043c30bef 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/mvn.cpp @@ -100,14 +100,12 @@ void MvnLayerCPUTest::SetUp() { init_input_shapes({inputShapes}); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps); + + auto mvn = ngraph::builder::makeMVN(params[0], acrossChanels, normalizeVariance, eps); if (!axes.empty()) { - mvn = ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps); + mvn = ngraph::builder::makeMVN(params[0], axes, normalizeVariance, eps); } rel_threshold = 0.015f; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp index 95c23f98399740..49be2dc0e41227 100644 --- 
a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/reduce.cpp @@ -95,11 +95,8 @@ void ReduceCPULayerTest::SetUp() { init_input_shapes(inputShapes); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector shapeAxes; switch (opType) { @@ -116,7 +113,7 @@ void ReduceCPULayerTest::SetUp() { auto reductionAxesNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); + const auto reduce = ngraph::builder::makeReduce(params[0], reductionAxesNode, keepDims, reductionType); // hybrid layouts if (inFmts.size() != 0 && outFmts.size() == 0) { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/softmax.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/softmax.cpp index ac8a7149f88253..59b3fc7990aaad 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/softmax.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/softmax.cpp @@ -53,13 +53,10 @@ void SoftMaxLayerCPUTest::SetUp() { selectedType = makeSelectedTypeStr(selectedType, inType); init_input_shapes({config.inputShape}); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(inType, shape)); - } - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - const auto softMax = std::make_shared(paramOuts.at(0), config.axis); + const auto softMax = std::make_shared(params.at(0), config.axis); function = 
makeNgraphFunction(inType, params, softMax, "SoftMax"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp index a7a863b5782868..f713112cfc72e4 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp @@ -200,12 +200,9 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface(ov::element::f32, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - - auto convolutionNode = ngraph::builder::makeConvolution(paramOuts.front(), netType, kernel, stride, padBegin, + auto convolutionNode = ngraph::builder::makeConvolution(inputParams[0], netType, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels); function = makeNgraphFunction(netType, inputParams, convolutionNode, "Convolution"); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp index e82f26d774a692..a3d96b81acbe56 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp @@ -112,16 +112,14 @@ class FakeQuantizeLayerCPUTest : public testing::WithParamInterface(ngInPrec, shape)); - } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); auto il = builder::makeConstant(ngInPrec, ranges[0], rangesBounds[0], rangesBounds[0].empty()); auto ih = builder::makeConstant(ngInPrec, ranges[1], rangesBounds[1], rangesBounds[1].empty()); auto ol = builder::makeConstant(ngInPrec, ranges[2], rangesBounds[2], rangesBounds[2].empty()); auto oh = builder::makeConstant(ngInPrec, ranges[3], rangesBounds[3], rangesBounds[3].empty()); - auto fq = std::make_shared(paramOuts[0], 
il, ih, ol, oh, levels); + auto fq = std::make_shared(params[0], il, ih, ol, oh, levels); layerName = shouldBeDecomposed ? "" : "FakeQuantize"; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp index 480247b44ea5f1..9a171663653bca 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gather.cpp @@ -105,13 +105,12 @@ class GatherLayerTestCPU : public testing::WithParamInterface(intInputsPrecision, inputDynamicShapes[2])); params[2]->set_friendly_name("axis"); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::shared_ptr gatherNode; if (isAxisConstant) { - gatherNode = std::make_shared(paramOuts[0], paramOuts[1], + gatherNode = std::make_shared(params[0], params[1], ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({1}), { axis }), batchDims); } else { - gatherNode = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], batchDims); + gatherNode = std::make_shared(params[0], params[1], params[2], batchDims); } function = makeNgraphFunction(netPrecision, params, gatherNode, "GatherCPU"); @@ -205,8 +204,7 @@ class GatherInPlaceLayerTestCPU : public testing::WithParamInterface(netPrecision, inputDynamicShapes[0]) }; params[0]->set_friendly_name("data"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr gatherNode = std::make_shared(paramOuts[0], + std::shared_ptr gatherNode = std::make_shared(params[0], ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({indices.size()}), indices), ov::op::v0::Constant::create(intInputsPrecision, ov::Shape({1}), { axis }), batchDims); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/grid_sample.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/grid_sample.cpp index 
7d7c1d82b8d182..278c92ed3681a1 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/grid_sample.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/grid_sample.cpp @@ -103,9 +103,8 @@ class GridSampleLayerTestCPU : public testing::WithParamInterface(gridPrecision, inputDynamicShapes[1])}; params[0]->set_friendly_name("data"); params[1]->set_friendly_name("grid"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); GridSample::Attributes attributes = {alignCorners, interpolateMode, paddingMode}; - auto gridSampleNode = std::make_shared(paramOuts[0], paramOuts[1], attributes); + auto gridSampleNode = std::make_shared(params[0], params[1], attributes); function = makeNgraphFunction(dataPrecision, params, gridSampleNode, "GridSampleCPU"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/grn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/grn.cpp index 583bd4535b76da..5047d3a615f602 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/grn.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/grn.cpp @@ -68,11 +68,10 @@ class GRNLayerCPUTest : public testing::WithParamInterface, init_input_shapes({inputShape}); ov::ParameterVector paramsIn; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) paramsIn.push_back(std::make_shared(netPrecision, shape)); - } - const auto paramsOut = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramsIn)); - const auto grn = std::make_shared(paramsOut[0], bias); + + const auto grn = std::make_shared(paramsIn[0], bias); const ngraph::ResultVector results{std::make_shared(grn)}; function = std::make_shared(results, paramsIn, "Grn"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp index 
3d7bd15fd05f0f..7ea599b7c32847 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp @@ -193,13 +193,11 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); + auto groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolution(paramOuts[0], netType, kernel, stride, padBegin, + ngraph::builder::makeGroupConvolution(params[0], netType, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups)); function = makeNgraphFunction(netType, params, groupConv, "groupConvolution"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/log_softmax.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/log_softmax.cpp index c0c23db047cc8d..4ba51e71dec1e2 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/log_softmax.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/log_softmax.cpp @@ -65,8 +65,7 @@ class LogSoftmaxLayerCPUTest init_input_shapes(inputShapes); ov::ParameterVector params{std::make_shared(ngPrc, inputDynamicShapes.front())}; - const auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - const auto logSoftmax = std::make_shared(paramOuts[0], axis); + const auto logSoftmax = std::make_shared(params[0], axis); const ngraph::ResultVector results{std::make_shared(logSoftmax)}; function = std::make_shared(results, params, "logSoftmax"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp index c5508e28f052c8..ab38ccb19510c0 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp @@ -164,8 +164,7 @@ class MatMulLayerCPUTest : public testing::WithParamInterface(matrixB)); } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); - auto matMul = builder::makeMatMul(paramOuts[0], matrixB, transpA, transpB); + auto matMul = builder::makeMatMul(params[0], matrixB, transpA, transpB); function = makeNgraphFunction(netType, params, matMul, cpuNodeType); checkFusingPosition = false; } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp index cf62975e3fd14a..70f4d833d66f5a 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul_sparse.cpp @@ -190,12 +190,11 @@ class MatMulSparseCPUTest : public testing::WithParamInterface(inType, inShapeA)}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); auto matrixB = builder::makeDynamicInputLayer(element::f32, helpers::InputLayerType::CONSTANT, inShapeB); auto weiData = generateSparseVector(ngraph::shape_size(inShapeB.get_shape()), weiSparseRate); - auto matMul = makeMatMulRelaxed(paramOuts[0], inShapeB, weiType, transpA, transpB, weiData); + auto matMul = makeMatMulRelaxed(params[0], inShapeB, weiType, transpA, transpB, weiData); function = makeNgraphFunction(element::f32, params, matMul, cpuNodeType); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp index 84f8c4b4740b22..6ebc9368e1433c 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/one_hot.cpp @@ -137,10 +137,9 @@ class OneHotLayerCPUTest : public testing::WithParamInterface(params)); auto on_value_const = 
std::make_shared(outType, ngraph::Shape{ }, OnValue); auto off_value_const = std::make_shared(outType, ngraph::Shape{ }, OffValue); - auto oneHot = std::make_shared(paramOuts[0], depth, on_value_const, off_value_const, Axis); + auto oneHot = std::make_shared(params[0], depth, on_value_const, off_value_const, Axis); return makeNgraphFunction(ngraph::element::i32, params, oneHot, "OneHot"); } void generateDepth() { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/proposal.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/proposal.cpp index 7bb8cae14153fb..03240dcfdebafb 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/proposal.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/proposal.cpp @@ -143,7 +143,6 @@ class ProposalLayerCPUTest : public testing::WithParamInterface(ngPrc, shape)); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); ngraph::op::ProposalAttrs attrs; attrs.base_size = base_size; @@ -162,7 +161,7 @@ class ProposalLayerCPUTest : public testing::WithParamInterface(paramOuts[0], paramOuts[1], paramOuts[2], attrs); + auto proposal = std::make_shared(params[0], params[1], params[2], attrs); ngraph::ResultVector results{ std::make_shared(proposal->output(0)), diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/roi_pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/roi_pooling.cpp index c5d3047452a122..3b07b93e45e572 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/roi_pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/roi_pooling.cpp @@ -203,13 +203,10 @@ class ROIPoolingCPULayerTest : public testing::WithParamInterface(ngPrc, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, 
pool_method); + auto roi_pooling = ngraph::builder::makeROIPooling(params[0], params[1], poolShape, spatial_scale, pool_method); ngraph::ResultVector results{std::make_shared(roi_pooling)}; function = makeNgraphFunction(ngPrc, params, roi_pooling, "ROIPooling"); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/select.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/select.cpp index 9cacb90f9704c9..84b6a9d19adbdb 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/select.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/select.cpp @@ -66,8 +66,7 @@ class SelectLayerCPUTest : public testing::WithParamInterface, auto param_node = std::make_shared(types[i], inputDynamicShapes[i]); parameters.push_back(param_node); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(parameters)); - auto select = ngraph::builder::makeSelect(paramOuts, broadcast); + auto select = std::make_shared(parameters[0], parameters[1], parameters[2], broadcast); function = makeNgraphFunction(precision, parameters, select, "Eltwise"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/shapeof.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/shapeof.cpp index 457a2bdfa63b22..93e5f614894d88 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/shapeof.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/shapeof.cpp @@ -69,11 +69,10 @@ class ShapeOfLayerCPUTest : public testing::WithParamInterface(inType, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto shapeOf = std::make_shared(paramOuts[0], ngraph::element::i32); + + auto shapeOf = std::make_shared(params[0], ngraph::element::i32); function = makeNgraphFunction(netPrecision, params, shapeOf, "ShapeOf"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp index bd0493d19af7f4..616474d3a4c299 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp @@ -107,8 +107,7 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface(ngPrec, inputDynamicShapes.front())}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - paramShape = {paramOuts[0].get_partial_shape().size()}; + paramShape = {params[0]->get_partial_shape().size()}; std::shared_ptr in2, in3, in4; auto blockShapeParam = std::make_shared(ov::element::i64, paramShape); @@ -121,7 +120,7 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface(paramOuts[0], in2, in3, in4); + auto s2b = std::make_shared(params[0], in2, in3, in4); function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU"); } }; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp index ff423786fd4234..ae27af687e7875 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/split.cpp @@ -71,12 +71,10 @@ class SplitLayerCPUTest : public testing::WithParamInterface init_input_shapes({inputShapes}); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto split = std::dynamic_pointer_cast(ngraph::builder::makeSplit(paramOuts[0], + + auto split = std::dynamic_pointer_cast(ngraph::builder::makeSplit(params[0], netPrecision, numSplits, axis)); ngraph::ResultVector results; diff --git 
a/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp index 32bb1ebc9376cf..a0a68a6f85a2e4 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/tile.cpp @@ -99,13 +99,12 @@ class TileLayerCPUTest : public testing::WithParamInterfaceset_friendly_name("data"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); std::shared_ptr tileNode; if (isRepeatsConst) { - tileNode = std::make_shared(paramOuts[0], + tileNode = std::make_shared(functionParams[0], ov::op::v0::Constant::create(ov::element::i64, { repeatsData.size() }, repeatsData)); } else { - tileNode = std::make_shared(paramOuts[0], paramOuts[1]); + tileNode = std::make_shared(functionParams[0], functionParams[1]); } function = makeNgraphFunction(netPrecision, functionParams, tileNode, "CPUTile"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp index 277a799ba7b40e..866cdbb9a3fcd8 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/unique.cpp @@ -99,13 +99,12 @@ class UniqueLayerTestCPU : public testing::WithParamInterface(dataPrecision, shape)); } params[0]->set_friendly_name("data"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::shared_ptr uniqueNode; if (flattened) { - uniqueNode = std::make_shared(paramOuts[0], sorted); + uniqueNode = std::make_shared(params[0], sorted); } else { axis = std::get<1>(flatOrAxis); - uniqueNode = std::make_shared(paramOuts[0], + uniqueNode = std::make_shared(params[0], ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}), sorted); } diff --git 
a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/add_convert_to_reorder.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/add_convert_to_reorder.cpp index 8be11f4a277fb7..645b85e9816ea0 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/add_convert_to_reorder.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/add_convert_to_reorder.cpp @@ -28,11 +28,9 @@ class AddConvertToReorderTest : virtual public LayerTestsUtils::LayerTestsCommon << "Indices vector size and provided indices shape doesn't fit each other"; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto indicesNode = ngraph::opset3::Constant::create(secondConstantType, ngraph::Shape(indicesShape), indices); auto axisNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode); + auto gather = std::make_shared(params[0], indicesNode, axisNode); ngraph::ResultVector results{std::make_shared(gather)}; function = std::make_shared(results, params, "gather"); } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/align_matmul_input_ranks.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/align_matmul_input_ranks.cpp index c5cca71accb4a4..a29927ec6e5195 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/align_matmul_input_ranks.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/align_matmul_input_ranks.cpp @@ -56,9 +56,7 @@ class AlignMatMulInputRanksTest : public testing::WithParamInterface(ngPrec, ov::Shape(inShapes.first)), std::make_shared(ngPrec, ov::Shape(inShapes.second))}; - - const auto outputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); - const 
auto matMul = builder::makeMatMul(outputNodes[0], outputNodes[1], false, false); + const auto matMul = builder::makeMatMul(inputParams[0], inputParams[1], false, false); selectedType = makeSelectedTypeStr(with_cpu_x86_avx512_core() ? "brgemm_avx512" : "jit_gemm", ngPrec); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv.cpp index 763251cf5e5e7d..48aea1512428e5 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv.cpp @@ -62,8 +62,7 @@ class GroupConvToConvTransformationCPUTest: public testing::WithParamInterface(ngraph::element::f32, shape)); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - conv = builder::makeGroupConvolution(paramOuts.front(), element::f32, kernelSize, strides, padBegin, padEnd, dilation, + conv = builder::makeGroupConvolution(inputParams[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels, numOfGroups); ResultVector results; diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv1d.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv1d.cpp index e498ce930741d1..79a21d4c8bd854 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv1d.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_group_conv1d.cpp @@ -67,15 +67,14 @@ class Conv1dConvertTransformationCPUTest: public testing::WithParamInterface(ngraph::element::f32, shape)); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); switch (convType) { case nodeType::convolution : { - conv = builder::makeConvolution(paramOuts.front(), element::f32, 
kernelSize, strides, padBegin, padEnd, dilation, + conv = builder::makeConvolution(inputParams[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); break; } case nodeType::groupConvolution : { - conv = builder::makeGroupConvolution(paramOuts.front(), element::f32, kernelSize, strides, padBegin, padEnd, dilation, + conv = builder::makeGroupConvolution(inputParams[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels, numOfGroups); break; } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_reduce_multi_axis.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_reduce_multi_axis.cpp index 5d6424d15f611f..3bb5a06c7d9f2c 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_reduce_multi_axis.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/arm/convert_reduce_multi_axis.cpp @@ -67,13 +67,12 @@ class reduceTransformationCPUTest: public testing::WithParamInterface(ngraph::element::f32, shape)); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector shapeAxes; shapeAxes.push_back(axes.size()); auto reductionAxesNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); + const auto reduce = ngraph::builder::makeReduce(params[0], reductionAxesNode, keepDims, reductionType); function = makeNgraphFunction(ElementType::f32, params, reduce, "Reduce"); } private: diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv3d_reshape.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv3d_reshape.cpp index e7b1feb6b41844..4945a75af3eee3 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv3d_reshape.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv3d_reshape.cpp @@ -42,7 +42,6 @@ class Conv3dReshapeTest : public testing::WithParamInterface(ov::element::f32, ov::Shape{1, 1024, 64})}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); std::shared_ptr conv; const std::vector kernelSize = {1}; @@ -55,11 +54,11 @@ class Conv3dReshapeTest : public testing::WithParamInterface(ov::element::f32, ov::Shape(inputShapes)), std::make_shared(ov::element::f32, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); std::vector> convolutionNodes(2); switch (type) { case nodeType::convolution : { for (size_t conv = 0; conv < convolutionNodes.size(); conv++) { - convolutionNodes[conv] = ngraph::builder::makeConvolution(paramOuts[conv], ngraph::element::f32, kernelSize, strides, padBegin, + convolutionNodes[conv] = ngraph::builder::makeConvolution(inputParams[conv], ngraph::element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); } break; } case nodeType::convolutionBackpropData : { for (size_t conv = 0; conv < convolutionNodes.size(); conv++) { - convolutionNodes[conv] = ngraph::builder::makeConvolutionBackpropData(paramOuts[conv], ngraph::element::f32, kernelSize, strides, padBegin, + convolutionNodes[conv] = ngraph::builder::makeConvolutionBackpropData(inputParams[conv], ngraph::element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); } break; } case nodeType::groupConvolution : { for (size_t conv = 0; conv < convolutionNodes.size(); conv++) { - convolutionNodes[conv] = ngraph::builder::makeGroupConvolution(paramOuts[conv], ngraph::element::f32, kernelSize, strides, padBegin, + convolutionNodes[conv] = ngraph::builder::makeGroupConvolution(inputParams[conv], ngraph::element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels, numOfGroups); } break; } 
case nodeType::groupConvolutionBackpropData : { for (size_t conv = 0; conv < convolutionNodes.size(); conv++) { - convolutionNodes[conv] = ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[conv], ngraph::element::f32, kernelSize, strides, padBegin, - padEnd, dilation, paddingType, numOutChannels, numOfGroups); + convolutionNodes[conv] = ngraph::builder::makeGroupConvolutionBackpropData(inputParams[conv], ngraph::element::f32, kernelSize, + strides, padBegin, padEnd, dilation, paddingType, + numOutChannels, numOfGroups); } break; } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_maxpool_activ.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_maxpool_activ.cpp index cca2a8ceb69f05..b9c2a1151cfba6 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_maxpool_activ.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_maxpool_activ.cpp @@ -33,7 +33,6 @@ class ConvPoolActivTest : public testing::WithParamInterface(ov::element::f32, ov::Shape{1, 3, 40, 40})}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); std::shared_ptr conv; { @@ -44,7 +43,7 @@ class ConvPoolActivTest : public testing::WithParamInterface dilation = {1, 1}; const size_t numOutChannels = 16; const op::PadType paddingType = op::PadType::EXPLICIT; - conv = builder::makeConvolution(paramOuts[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); + conv = builder::makeConvolution(inputParams[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); } std::shared_ptr pooling; { diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp index d5131f3d524d6d..30b8d15950d80e 100644 --- 
a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp @@ -51,8 +51,6 @@ void ConvWithZeroPointFuseSubgraphTest::SetUp() { {-12.8f}, {12.7f}); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - std::vector> branches(2); { ngraph::Strides strides{1, 1}; diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convert_fq_rnn_to_quantized_rnn.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convert_fq_rnn_to_quantized_rnn.cpp index 030cb879234608..438cffa8b2b88a 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convert_fq_rnn_to_quantized_rnn.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convert_fq_rnn_to_quantized_rnn.cpp @@ -113,11 +113,8 @@ class ConvertFqRnnToQuantizedRnn : public testing::WithParamInterface H; ov::ParameterVector inputParams; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) inputParams.push_back(std::make_shared(ngPrec, shape)); - } - - const auto outputNodes = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); auto makeDataFQ = [](const ngraph::Output& input) { const auto fqLevels = 256; @@ -126,10 +123,10 @@ class ConvertFqRnnToQuantizedRnn : public testing::WithParamInterface( X_FQ, H, C, seq_lengths, W_FQ, R_FQ, B, hiddenSize, op::RecurrentSequenceDirection::FORWARD); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convs_and_sums.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convs_and_sums.cpp index fa84673b89da93..7b4df345dc1e46 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convs_and_sums.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/convs_and_sums.cpp @@ -38,16 +38,15 @@ class ConvsAndSums : virtual public 
LayerTestsUtils::LayerTestsCommon { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape{1, 512, 32}), std::make_shared(ngPrc, ov::Shape{1, 128, 32})}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto FQ = ngraph::builder::makeFakeQuantize(paramOuts[1], ngPrc, 256, {}, {-2.8215785026550293}, {2.799535036087036}, + auto FQ = ngraph::builder::makeFakeQuantize(params[1], ngPrc, 256, {}, {-2.8215785026550293}, {2.799535036087036}, {-2.8215785026550293}, {2.799535036087036}); - auto FQ_0 = ngraph::builder::makeFakeQuantize(paramOuts[1], ngPrc, 256, {}, {-5.031249523162842}, {4.991942882537842}, + auto FQ_0 = ngraph::builder::makeFakeQuantize(params[1], ngPrc, 256, {}, {-5.031249523162842}, {4.991942882537842}, {-5.031249523162842}, {4.991942882537842}); auto Add_0 = ngraph::builder::makeEltwise(FQ_0, FQ, EltwiseTypes::ADD); - auto FQ_1 = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, 256, {}, {-2.122633457183838}, {2.106050491333008}, + auto FQ_1 = ngraph::builder::makeFakeQuantize(params[0], ngPrc, 256, {}, {-2.122633457183838}, {2.106050491333008}, {-2.122633457183838}, {2.106050491333008}); auto Const = ngraph::builder::makeConstant(ngPrc, {128, 512, 1}, std::vector{-0.0512377955019474}, false); @@ -58,7 +57,7 @@ class ConvsAndSums : virtual public LayerTestsUtils::LayerTestsCommon { auto Add = ngraph::builder::makeEltwise(Add_0, Conv, EltwiseTypes::ADD); - auto FQ_11 = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, 256, {}, {-3.2050728797912598}, {3.1800332069396973}, + auto FQ_11 = ngraph::builder::makeFakeQuantize(params[0], ngPrc, 256, {}, {-3.2050728797912598}, {3.1800332069396973}, {-3.2050728797912598}, {3.1800332069396973}); auto Const_ = ngraph::builder::makeConstant(ngPrc, {128, 512, 1}, std::vector{-0.001183388871140778}, false); diff --git 
a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fq_fused_with_ss.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fq_fused_with_ss.cpp index 0e92f4b4d6ef1a..3af1ab6d795c1b 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fq_fused_with_ss.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fq_fused_with_ss.cpp @@ -47,9 +47,8 @@ class FQScaleshiftWithConstantShiftTest : public testing::WithParamInterface(ngPrec, mmShape)}; - const auto mmOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(mmParams)); - const auto mm = builder::makeMatMul(mmOutputNodes[0], mmConst, false, false); + const auto mm = builder::makeMatMul(mmParams[0], mmConst, false, false); auto sum = ngraph::builder::makeEltwise(constShift, mm, ngraph::helpers::EltwiseTypes::ADD); auto fq = ngraph::builder::makeFakeQuantize(sum, ngraph::element::f32, 256, {}, {-8.0f}, {7.0f}, {-8.0f}, {7.0f}); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fullyconnected_strided_inputs_outputs.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fullyconnected_strided_inputs_outputs.cpp index 070ab78d7821d2..be89ca0cafab4c 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fullyconnected_strided_inputs_outputs.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fullyconnected_strided_inputs_outputs.cpp @@ -48,9 +48,8 @@ class FullyConnectedStridedInputsOutputsTest : public testing::WithParamInterfac ov::ParameterVector params {std::make_shared(ngPrec, ov::Shape(splitShape))}; - const auto splitOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); const auto splitAxis = rank == 3 ? 
1 : 0; - const auto split = builder::makeSplit(splitOutputNodes[0], ngPrec, 2 /* splits */, splitAxis); + const auto split = builder::makeSplit(params[0], ngPrec, 2 /* splits */, splitAxis); SizeVector fcWeightsShape{16, 8}; if (rank == 3) bcastTo3D(fcWeightsShape); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp index 06e4624ad778f4..e041cbb288704f 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_decompress_convert.cpp @@ -209,7 +209,6 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface(inType, inShapeA)}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); std::shared_ptr inputB = builder::makeConstant(weiConstElemType, inShapeB.get_shape(), {}, true); if (weiConstElemType == ElementType::f16) { inputB = std::make_shared(inputB, convertOutType); @@ -217,7 +216,7 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface(inType, shape)); } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); std::shared_ptr inputWeights = builder::makeConstant(weiConstElemType, inShapeWeights.get_shape(), {}, true); if (weiConstElemType == ElementType::f16) { inputWeights = std::make_shared(inputWeights, convertOutType); @@ -503,8 +501,8 @@ class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest { // In this test, convert must be folded on the ngraph side, so the constant with fp32 precision is expected expectedWeiConstElemType = ElementType::f32; - auto matMul0 = builder::makeMatMul(paramOuts[0], inputWeights, transpA, transpB); - auto matMul1 = builder::makeMatMul(paramOuts[1], inputWeights, transpA, transpB); + auto matMul0 = builder::makeMatMul(params[0], inputWeights, transpA, transpB); + auto matMul1 = 
builder::makeMatMul(params[1], inputWeights, transpA, transpB); auto concat = builder::makeConcat({matMul0, matMul1}, 0); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_strided_inputs_outputs.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_strided_inputs_outputs.cpp index c5c1a43fbd8971..5a20c437d360ba 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_strided_inputs_outputs.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_strided_inputs_outputs.cpp @@ -36,8 +36,7 @@ class MatmulStridedInputsOutputsTest : public testing::WithParamInterface(ngPrec, ov::Shape(splitShape))}; - const auto splitOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(splitInputParams)); - const auto split = builder::makeSplit(splitOutputNodes[0], ngPrec, 2 /* splits */, 1 /* 2nd axis */); + const auto split = builder::makeSplit(splitInputParams[0], ngPrec, 2 /* splits */, 1 /* 2nd axis */); std::vector concatShapes{{1, 1, 8, 8}, {1, 1, 8, 8}}; ov::ParameterVector concatInputParams {std::make_shared(ngPrec, concatShapes[0]), @@ -49,9 +48,8 @@ class MatmulStridedInputsOutputsTest : public testing::WithParamInterface(ngPrec, ov::Shape(matmulShape))}; - const auto matmulOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(matmulInputParams)); - const auto matMul2 = builder::makeMatMul(split->output(1), matmulOutputNodes[0], false, false); + const auto matMul2 = builder::makeMatMul(split->output(1), matmulInputParams[0], false, false); const auto concatMatMuls = builder::makeConcat({matMul1, matMul2}, 2 /* 3rd axis */); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/not_fused_conv_simple_op.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/not_fused_conv_simple_op.cpp index 8cc7569233aab7..4229e15282648d 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/not_fused_conv_simple_op.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/not_fused_conv_simple_op.cpp @@ -17,7 +17,6 @@ class NotFusedConvSimpleOp : virtual public LayerTestsUtils::LayerTestsCommon { ov::ParameterVector inputParams{std::make_shared(ov::element::f32, ov::Shape{1, 3, 12, 9}), std::make_shared(ov::element::f32, ov::Shape{1, 16, 12, 9})}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); std::shared_ptr conv; { @@ -28,12 +27,12 @@ class NotFusedConvSimpleOp : virtual public LayerTestsUtils::LayerTestsCommon { const std::vector dilation = {1, 1}; const size_t numOutChannels = 16; const op::PadType paddingType = op::PadType::EXPLICIT; - conv = builder::makeConvolution(paramOuts[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); + conv = builder::makeConvolution(inputParams[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); } const auto sharedNode = builder::makeConstant(element::f32, {1, 16, 1, 1}, std::vector{}, true); const auto postOpCandidate = builder::makeEltwise(conv, sharedNode, EltwiseTypes::ADD); - const auto secondConsumpt = builder::makeEltwise(paramOuts[1], sharedNode, EltwiseTypes::ADD); + const auto secondConsumpt = builder::makeEltwise(inputParams[1], sharedNode, EltwiseTypes::ADD); NodeVector results{postOpCandidate, secondConsumpt}; function = std::make_shared(results, inputParams, "NotFusedConvSimpleOp"); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/split_matmul_concat.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/split_matmul_concat.cpp index 2a57e1cc4133c6..aeaa041df6c09f 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/split_matmul_concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/split_matmul_concat.cpp @@ -107,10 +107,9 @@ class SplitMatMulConcatTest : public testing::WithParamInterface(ElementType::f32, inShapeA)}; - auto 
paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); std::shared_ptr inputB = builder::makeConstant(ElementType::f32, inShapeB.get_shape(), {}, true); - auto split = builder::makeVariadicSplit(paramOuts[0], {1, 1}, 0); + auto split = builder::makeVariadicSplit(params[0], {1, 1}, 0); auto matMul = builder::makeMatMul(split->output(0), inputB, transpA, transpB); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/tile_with_two_output_edges.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/tile_with_two_output_edges.cpp index 8fdfbff08273e1..00a8b168a214c4 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/tile_with_two_output_edges.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/tile_with_two_output_edges.cpp @@ -17,9 +17,8 @@ class TileWithTwoOutputEdges : public LayerTestsUtils::LayerTestsCommon { auto ngPrc = element::f32; ov::ParameterVector inputParams {std::make_shared(ngPrc, ov::Shape{1, 3, 12, 9})}; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); - auto tile = ngraph::builder::makeTile(paramOuts[0], std::vector{1, 2, 1, 1}); + auto tile = ngraph::builder::makeTile(inputParams[0], std::vector{1, 2, 1, 1}); const auto const1 = ngraph::builder::makeConstant(ngPrc, std::vector{1, 6, 1, 1}, std::vector{}, true); const auto const2 = ngraph::builder::makeConstant(ngPrc, std::vector{1, 6, 1, 1}, std::vector{}, true); diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp index 32d22393bef15d..d802e3b550f9d3 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp @@ -362,9 +362,7 @@ const auto fusingSum = fusingSpecificParams{std::make_shared(std:: auto shape = cfg.input->get_output_partial_shape(0); ov::ParameterVector 
newParams{std::make_shared(cfg.type, shape)}; cfg.params.insert(cfg.params.end(), newParams.begin(), newParams.end()); - auto newParamOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(newParams)); - return std::make_shared(cfg.input, newParamOuts[0]); + return std::make_shared(cfg.input, newParams[0]); }, "Add(Parameters)"}}), {"Add"}}; const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared(std::vector{ @@ -372,9 +370,7 @@ const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared( auto shape = cfg.input->get_output_partial_shape(0); ov::ParameterVector newParams{std::make_shared(cfg.type, shape)}; cfg.params.insert(cfg.params.end(), newParams.begin(), newParams.end()); - auto newParamOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(newParams)); - return std::make_shared(cfg.input, newParamOuts[0]); + return std::make_shared(cfg.input, newParams[0]); }, "Add(Parameters)"}, {[](postNodeConfig& cfg){ return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Elu, {}, {2.0f}); diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp index e6b35e835657d5..e3c570c393aad9 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp @@ -103,14 +103,13 @@ void ConvolutionLayerTestFixture::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector filter_weights; auto filter_size 
= std::accumulate(std::begin(kernel), std::end(kernel), 1, std::multiplies()); filter_weights = ov::test::utils::generate_float_numbers(convOutChannels * inputShape[1] * filter_size, -0.1f, 0.1f); - auto conv = std::dynamic_pointer_cast(ngraph::builder::makeConvolution(paramOuts[0], + auto conv = std::dynamic_pointer_cast(ngraph::builder::makeConvolution(params[0], ngPrc, kernel, stride, diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp index af9dab4bdd8beb..2efdb5d1ef3b65 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_cell.cpp @@ -58,7 +58,6 @@ class GRUCellGNATest : public GRUCellTest { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShapes[0])), std::make_shared(ngPrc, ov::Shape(inputShapes[1]))}; std::vector WRB = {inputShapes[2], inputShapes[3], inputShapes[4]}; - auto in = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector weights_vals = ov::test::utils::generate_float_numbers(ngraph::shape_size(WRB[0]), -0.0001f, 0.0001f); std::vector reccurrenceWeights_vals = @@ -70,8 +69,8 @@ class GRUCellGNATest : public GRUCellTest { auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, WRB[1], reccurrenceWeights_vals); auto biasNode = ngraph::builder::makeConstant(ngPrc, WRB[2], bias_vals); - auto gru_cell = std::make_shared(in[0], - in[1], + auto gru_cell = std::make_shared(params[0], + params[1], weightsNode, reccurrenceWeightsNode, biasNode, diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp index 9dd9e6e4383d9a..598ca5167420bb 100644 --- 
a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/gru_sequence.cpp @@ -62,7 +62,6 @@ class GRUSequenceGNATest : public GRUSequenceTest { std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]}; - auto in = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector weights_vals = ov::test::utils::generate_float_numbers(ngraph::shape_size(WRB[0]), -0.0001f, 0.0001f); std::vector reccurrenceWeights_vals = @@ -74,13 +73,13 @@ class GRUSequenceGNATest : public GRUSequenceTest { auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, WRB[1], reccurrenceWeights_vals); auto biasNode = ngraph::builder::makeConstant(ngPrc, WRB[2], bias_vals); - std::vector lengths(in[0].get_partial_shape()[0].get_min_length(), - in[0].get_partial_shape()[1].get_min_length()); + std::vector lengths(params[0]->get_partial_shape()[0].get_min_length(), + params[0]->get_partial_shape()[1].get_min_length()); std::shared_ptr seq_length = ngraph::builder::makeConstant(ngraph::element::i64, WRB[3], lengths, false); - auto gru_sequence = std::make_shared(in[0], - in[1], + auto gru_sequence = std::make_shared(params[0], + params[1], seq_length, weightsNode, reccurrenceWeightsNode, diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp index 7e519c3f7d1bc2..0b7ea4daa080c8 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/lstm_sequence.cpp @@ -61,7 +61,6 @@ class LSTMSequenceGNATest : public LSTMSequenceTest { std::make_shared(ngPrc, ov::Shape(inputShapes[2]))}; std::vector WRB = {inputShapes[4], 
inputShapes[5], inputShapes[6], inputShapes[3]}; - auto in = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector weights_vals = ov::test::utils::generate_float_numbers(ngraph::shape_size(WRB[0]), -0.0001f, 0.0001f); @@ -74,13 +73,13 @@ class LSTMSequenceGNATest : public LSTMSequenceTest { auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, WRB[1], reccurrenceWeights_vals); auto biasNode = ngraph::builder::makeConstant(ngPrc, WRB[2], bias_vals); - std::vector lengths(in[0].get_partial_shape()[0].get_min_length(), - in[0].get_partial_shape()[1].get_min_length()); + std::vector lengths(params[0]->get_partial_shape()[0].get_min_length(), + params[0]->get_partial_shape()[1].get_min_length()); std::shared_ptr seq_length = ngraph::builder::makeConstant(ngraph::element::i64, WRB[3], lengths, false); - auto lstm_sequence = std::make_shared(in[0], - in[1], - in[2], + auto lstm_sequence = std::make_shared(params[0], + params[1], + params[2], seq_length, weightsNode, reccurrenceWeightsNode, diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp index 100c33bb788a21..248f0b72a0f9a9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp @@ -74,11 +74,10 @@ void TopKLayerTestGPU::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramIn = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto k = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK); auto topk = std::dynamic_pointer_cast( - std::make_shared(paramIn[0], k, axis, mode, sort, ngraph::element::Type_t::i64, stable)); + 
std::make_shared(params[0], k, axis, mode, sort, ngraph::element::Type_t::i64, stable)); ngraph::ResultVector results; for (size_t i = 0; i < topk->get_output_size(); i++) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp index de10fba3cfc13f..ca0c19bd3f3c54 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/convolution.cpp @@ -85,12 +85,10 @@ class ConvolutionLayerGPUTest : public testing::WithParamInterface(inType, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - auto convolutionNode = ngraph::builder::makeConvolution(paramOuts.front(), netType, kernel, stride, padBegin, + auto convolutionNode = ngraph::builder::makeConvolution(inputParams.front(), netType, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels); ngraph::ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/broadcast.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/broadcast.cpp index 580a67383f3aba..8b7c750756b11f 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/broadcast.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/broadcast.cpp @@ -109,8 +109,6 @@ class BroadcastLayerGPUTest : public testing::WithParamInterfaceset_friendly_name("data"); - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(functionParams)); - std::shared_ptr broadcastOp; if (mode == ov::op::BroadcastType::EXPLICIT) { std::shared_ptr targetShapeOp; @@ -125,19 +123,19 @@ class BroadcastLayerGPUTest : public testing::WithParamInterface 2 ? 
functionParams[2] : functionParams[1]; } - broadcastOp = std::make_shared(paramOuts[0], + broadcastOp = std::make_shared(functionParams[0], targetShapeOp, axesMappingOp, mode); } else if (mode == ov::op::BroadcastType::NUMPY) { if (isTargetShapeConst) { auto targetShapeConst = ov::op::v0::Constant::create(ov::element::i64, {targetShapeRank}, targetShape); - broadcastOp = std::make_shared(paramOuts[0], + broadcastOp = std::make_shared(functionParams[0], targetShapeConst, mode); } else { - broadcastOp = std::make_shared(paramOuts[0], - paramOuts[1], + broadcastOp = std::make_shared(functionParams[0], + functionParams[1], mode); } } diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp index a3c84e5cd517ab..cf9ae70ee7f518 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp @@ -96,12 +96,10 @@ class ConvolutionLayerGPUTestDynamic : public testing::WithParamInterface(inType, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - auto convolutionNode = ngraph::builder::makeConvolution(paramOuts.front(), netType, kernel, stride, padBegin, + auto convolutionNode = ngraph::builder::makeConvolution(inputParams.front(), netType, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels); if (activationFusing) { auto activationNode = ngraph::builder::makeActivation(convolutionNode, netType, ngraph::helpers::ActivationTypes::Relu); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp index a5dfc13c5c8dcb..e58f749c93e964 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp 
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp @@ -196,24 +196,23 @@ class DetectionOutputLayerGPUTest : public testing::WithParamInterface(ngraph::element::f32, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); if (attrs.num_classes == -1) { std::shared_ptr detOut; - if (paramOuts.size() == 3) - detOut = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], attrs); - else if (paramOuts.size() == 5) - detOut = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], attrs); + if (params.size() == 3) + detOut = std::make_shared(params[0], params[1], params[2], attrs); + else if (params.size() == 5) + detOut = std::make_shared(params[0], params[1], params[2], params[3], params[4], attrs); else throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs"); ngraph::ResultVector results{std::make_shared(detOut)}; function = std::make_shared(results, params, "DetectionOutputDynamic"); } else { + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); ngraph::ResultVector results{std::make_shared(detOut)}; function = std::make_shared(results, params, "DetectionOutputDynamic"); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather.cpp index b97dd9927002e8..bdda5ccbe2a947 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather.cpp @@ -119,14 +119,11 @@ class GatherGPUTest : public testing::WithParamInterface, params.back()->set_friendly_name("axis"); } - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - - gatherNode = 
std::make_shared(paramOuts[0], - isIndicesConstant ? indicesNode : paramOuts[1], + gatherNode = std::make_shared(params[0], + isIndicesConstant ? indicesNode : params[1], isAxisConstant ? axisNode - : isIndicesConstant ? paramOuts[1] - : paramOuts[2], + : isIndicesConstant ? params[1] + : params[2], batchDims); ngraph::ResultVector results{std::make_shared(gatherNode)}; function = std::make_shared(results, params, "Gather"); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather_nd.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather_nd.cpp index cdd9ace992225f..f3d98ff82decd8 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather_nd.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/gather_nd.cpp @@ -100,11 +100,8 @@ class GatherNDGPUTest : public testing::WithParamInterfaceset_friendly_name("indices"); } - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - - gather_ndNode = std::make_shared(paramOuts[0], - isIndicesConstant ? indicesNode : paramOuts[1], + gather_ndNode = std::make_shared(params[0], + isIndicesConstant ? 
indicesNode : params[1], batchDims); ngraph::ResultVector results{std::make_shared(gather_ndNode)}; function = std::make_shared(results, params, "GatherND"); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/grid_sample.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/grid_sample.cpp index fb9c60318be8b2..11862e3d42cbb2 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/grid_sample.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/grid_sample.cpp @@ -83,9 +83,8 @@ class GridSampleLayerTestGPU : public testing::WithParamInterface(gridPrecision, inputDynamicShapes[1])}; params[0]->set_friendly_name("data"); params[1]->set_friendly_name("grid"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); GridSample::Attributes attributes = {alignCorners, interpolateMode, paddingMode}; - auto gridSampleNode = std::make_shared(paramOuts[0], paramOuts[1], attributes); + auto gridSampleNode = std::make_shared(params[0], params[1], attributes); ngraph::ResultVector results; for (size_t i = 0; i < gridSampleNode->get_output_size(); i++) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/groupconvolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/groupconvolution.cpp index e47866f1f5709a..01fad8fc9f6817 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/groupconvolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/groupconvolution.cpp @@ -87,12 +87,10 @@ class GroupConvolutionLayerGPUTestDynamic : public testing::WithParamInterface(inType, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - auto groupConvolutionNode = ngraph::builder::makeGroupConvolution(paramOuts.front(), netType, kernel, stride, padBegin, + auto groupConvolutionNode = 
ngraph::builder::makeGroupConvolution(inputParams.front(), netType, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups); ngraph::ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp index 8965a42ee35dc9..45348753685039 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/matmul.cpp @@ -122,8 +122,7 @@ class MatMulLayerGPUTest : public testing::WithParamInterface(matrixB)); } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); - auto matMul = builder::makeMatMul(paramOuts[0], matrixB, transpA, transpB); + auto matMul = builder::makeMatMul(params[0], matrixB, transpA, transpB); auto makeFunction = [](const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode) { ngraph::ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/mvn.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/mvn.cpp index 2dee03efde16dd..4d58cb59f848bb 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/mvn.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/mvn.cpp @@ -72,12 +72,11 @@ class MvnLayerGPUTest : public testing::WithParamInterface(netPrecision, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto axesNode = ngraph::builder::makeConstant(axesType, ngraph::Shape{axes.size()}, axes); - auto mvn = ngraph::builder::makeMVN6(paramOuts[0], axesNode, normalizeVariance, eps, eps_mode); + auto mvn = ngraph::builder::makeMVN6(params[0], axesNode, normalizeVariance, eps, eps_mode); rel_threshold = 0.015f; diff --git 
a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/normalize_l2.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/normalize_l2.cpp index 7d4adb660fe18f..f955ef243a23e8 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/normalize_l2.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/normalize_l2.cpp @@ -56,11 +56,10 @@ class NormalizeL2LayerGPUTest : public testing::WithParamInterface(netPrecision, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto normalize = ngraph::builder::makeNormalizeL2(paramOuts[0], axes, eps, epsMode); + + auto normalize = ngraph::builder::makeNormalizeL2(params[0], axes, eps, epsMode); ngraph::ResultVector results{std::make_shared(normalize)}; function = std::make_shared(results, params, "NormalizeL2"); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/pad.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/pad.cpp index d219b693016587..4a30f042df0226 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/pad.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/pad.cpp @@ -121,8 +121,7 @@ class PadLayerGPUTest : public testing::WithParamInterface(inType, ngraph::Shape{}, &argPadValue); } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(functionParams)); - auto pad = std::make_shared(paramOuts[0], pads_begin, pads_end, arg_pad_value, padMode); + auto pad = std::make_shared(functionParams[0], pads_begin, pads_end, arg_pad_value, padMode); ngraph::ResultVector results; for (size_t i = 0; i < pad->get_output_size(); ++i) { diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/prior_box.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/prior_box.cpp index 9c22ee657eedde..6c3ad3f168f4cb 100644 --- 
a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/prior_box.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/prior_box.cpp @@ -95,13 +95,11 @@ class PriorBoxLayerGPUTest : public testing::WithParamInterface(inType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(functionParams)); - auto shapeOfOp1 = std::make_shared(paramOuts[0], element::i32); - auto shapeOfOp2 = std::make_shared(paramOuts[1], element::i32); + auto shapeOfOp1 = std::make_shared(functionParams[0], element::i32); + auto shapeOfOp2 = std::make_shared(functionParams[1], element::i32); auto stridedSliceOp1 = ngraph::builder::makeStridedSlice(shapeOfOp1, beginInput, endInput, strideInput, element::i32, diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/reduce.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/reduce.cpp index 1f13d7998a6f20..e4ae7b23381b00 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/reduce.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/reduce.cpp @@ -78,11 +78,8 @@ class ReduceLayerGPUTest : public testing::WithParamInterface(netPrecision, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); std::vector shapeAxes; shapeAxes.push_back(axes.size()); @@ -90,7 +87,7 @@ class ReduceLayerGPUTest : public testing::WithParamInterface( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); + const auto reduce = ngraph::builder::makeReduce(params[0], reductionAxesNode, keepDims, reductionType); auto makeFunction = [](ParameterVector ¶ms, const std::shared_ptr &lastNode) { ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/roi_pooling.cpp 
b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/roi_pooling.cpp index 34ac60f2752c08..5add9f1cc1dbca 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/roi_pooling.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/roi_pooling.cpp @@ -183,13 +183,10 @@ class ROIPoolingLayerGPUTest : public testing::WithParamInterface(ngPrc, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method); + auto roi_pooling = ngraph::builder::makeROIPooling(params[0], params[1], poolShape, spatial_scale, pool_method); ngraph::ResultVector results; for (size_t i = 0; i < roi_pooling->get_output_size(); i++) diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/shapeof.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/shapeof.cpp index c1217d68744640..d231567a6a33fc 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/shapeof.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/shapeof.cpp @@ -54,11 +54,10 @@ class ShapeOfLayerGPUTest : public testing::WithParamInterface(netPrecision, shape)); - } - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(functionParams)); - auto shapeOfOp = std::make_shared(paramOuts[0], element::i32); + + auto shapeOfOp = std::make_shared(functionParams[0], element::i32); auto makeFunction = [](ParameterVector ¶ms, const std::shared_ptr &lastNode) { ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/softmax.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/softmax.cpp index a8cc19ea0cbd89..5de070d5fab5dd 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/softmax.cpp +++ 
b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/softmax.cpp @@ -53,14 +53,10 @@ class SoftMaxLayerGPUTest : public testing::WithParamInterface(inType, shape)); - } - - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - const auto softMax = std::make_shared(paramOuts.at(0), axis); + const auto softMax = std::make_shared(params.at(0), axis); auto makeFunction = [](ParameterVector ¶ms, const std::shared_ptr &lastNode) { ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp index 472960b7574d71..d922da0efc3671 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/split.cpp @@ -66,10 +66,8 @@ class SplitLayerGPUDynamicTest : public testing::WithParamInterface(netPrecision, inputDynamicShapes[0])}; - auto paramOuts = - ngraph::helpers::convert2OutputVector(helpers::castOps2Nodes(dyn_params)); auto split = std::dynamic_pointer_cast( - ngraph::builder::makeSplit(paramOuts[0], netPrecision, numSplits, axis)); + ngraph::builder::makeSplit(dyn_params[0], netPrecision, numSplits, axis)); ngraph::ResultVector results; for (size_t i = 0; i < outIndices.size(); i++) { results.push_back(std::make_shared(split->output(outIndices[i]))); @@ -205,7 +203,6 @@ class VariadicSplitLayerGPUDynamicTest : public testing::WithParamInterface(netPrecision, inputDynamicShapes[0])}; - auto paramOuts = ngraph::helpers::convert2OutputVector(helpers::castOps2Nodes(dyn_params)); auto splitAxisOp = std::make_shared(ngraph::element::i64, ngraph::Shape{}, std::vector{static_cast(axis)}); @@ -218,7 +215,7 @@ class VariadicSplitLayerGPUDynamicTest : public testing::WithParamInterface(ngraph::element::Type_t::i64, ngraph::Shape{splitLength.size()}, splitLength); } - auto varSplit = 
std::make_shared(paramOuts[0], splitAxisOp, splitLengthOp); + auto varSplit = std::make_shared(dyn_params[0], splitAxisOp, splitLengthOp); ngraph::ResultVector results; for (size_t i = 0; i < splitLength.size(); i++) { results.push_back(std::make_shared(varSplit->output(i))); diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/tile.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/tile.cpp index 3f3e0734eb2905..2c1268d76db7aa 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/tile.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/tile.cpp @@ -93,13 +93,12 @@ class TileLayerGPUTest : public testing::WithParamInterfaceset_friendly_name("data"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); std::shared_ptr tileNode; if (isRepeatsConst) { - tileNode = std::make_shared(paramOuts[0], + tileNode = std::make_shared(functionParams[0], ov::op::v0::Constant::create(ov::element::i64, { repeatsData.size() }, repeatsData)); } else { - tileNode = std::make_shared(paramOuts[0], paramOuts[1]); + tileNode = std::make_shared(functionParams[0], functionParams[1]); } ngraph::ResultVector results; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/unique.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/unique.cpp index a8bf9bc51735b6..9e60d64fb7d5f5 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/unique.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/unique.cpp @@ -74,15 +74,13 @@ class UniqueLayerDynamicGPUTest : public testing::WithParamInterface(dataPrecision, shape)); } params[0]->set_friendly_name("data"); - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::shared_ptr uniqueNode; if (flattened) { - uniqueNode = std::make_shared(paramOuts[0], sorted); + 
uniqueNode = std::make_shared(params[0], sorted); } else { axis = std::get<1>(flatOrAxis); uniqueNode = std::make_shared( - paramOuts[0], + params[0], ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}), sorted); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_model_static_split_layer.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_model_static_split_layer.cpp index 0f38bf1787bd2c..658102694142a9 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_model_static_split_layer.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_model_static_split_layer.cpp @@ -79,10 +79,8 @@ class DynamicModelStaticSplitLayerGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); axis->set_friendly_name("axis"); @@ -90,10 +88,10 @@ class DynamicModelStaticSplitLayerGPUTest : public testing::WithParamInterfaceset_friendly_name("split_sizes"); - auto variadicSplitOp = std::make_shared(paramOuts[0], axis, split_sizes); + auto variadicSplitOp = std::make_shared(params[0], axis, split_sizes); variadicSplitOp->set_friendly_name("variadicSplit"); - auto addOp = ngraph::builder::makeEltwise(paramOuts[1], variadicSplitOp->output(1), ngraph::helpers::EltwiseTypes::ADD); + auto addOp = ngraph::builder::makeEltwise(params[1], variadicSplitOp->output(1), ngraph::helpers::EltwiseTypes::ADD); addOp->set_friendly_name("add"); ngraph::ResultVector results = {std::make_shared(addOp)}; diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_gen_impl_key.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_gen_impl_key.cpp index 06aa4a11817e75..c365f1cd888205 100644 --- 
a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_gen_impl_key.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_gen_impl_key.cpp @@ -84,12 +84,10 @@ class GenlImplKeyDynamicGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto addOp1 = ngraph::builder::makeEltwise(paramOuts[1], paramOuts[1], ngraph::helpers::EltwiseTypes::ADD); + auto addOp1 = ngraph::builder::makeEltwise(params[1], params[1], ngraph::helpers::EltwiseTypes::ADD); addOp1->set_friendly_name("add1"); auto shapeOfOp1 = std::make_shared(addOp1, ElementType::i64); @@ -110,7 +108,7 @@ class GenlImplKeyDynamicGPUTest : public testing::WithParamInterface(addOp1, concatOp1, false); reshapeOp1->set_friendly_name("reshapeOp1"); - auto addOp2 = ngraph::builder::makeEltwise(paramOuts[1], paramOuts[1], ngraph::helpers::EltwiseTypes::ADD); + auto addOp2 = ngraph::builder::makeEltwise(params[1], params[1], ngraph::helpers::EltwiseTypes::ADD); addOp2->set_friendly_name("add2"); auto shapeOfOp2 = std::make_shared(addOp2, ElementType::i64); diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_reduce_deconvolution_concat.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_reduce_deconvolution_concat.cpp index f09470c91a8442..5ddc7977e27539 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_reduce_deconvolution_concat.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_reduce_deconvolution_concat.cpp @@ -81,19 +81,17 @@ class ReduceDeconvConcatDynamicGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto deconvOp = ngraph::builder::makeConvolutionBackpropData(paramOuts[0], 
netType, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, + auto deconvOp = ngraph::builder::makeConvolutionBackpropData(params[0], netType, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, ov::op::PadType::EXPLICIT, 16); deconvOp->set_friendly_name("deconv"); std::vector reduce_axes = {5}; auto reduceAxesNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape({1}), reduce_axes)); - auto reduceOp = ngraph::builder::makeReduce(paramOuts[1], reduceAxesNode, false, ngraph::helpers::ReductionType::Max); + auto reduceOp = ngraph::builder::makeReduce(params[1], reduceAxesNode, false, ngraph::helpers::ReductionType::Max); reduceOp->set_friendly_name("reduce"); auto concatOp = ngraph::builder::makeConcat({deconvOp, reduceOp}, 1); diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_activation.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_activation.cpp index 4967c716b14d6a..815a2954acb301 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_activation.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_activation.cpp @@ -92,15 +92,13 @@ class shapeofActivationDynamicGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector shape_pattern = {0, 1, -1, 0}; auto shapePatternsNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape({4}), shape_pattern)); - auto reshapeOp = std::make_shared(paramOuts[0], shapePatternsNode, true); + auto reshapeOp = std::make_shared(params[0], shapePatternsNode, true); reshapeOp->set_friendly_name("reshape"); auto shapeOfOp = std::make_shared(reshapeOp, ElementType::i32); diff --git 
a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp index 107cb0f2bcd569..ce39dfc1225066 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp @@ -84,15 +84,13 @@ class ShapeOfReshapeReduceDynamicGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto addOp = ngraph::builder::makeEltwise(paramOuts[1], paramOuts[1], ngraph::helpers::EltwiseTypes::ADD); + auto addOp = ngraph::builder::makeEltwise(params[1], params[1], ngraph::helpers::EltwiseTypes::ADD); addOp->set_friendly_name("add"); - auto shapeOfOp1 = std::make_shared(paramOuts[0], ElementType::i64); + auto shapeOfOp1 = std::make_shared(params[0], ElementType::i64); shapeOfOp1->set_friendly_name("shapeof1"); std::vector reduce_axes = {0}; auto reduceAxesNode = std::dynamic_pointer_cast( diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_with_empty_tensor.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_with_empty_tensor.cpp index 448c629d1a5579..cd0bad894d3a74 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_with_empty_tensor.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_with_empty_tensor.cpp @@ -91,19 +91,17 @@ class EmptyTensorDynamicGPUTest : public testing::WithParamInterface(netType, shape)); - } - auto paramOuts = - helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + const ElementType intInputsPrecision = ElementType::i32; - auto nonzeroEmptyResultOp = 
std::make_shared(paramOuts[0]); + auto nonzeroEmptyResultOp = std::make_shared(params[0]); auto convertEmptyInputOp = ngraph::builder::makeConversion(nonzeroEmptyResultOp, ElementType::i32, ngraph::helpers::ConversionTypes::CONVERT); auto concatPartialInputEmptyOp = - ngraph::builder::makeConcat({convertEmptyInputOp, paramOuts[1], convertEmptyInputOp}, + ngraph::builder::makeConcat({convertEmptyInputOp, params[1], convertEmptyInputOp}, 1); // partially empty input / non empty output auto concatEmptyInputEmptyOutputOp = ngraph::builder::makeConcat({convertEmptyInputOp, convertEmptyInputOp, convertEmptyInputOp}, @@ -117,14 +115,14 @@ class EmptyTensorDynamicGPUTest : public testing::WithParamInterface(intInputsPrecision, ov::Shape({1}), {0}); auto gatherEmptyIndicesOp = - std::make_shared(paramOuts[0], squeezeEmptyInputOp, axisNode, 0); + std::make_shared(params[0], squeezeEmptyInputOp, axisNode, 0); auto shapeofEmptyInputOp = std::make_shared(gatherEmptyIndicesOp, ElementType::i32); ngraph::ResultVector results = {std::make_shared(shapeofEmptyInputOp), std::make_shared(concatPartialInputEmptyOp), std::make_shared(concatEmptyInputEmptyOutputOp)}; function = std::make_shared(results, params, "result"); - auto nonzero = std::make_shared(paramOuts[0]); + auto nonzero = std::make_shared(params[0]); } }; diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp index 05328c6b5fb8d0..c57017666bd463 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp @@ -1604,11 +1604,8 @@ std::shared_ptr generateMultiSubGraph(const std::shared_ptr generate(const std::shared_ptr &node) { ov::ParameterVector 
params{std::make_shared(ov::element::f32, ov::Shape{{1, 2, 4}}), std::make_shared(ov::element::f32, ov::Shape{{1, 2, 2}})}; - - const auto outputs = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); const auto nms = - std::make_shared(outputs[0], outputs[1], ov::op::v8::MatrixNms::Attributes()); + std::make_shared(params[0], params[1], ov::op::v8::MatrixNms::Attributes()); ov::ResultVector results{std::make_shared(nms)}; return std::make_shared(results, params, "MatrixNms"); } @@ -1616,14 +1613,12 @@ std::shared_ptr generate(const std::shared_ptr std::shared_ptr generateMulticlassNmsBase(const std::shared_ptr &node) { ov::ParameterVector params{std::make_shared(ov::element::f32, ov::Shape{{1, 2, 4}}), std::make_shared(ov::element::f32, ov::Shape{{1, 2, 2}})}; - const auto outputs = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); if (ov::is_type(node)) { - const auto nms = std::make_shared(outputs[0], outputs[1], ov::op::v8::MulticlassNms::Attributes()); + const auto nms = std::make_shared(params[0], params[1], ov::op::v8::MulticlassNms::Attributes()); ov::ResultVector results{std::make_shared(nms)}; return std::make_shared(results, params, "MulticlassNms"); } else if (ov::is_type(node)) { - const auto nms = std::make_shared(outputs[0], outputs[1], ov::op::v9::MulticlassNms::Attributes()); + const auto nms = std::make_shared(params[0], params[1], ov::op::v9::MulticlassNms::Attributes()); ov::ResultVector results{std::make_shared(nms)}; return std::make_shared(results, params, "MulticlassNms"); } else { @@ -1808,8 +1803,6 @@ std::shared_ptr generate(const std::shared_ptr(ov::element::f32, ov::Shape{{2, 2, 3, 4}}), std::make_shared(ov::element::f32, ov::Shape{{1, 12, 2, 2}}), std::make_shared(ov::element::f32, ov::Shape{{1, 3, 2, 2}})}; - const auto outputs = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); ov::op::v9::GenerateProposals::Attributes attrs; attrs.min_size = 
1; attrs.nms_threshold = 0.8; @@ -1817,7 +1810,7 @@ std::shared_ptr generate(const std::shared_ptr(node)) { const auto gp = std::make_shared( - outputs[0], outputs[1], outputs[2], outputs[3], attrs); + params[0], params[1], params[2], params[3], attrs); ov::ResultVector results{std::make_shared(gp)}; return std::make_shared(results, params, "GenerateProposalsGraph"); } else { diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp index 4d82286103e055..aaf35c28aaebbc 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp @@ -87,15 +87,14 @@ struct PreprocessingPrecisionConvertTest : auto make_ngraph = [&](bool with_extra_conv) { auto in_prec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(with_extra_conv ? inPrc : decltype(inPrc)(InferenceEngine::Precision::FP32)); ov::ParameterVector paramsIn {std::make_shared(in_prec, ov::Shape(inputShape))}; - auto paramIn = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(paramsIn)); - auto toF32 = std::make_shared(paramIn[0], ngraph::element::Type_t::f32); + auto toF32 = std::make_shared(paramsIn[0], ngraph::element::Type_t::f32); auto constNode = std::make_shared( ngraph::element::Type_t::i64, ngraph::Shape{inputShape.size()}, inputShape); + std::shared_ptr reshape_input = with_extra_conv ? toF32->shared_from_this() : paramsIn[0]; auto reshape = std::dynamic_pointer_cast( - std::make_shared(with_extra_conv ? 
toF32 : paramIn[0], constNode, specialZero)); + std::make_shared(reshape_input, constNode, specialZero)); ngraph::ResultVector results{std::make_shared(reshape)}; return std::make_shared(results, paramsIn, "Reshape"); }; diff --git a/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp b/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp index 212bfded16af0d..b938ecea564da4 100644 --- a/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp @@ -105,9 +105,8 @@ void SetBlobTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precNg); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(IS))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto axisNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{-1})->output(0); - auto cumSum = std::dynamic_pointer_cast(ngraph::builder::makeCumSum(paramOuts[0], axisNode, false, false)); + auto cumSum = std::dynamic_pointer_cast(ngraph::builder::makeCumSum(params[0], axisNode, false, false)); ngraph::ResultVector results{std::make_shared(cumSum)}; function = std::make_shared(results, params, "InferSetBlob"); } diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/invalid_cases/proposal.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/invalid_cases/proposal.cpp index ac697f1f1ac2ff..19e014e40906cb 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/invalid_cases/proposal.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/invalid_cases/proposal.cpp @@ -73,10 +73,9 @@ void ProposalBehTest::SetUp() { std::make_shared(ngPrc, ov::Shape(boxesShape))}; params[0]->set_friendly_name("scores"); params[1]->set_friendly_name("boxes"); - auto paramOuts = 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto proposal = std::dynamic_pointer_cast( - ngraph::builder::makeProposal(paramOuts[0], paramOuts[1], img_info, ngPrc, + ngraph::builder::makeProposal(params[0], params[1], img_info, ngPrc, base_size, pre_nms_topn, post_nms_topn, diff --git a/src/tests/functional/shared_test_classes/src/subgraph/clamp_fq.cpp b/src/tests/functional/shared_test_classes/src/subgraph/clamp_fq.cpp index af9652a4c388d7..b8dfc25f12f1fe 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/clamp_fq.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/clamp_fq.cpp @@ -62,9 +62,8 @@ namespace SubgraphTestsDefinitions { } auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto clamp = std::make_shared(paramOuts[0], clamp_min_max[0], clamp_min_max[1]); + auto clamp = std::make_shared(params[0], clamp_min_max[0], clamp_min_max[1]); auto FQNode = ngraph::builder::makeFakeQuantize(clamp, ngraph::element::f32, levels[0], constShape[0], { inputDataMin }, { inputDataMax }, { inputDataMin }, { inputDataMax }); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/convolution_relu_sequence.cpp b/src/tests/functional/shared_test_classes/src/subgraph/convolution_relu_sequence.cpp index c0e7fb730f35a6..b3f0c42713c717 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/convolution_relu_sequence.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/convolution_relu_sequence.cpp @@ -49,7 +49,7 @@ void ConvolutionReluSequenceTest::SetUp() { configuration.insert(config.begin(), config.end()); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(convParamsAll.inputShape))}; - auto 
lastOutputs = ngraph::helpers::castOps2Nodes(params).front(); + std::shared_ptr lastOutputs = params.front(); auto inputChannels = convParamsAll.inputShape[1]; for (auto&& single : convParamsAll.sequenceDesc) { diff --git a/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp b/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp index dfc1dcdb5f7fd5..02dbd4ef4650a3 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/multiply_add.cpp @@ -28,14 +28,12 @@ void MultiplyAddLayerTest::SetUp() { ov::element::Type element_type; std::tie(inputShape, element_type, targetDevice) = this->GetParam(); ov::ParameterVector params{std::make_shared(element_type, ov::PartialShape(inputShape))}; - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector constShape(inputShape.size(), 1); constShape[1] = inputShape[1]; auto const_mul = ngraph::builder::makeConstant(element_type, constShape, {}, true); - auto mul = std::make_shared(paramOuts[0], const_mul); + auto mul = std::make_shared(params[0], const_mul); auto const_add = ngraph::builder::makeConstant(element_type, constShape, {}, true); auto add = std::make_shared(mul, const_add); ov::ResultVector results{std::make_shared(add)}; diff --git a/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp b/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp index 9ff6272b9ab529..78d572cafd1066 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/mvn_multiply_add.cpp @@ -46,10 +46,8 @@ void MVNMultiplyAdd::SetUp() { std::tie(inputShapes, constantShapes) = shapes; ov::ParameterVector param{std::make_shared(dataType, ov::Shape(inputShapes))}; - auto paramOuts = - 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); auto axesNode = ngraph::builder::makeConstant(axesType, ov::Shape{axes.size()}, axes); - auto mvn = ngraph::builder::makeMVN6(paramOuts[0], axesNode, normalizeVariance, eps, epsMode); + auto mvn = ngraph::builder::makeMVN6(param[0], axesNode, normalizeVariance, eps, epsMode); auto gamma = ngraph::builder::makeConstant(dataType, constantShapes, {}, true); auto mul = std::make_shared(mvn, gamma); auto beta = ngraph::builder::makeConstant(dataType, constantShapes, {}, true); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp index 9f872e524d1b68..e9fd7e7f837093 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp @@ -53,12 +53,11 @@ void QuantConvBackpropDataLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector dataFqConstShapes(inputShape.size(), 1); if (quantGranularity == ngraph::helpers::Perchannel) dataFqConstShapes[1] = inputShape[1]; - auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes); + auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, dataFqConstShapes); std::vector weightsShapes = {inputShape[1], convOutChannels}; weightsShapes.insert(weightsShapes.end(), kernel.begin(), kernel.end()); diff --git 
a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp index 3ad85f760d20c0..1f9a505e83f689 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp @@ -58,12 +58,11 @@ void QuantGroupConvLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, quantLevels, quantGranularity, quantizeWeights) = groupConvParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector dataFqConstShapes(inputShape.size(), 1); if (quantGranularity == ngraph::helpers::Perchannel) dataFqConstShapes[1] = inputShape[1]; - auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes); + auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, dataFqConstShapes); std::vector weightsShapes = {convOutChannels, inputShape[1]}; if (weightsShapes[0] % numGroups || weightsShapes[1] % numGroups) diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp index 9e028c745b75f0..42022f1ec483a2 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp @@ -54,12 +54,11 @@ void QuantGroupConvBackpropDataLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, quantLevels, 
quantGranularity) = groupConvBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector dataFqConstShapes(inputShape.size(), 1); if (quantGranularity == ngraph::helpers::Perchannel) dataFqConstShapes[1] = inputShape[1]; - auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes); + auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, dataFqConstShapes); std::vector weightsShapes = {inputShape[1], convOutChannels}; if (weightsShapes[0] % numGroups || weightsShapes[1] % numGroups) diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp index d4dae9e9e86f97..77413c4f8abe13 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp @@ -73,8 +73,6 @@ void QuantMatMulTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape0)), std::make_shared(ngPrc, ov::Shape(inputShape1))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto makeFakeQuantizeNode = [ngPrc](size_t quantLevels, QuantRange inputRange, QuantRange outputRange, QuantizationGranularity quantGranularity, const ngraph::Output &in, std::vector inputShape, @@ -93,8 +91,8 @@ void QuantMatMulTest::SetUp() { return ngraph::builder::makeFakeQuantize(in, ngPrc, quantLevels, dataFqConstShapes, inputLowData, inputHighData, outputLowData, outputHighData); }; - auto dataFq0 = makeFakeQuantizeNode(quantLevels0, inputRange0, outputRange0, 
quantGranularity0, paramOuts[0], inputShape0, fqPrec0); - auto dataFq1 = makeFakeQuantizeNode(quantLevels1, inputRange1, outputRange1, quantGranularity1, paramOuts[1], inputShape1, fqPrec1); + auto dataFq0 = makeFakeQuantizeNode(quantLevels0, inputRange0, outputRange0, quantGranularity0, params[0], inputShape0, fqPrec0); + auto dataFq1 = makeFakeQuantizeNode(quantLevels1, inputRange1, outputRange1, quantGranularity1, params[1], inputShape1, fqPrec1); auto MatMul = std::dynamic_pointer_cast( ngraph::builder::makeMatMul(dataFq0, dataFq1)); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp b/src/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp index 99066cc445665c..4c8dbd44e041b8 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp @@ -34,8 +34,6 @@ void ReduceEltwiseTest::SetUp() { std::tie(inputShape, axes, opType, keepDims, netPrecision, targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); std::vector shapeAxes; switch (opType) { @@ -54,7 +52,7 @@ void ReduceEltwiseTest::SetUp() { auto reductionAxesNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - auto reduce = std::make_shared(paramOuts[0], reductionAxesNode, keepDims); + auto reduce = std::make_shared(params[0], reductionAxesNode, keepDims); std::vector constShape(reduce.get()->get_output_partial_shape(0).rank().get_length(), 1); ASSERT_GT(constShape.size(), 2); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/strided_slice.cpp b/src/tests/functional/shared_test_classes/src/subgraph/strided_slice.cpp index 
51593c0adff3ad..fb2050287e6050 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/strided_slice.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/strided_slice.cpp @@ -47,9 +47,7 @@ void StridedSliceTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(ssParams.inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto relu = std::make_shared(paramOuts[0]); + auto relu = std::make_shared(params[0]); auto ss = ngraph::builder::makeStridedSlice(relu, ssParams.begin, ssParams.end, ssParams.strides, ngPrc, ssParams.beginMask, ssParams.endMask, ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask); ngraph::ResultVector results{std::make_shared(ss)}; diff --git a/src/tests/functional/shared_test_classes/src/subgraph/two_fake_quantize_to_fullyconnected.cpp b/src/tests/functional/shared_test_classes/src/subgraph/two_fake_quantize_to_fullyconnected.cpp index 80d23fd4e71633..7fa90952ec1dd5 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/two_fake_quantize_to_fullyconnected.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/two_fake_quantize_to_fullyconnected.cpp @@ -65,7 +65,6 @@ void FakeQuantizeSubgraphTest::SetUp() { } auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); const int seed = 0; std::mt19937 gen(seed); @@ -114,7 +113,7 @@ void FakeQuantizeSubgraphTest::SetUp() { auto lowNode = ngraph::builder::makeConstant(ngraph::element::f32, channelDataSize, inputMinRange, false); auto highNode = ngraph::builder::makeConstant(ngraph::element::f32, channelDataSize, inputMaxRange, false); - auto inputFQNode = 
ngraph::builder::makeFakeQuantize(paramOuts[0], ngraph::element::f32, levels[0], constShape[0], + auto inputFQNode = ngraph::builder::makeFakeQuantize(params[0], ngraph::element::f32, levels[0], constShape[0], { inputDataMin }, { inputDataMax }, { inputDataMin }, { inputDataMax }); auto weightsFQNode = std::make_shared(const_param, From 0effa378114e30015157832677691e1046312b16 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Fri, 3 Nov 2023 09:10:32 +0100 Subject: [PATCH 186/275] [core] Migrate HSigmoid operator to new API (#20836) * Drop ngraph remains * Use ov::Tensor instaed of ngraph::HostTensor --- src/core/include/openvino/op/hsigmoid.hpp | 6 +- src/core/src/op/hsigmoid.cpp | 87 ++++++++++------------- 2 files changed, 38 insertions(+), 55 deletions(-) diff --git a/src/core/include/openvino/op/hsigmoid.hpp b/src/core/include/openvino/op/hsigmoid.hpp index 71b07ada902617..2c5503b488e0dc 100644 --- a/src/core/include/openvino/op/hsigmoid.hpp +++ b/src/core/include/openvino/op/hsigmoid.hpp @@ -25,12 +25,8 @@ class OPENVINO_API HSigmoid : public util::UnaryElementwiseArithmetic { /// \param data Input tensor HSigmoid(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; - std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v5 diff --git a/src/core/src/op/hsigmoid.cpp b/src/core/src/op/hsigmoid.cpp index 19fb55de3c5fc8..2abc4c02c5da9d 100644 --- a/src/core/src/op/hsigmoid.cpp +++ b/src/core/src/op/hsigmoid.cpp @@ -2,77 +2,64 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/hsigmoid.hpp" - -#include +#include "openvino/op/hsigmoid.hpp" +#include "element_visitor.hpp" #include "itt.hpp" 
-#include "ngraph/attribute_visitor.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/hsigmoid.hpp" -using namespace std; -using namespace ngraph; - -op::v5::HSigmoid::HSigmoid(const Output& arg) : UnaryElementwiseArithmetic(arg) { +namespace ov { +namespace op { +namespace v5 { +HSigmoid::HSigmoid(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -bool op::v5::HSigmoid::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v5_HSigmoid_visit_attributes); - return true; -} - -shared_ptr op::v5::HSigmoid::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr HSigmoid::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v5_HSigmoid_clone_with_new_inputs); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START +namespace hsigmoid { namespace { -template -inline bool evaluate(const HostTensorPtr& arg, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - - ov::reference::hsigmoid(arg->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_hsigmoid(const HostTensorPtr& arg, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg->get_shape()); - out->set_unary(arg); - - switch (arg->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_hsigmoid, bf16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_hsigmoid, f16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_hsigmoid, f32, arg, out, count); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + ov::reference::hsigmoid(in.data(), out.data(), count); + return true; } - return rc; -} +}; } // namespace +} // namespace hsigmoid -bool op::v5::HSigmoid::evaluate(const 
HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool HSigmoid::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v5_HSigmoid_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return evaluate_hsigmoid(inputs[0], outputs[0]); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& input_shape = inputs[0].get_shape(); + const auto count = shape_size(input_shape); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + count); } -bool op::v5::HSigmoid::has_evaluate() const { +bool HSigmoid::has_evaluate() const { OV_OP_SCOPE(v5_HSigmoid_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v5 +} // namespace op +} // namespace ov From c20d52dc4fe9c5300baf1a6afcef8fc9e04cdef7 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 3 Nov 2023 08:51:22 +0000 Subject: [PATCH 187/275] Extend sync benchmark CLI parameters (#20844) --- .../openvino_samples/cpp_sample_sync_benchmark.md | 2 +- .../openvino_samples/python_sample_sync_benchmark.md | 4 ++-- samples/cpp/benchmark/sync_benchmark/main.cpp | 12 ++++++++---- .../benchmark/sync_benchmark/sync_benchmark.py | 11 +++++++---- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_sync_benchmark.md b/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_sync_benchmark.md index e4430f0331b091..0adb885e6fa54e 100644 --- a/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_sync_benchmark.md +++ 
b/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_sync_benchmark.md @@ -70,7 +70,7 @@ Running .. code-block:: sh - sync_benchmark + sync_benchmark (default: CPU) To run the sample, you need to specify a model: diff --git a/docs/articles_en/learn_openvino/openvino_samples/python_sample_sync_benchmark.md b/docs/articles_en/learn_openvino/openvino_samples/python_sample_sync_benchmark.md index ecd142cffca98e..fdbd0fddb2f741 100644 --- a/docs/articles_en/learn_openvino/openvino_samples/python_sample_sync_benchmark.md +++ b/docs/articles_en/learn_openvino/openvino_samples/python_sample_sync_benchmark.md @@ -67,7 +67,7 @@ Running .. code-block:: sh - python sync_benchmark.py + python sync_benchmark.py (default: CPU) To run the sample, you need to specify a model: @@ -138,4 +138,4 @@ See Also * :doc:`Model Downloader ` * :doc:`Convert a Model ` -@endsphinxdirective \ No newline at end of file +@endsphinxdirective diff --git a/samples/cpp/benchmark/sync_benchmark/main.cpp b/samples/cpp/benchmark/sync_benchmark/main.cpp index b2f09d9053d8ce..c0bb1656ceffaa 100644 --- a/samples/cpp/benchmark/sync_benchmark/main.cpp +++ b/samples/cpp/benchmark/sync_benchmark/main.cpp @@ -20,8 +20,12 @@ int main(int argc, char* argv[]) { try { slog::info << "OpenVINO:" << slog::endl; slog::info << ov::get_openvino_version(); - if (argc != 2) { - slog::info << "Usage : " << argv[0] << " " << slog::endl; + + std::string device_name = "CPU"; + if (argc == 3) { + device_name = argv[2]; + } else if (argc != 2) { + slog::info << "Usage : " << argv[0] << " (default: CPU)" << slog::endl; return EXIT_FAILURE; } // Optimize for latency. Most of the devices are configured for latency by default, @@ -29,11 +33,11 @@ int main(int argc, char* argv[]) { ov::AnyMap latency{{ov::hint::performance_mode.name(), ov::hint::PerformanceMode::LATENCY}}; // Create ov::Core and use it to compile a model. - // Pick a device by replacing CPU, for example AUTO:GPU,CPU. 
+ // Select the device by providing the name as the second parameter to CLI. // Using MULTI device is pointless in sync scenario // because only one instance of ov::InferRequest is used ov::Core core; - ov::CompiledModel compiled_model = core.compile_model(argv[1], "CPU", latency); + ov::CompiledModel compiled_model = core.compile_model(argv[1], device_name, latency); ov::InferRequest ireq = compiled_model.create_infer_request(); // Fill input data for the ireq for (const ov::Output& model_input : compiled_model.inputs()) { diff --git a/samples/python/benchmark/sync_benchmark/sync_benchmark.py b/samples/python/benchmark/sync_benchmark/sync_benchmark.py index e270d25a64fbbd..6aed1a489e12f4 100755 --- a/samples/python/benchmark/sync_benchmark/sync_benchmark.py +++ b/samples/python/benchmark/sync_benchmark/sync_benchmark.py @@ -30,19 +30,22 @@ def main(): log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout) log.info('OpenVINO:') log.info(f"{'Build ':.<39} {get_version()}") - if len(sys.argv) != 2: - log.info(f'Usage: {sys.argv[0]} ') + device_name = 'CPU' + if len(sys.argv) == 3: + device_name = sys.argv[2] + elif len(sys.argv) != 2: + log.info(f'Usage: {sys.argv[0]} (default: CPU)') return 1 # Optimize for latency. Most of the devices are configured for latency by default, # but there are exceptions like GNA latency = {'PERFORMANCE_HINT': 'LATENCY'} # Create Core and use it to compile a model. - # Pick a device by replacing CPU, for example AUTO:GPU,CPU. + # Select the device by providing the name as the second parameter to CLI. 
# Using MULTI device is pointless in sync scenario # because only one instance of openvino.runtime.InferRequest is used core = ov.Core() - compiled_model = core.compile_model(sys.argv[1], 'CPU', latency) + compiled_model = core.compile_model(sys.argv[1], device_name, latency) ireq = compiled_model.create_infer_request() # Fill input data for the ireq for model_input in compiled_model.inputs: From f890bf79304674761a43a3608febd2b84111f020 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 3 Nov 2023 08:57:04 +0000 Subject: [PATCH 188/275] Extend throughput benchmark with device CLI parameter (#20816) * Extend throughput benchmark CLI parameters * Added device name as the second CLI parameter with default CPU value * Update samples/cpp/benchmark/throughput_benchmark/main.cpp Co-authored-by: Zlobin Vladimir * Fix comments to review * Modified python version * Modified documentation * Fix comments to review * Fixed the comment * Modified python doc * Fixed device name handling in python version * Update main.cpp * Update throughput_benchmark.py --------- Co-authored-by: Zlobin Vladimir --- .../cpp_sample_throughput_benchmark.md | 2 +- .../python_sample_throughput_benchmark.md | 2 +- samples/cpp/benchmark/throughput_benchmark/main.cpp | 12 ++++++++---- .../throughput_benchmark/throughput_benchmark.py | 11 +++++++---- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_throughput_benchmark.md b/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_throughput_benchmark.md index 0f4fb60cf8b6ae..582a2f0038eed7 100644 --- a/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_throughput_benchmark.md +++ b/docs/articles_en/learn_openvino/openvino_samples/cpp_sample_throughput_benchmark.md @@ -75,7 +75,7 @@ Running .. 
code-block:: sh - throughput_benchmark + throughput_benchmark (default: CPU) To run the sample, you need to specify a model: diff --git a/docs/articles_en/learn_openvino/openvino_samples/python_sample_throughput_benchmark.md b/docs/articles_en/learn_openvino/openvino_samples/python_sample_throughput_benchmark.md index a9b9bc86598b9f..1cc9c02501045e 100644 --- a/docs/articles_en/learn_openvino/openvino_samples/python_sample_throughput_benchmark.md +++ b/docs/articles_en/learn_openvino/openvino_samples/python_sample_throughput_benchmark.md @@ -72,7 +72,7 @@ Running .. code-block:: sh - python throughput_benchmark.py + python throughput_benchmark.py (default: CPU) To run the sample, you need to specify a model: diff --git a/samples/cpp/benchmark/throughput_benchmark/main.cpp b/samples/cpp/benchmark/throughput_benchmark/main.cpp index 885bd27713b2d8..4961fdacceead6 100644 --- a/samples/cpp/benchmark/throughput_benchmark/main.cpp +++ b/samples/cpp/benchmark/throughput_benchmark/main.cpp @@ -22,8 +22,12 @@ int main(int argc, char* argv[]) { try { slog::info << "OpenVINO:" << slog::endl; slog::info << ov::get_openvino_version(); - if (argc != 2) { - slog::info << "Usage : " << argv[0] << " " << slog::endl; + + std::string device_name = "CPU"; + if (argc == 3) { + device_name = argv[2]; + } else if (argc != 2) { + slog::info << "Usage : " << argv[0] << " (default: CPU)" << slog::endl; return EXIT_FAILURE; } // Optimize for throughput. Best throughput can be reached by @@ -31,10 +35,10 @@ int main(int argc, char* argv[]) { ov::AnyMap tput{{ov::hint::performance_mode.name(), ov::hint::PerformanceMode::THROUGHPUT}}; // Create ov::Core and use it to compile a model. - // Pick a device by replacing CPU, for example MULTI:CPU(4),GPU(8). + // Select the device by providing the name as the second parameter to CLI. 
// It is possible to set CUMULATIVE_THROUGHPUT as ov::hint::PerformanceMode for AUTO device ov::Core core; - ov::CompiledModel compiled_model = core.compile_model(argv[1], "CPU", tput); + ov::CompiledModel compiled_model = core.compile_model(argv[1], device_name, tput); // Create optimal number of ov::InferRequest instances uint32_t nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); std::vector ireqs(nireq); diff --git a/samples/python/benchmark/throughput_benchmark/throughput_benchmark.py b/samples/python/benchmark/throughput_benchmark/throughput_benchmark.py index c934a7650172e6..0573642fb2fdaa 100755 --- a/samples/python/benchmark/throughput_benchmark/throughput_benchmark.py +++ b/samples/python/benchmark/throughput_benchmark/throughput_benchmark.py @@ -30,18 +30,21 @@ def main(): log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout) log.info('OpenVINO:') log.info(f"{'Build ':.<39} {get_version()}") - if len(sys.argv) != 2: - log.info(f'Usage: {sys.argv[0]} ') + device_name = 'CPU' + if len(sys.argv) == 3: + device_name = sys.argv[2] + elif len(sys.argv) != 2: + log.info(f'Usage: {sys.argv[0]} (default: CPU)') return 1 # Optimize for throughput. Best throughput can be reached by # running multiple openvino.runtime.InferRequest instances asyncronously tput = {'PERFORMANCE_HINT': 'THROUGHPUT'} # Create Core and use it to compile a model. - # Pick a device by replacing CPU, for example MULTI:CPU(4),GPU(8). + # Select the device by providing the name as the second parameter to CLI. 
# It is possible to set CUMULATIVE_THROUGHPUT as PERFORMANCE_HINT for AUTO device core = ov.Core() - compiled_model = core.compile_model(sys.argv[1], 'CPU', tput) + compiled_model = core.compile_model(sys.argv[1], device_name, tput) # AsyncInferQueue creates optimal number of InferRequest instances ireqs = ov.AsyncInferQueue(compiled_model) # Fill input data for ireqs From 3386b85c08d8f407dec47e518884d5010d2966a0 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Fri, 3 Nov 2023 10:35:43 +0100 Subject: [PATCH 189/275] [core] Migrate Divide operator to new API (#20766) * Use ov:: namespace * Drop HostTensor * Use ov::util::make_tensor_of_max_value instead of ngraph::get_constant_max_of_type * Use ov::util::make_tensor_of_min_value instead of ngraph::get_constant_min_of_type * Refactor get_constant_min_of_type --- src/core/dev_api/validation_util.hpp | 5 + src/core/include/openvino/op/divide.hpp | 4 +- src/core/src/op/divide.cpp | 257 +++++++++++------------- src/core/src/validation_util.cpp | 70 ++++--- 4 files changed, 171 insertions(+), 165 deletions(-) diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp index 2495fd1029959a..7e0ca7b8d52c16 100644 --- a/src/core/dev_api/validation_util.hpp +++ b/src/core/dev_api/validation_util.hpp @@ -58,6 +58,11 @@ OPENVINO_API std::shared_ptr get_constant_from_source(const Ou /// \return Tensor with maximum value. Tensor make_tensor_of_max_value(const element::Type_t et); +/// \brief Make scalar tensor which stores minimum value of ov::element::Type. +/// \param et Element type to get its minimum. +/// \return Tensor with minimum value. +Tensor make_tensor_of_min_value(const element::Type_t et); + /// \brief Apply auto padding to padding_above and padding_below inputs /// if all needed informations are known. 
/// diff --git a/src/core/include/openvino/op/divide.hpp b/src/core/include/openvino/op/divide.hpp index 4d83d0043f4a64..33e8ff0904346c 100644 --- a/src/core/include/openvino/op/divide.hpp +++ b/src/core/include/openvino/op/divide.hpp @@ -45,9 +45,7 @@ class OPENVINO_API Divide : public util::BinaryElementwiseArithmetic { } std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; diff --git a/src/core/src/op/divide.cpp b/src/core/src/op/divide.cpp index 03fa88dfbc8a31..c2a9020cb03654 100644 --- a/src/core/src/op/divide.cpp +++ b/src/core/src/op/divide.cpp @@ -2,88 +2,72 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/divide.hpp" - -#include +#include "openvino/op/divide.hpp" #include "bound_evaluate.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/and.hpp" -#include "ngraph/op/equal.hpp" -#include "ngraph/op/less.hpp" -#include "ngraph/op/not.hpp" -#include "ngraph/op/or.hpp" -#include "ngraph/op/select.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "openvino/core/shape_util.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/equal.hpp" +#include "openvino/op/less.hpp" +#include "openvino/op/logical_and.hpp" +#include "openvino/op/logical_or.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/select.hpp" #include "openvino/reference/divide.hpp" +#include "utils.hpp" +#include "validation_util.hpp" -using namespace std; -using namespace ngraph; - -OPENVINO_SUPPRESS_DEPRECATED_START +namespace ov { +namespace op { +namespace v1 { namespace divide { namespace { 
-template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec, - bool pythondiv) { - ov::reference::divide(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), +using ov::op::v0::Constant; +using ov::op::v0::Parameter; + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const op::AutoBroadcastSpec& broadcast_spec, + const bool pythondiv) { + reference::divide(in0.data(), + in1.data(), + out.data(), + shape0, + shape1, broadcast_spec, pythondiv); - return true; -} - -bool evaluate_divide(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec, - bool pythondiv) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_divide, i32, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, i64, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, u32, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, u64, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, f16, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, f32, arg0, arg1, out, broadcast_spec, pythondiv); - OPENVINO_TYPE_CASE(evaluate_divide, bf16, arg0, arg1, out, broadcast_spec, pythondiv); - default: - rc = false; - break; + return true; } - return rc; -} +}; -ov::Tensor equality_mask(const ov::Tensor& tensor, const shared_ptr& constant) { - auto mask_out = ov::TensorVector{{element::boolean, tensor.get_shape()}}; +Tensor equality_mask(const Tensor& lhs, const Tensor& rhs) { + auto mask_out = 
TensorVector{{element::boolean, lhs.get_shape()}}; - auto c_tensor = ov::Tensor(constant->get_element_type(), constant->get_shape()); - memcpy(c_tensor.data(), constant->get_data_ptr(), c_tensor.get_byte_size()); - - const auto& param = std::make_shared(tensor.get_element_type(), tensor.get_shape()); - op::v1::Equal(param, constant).evaluate(mask_out, ov::TensorVector{tensor, c_tensor}); + const auto lhs_node = std::make_shared(lhs.get_element_type(), lhs.get_shape()); + const auto rhs_node = std::make_shared(rhs.get_element_type(), rhs.get_shape()); + Equal(lhs_node, rhs_node).evaluate(mask_out, TensorVector{lhs, rhs}); return mask_out.front(); } -ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { - auto logical_or = op::v1::LogicalOr(std::make_shared(lhs.get_element_type(), lhs.get_shape()), - std::make_shared(rhs.get_element_type(), rhs.get_shape()), - ngraph::op::AutoBroadcastType::NUMPY); +Tensor or_tensor(const Tensor& lhs, const Tensor& rhs) { + auto logical_or = LogicalOr(std::make_shared(lhs.get_element_type(), lhs.get_shape()), + std::make_shared(rhs.get_element_type(), rhs.get_shape()), + AutoBroadcastType::NUMPY); - auto outs = ov::TensorVector{{lhs.get_element_type(), logical_or.get_output_shape(0)}}; - logical_or.evaluate(outs, ov::TensorVector{lhs, rhs}); + auto outs = TensorVector{{lhs.get_element_type(), logical_or.get_output_shape(0)}}; + logical_or.evaluate(outs, TensorVector{lhs, rhs}); return outs.front(); } -bool evaluate_bound(const Node* node, ov::TensorVector& output_values, bool is_upper) { +bool evaluate_bound(const Node* node, TensorVector& output_values, bool is_upper) { // for positive arg2 divide will have limits [low/up , up/low] // for negative arg2 limits for divide will be [up/low, low/up] // for arg2 range with both positive and negative values, divide can give any result [-inf, inf] @@ -96,109 +80,102 @@ bool evaluate_bound(const Node* node, ov::TensorVector& output_values, bool is_u 
OPENVINO_ASSERT(PartialShape::broadcast_merge_into(input_shape, input2.get_partial_shape(), node->get_autob()), "Argument shapes in divide operation are inconsistent."); - auto input1_low = ov::evaluate_lower_bound(input1); + const auto input1_low = evaluate_lower_bound(input1); if (!input1_low) return false; - auto input1_up = ov::evaluate_upper_bound(input1); + const auto input1_up = evaluate_upper_bound(input1); if (!input1_up) return false; - auto input2_low = ov::evaluate_lower_bound(input2); + const auto input2_low = evaluate_lower_bound(input2); if (!input2_low) return false; - auto input2_up = ov::evaluate_upper_bound(input2); + const auto input2_up = evaluate_upper_bound(input2); if (!input2_up) return false; - auto zeros_const = op::Constant::create(input2.get_element_type(), {}, {0}); - const auto zero_t = ov::Tensor(input2.get_element_type(), Shape{}); + const auto zeros_const = Constant::create(input2.get_element_type(), {}, {0}); + const auto zero_t = Tensor(input2.get_element_type(), Shape{}); memcpy(zero_t.data(), zeros_const->get_data_ptr(), zero_t.get_byte_size()); - OPENVINO_SUPPRESS_DEPRECATED_START - auto max_constant = get_constant_max_of_type(input2.get_element_type()); - auto dynamic_mask = or_tensor(equality_mask(input1_up, max_constant), equality_mask(input2_up, max_constant)); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto max_value = ov::util::make_tensor_of_max_value(input2.get_element_type()); + const auto dynamic_mask = or_tensor(equality_mask(input1_up, max_value), equality_mask(input2_up, max_value)); // mask to find out positive values for arg2 - auto less_up_outputs = ov::TensorVector{{element::boolean, input2.get_shape()}}; + auto less_up_outputs = TensorVector{{element::boolean, input2.get_shape()}}; auto& input2_positive_up_mask = less_up_outputs.front(); - bool status = op::v1::Less().evaluate(less_up_outputs, ov::TensorVector{zero_t, input2_up}); + bool status = Less().evaluate(less_up_outputs, TensorVector{zero_t, 
input2_up}); if (!status) return status; // mask to find out negative values for arg2 - auto less_low_outputs = ov::TensorVector{{element::boolean, input2.get_shape()}}; + auto less_low_outputs = TensorVector{{element::boolean, input2.get_shape()}}; auto& input2_negative_low_mask = less_low_outputs.front(); - status = op::v1::Less().evaluate(less_low_outputs, {input2_low, zero_t}); + status = Less().evaluate(less_low_outputs, {input2_low, zero_t}); if (!status) return status; // mask to find out ranges around 0 for arg2 - auto logical_and_up_outputs = ov::TensorVector{{element::boolean, input2.get_shape()}}; + auto logical_and_up_outputs = TensorVector{{element::boolean, input2.get_shape()}}; auto& input2_low_negative_up_positive_mask = logical_and_up_outputs.front(); - status = op::v1::LogicalAnd().evaluate(logical_and_up_outputs, {input2_negative_low_mask, input2_positive_up_mask}); + status = LogicalAnd().evaluate(logical_and_up_outputs, {input2_negative_low_mask, input2_positive_up_mask}); if (!status) return status; - auto value1_outs = ov::TensorVector{{input1.get_element_type(), input_shape.get_shape()}}; + auto value1_outs = TensorVector{{input1.get_element_type(), input_shape.get_shape()}}; auto& value1 = value1_outs.front(); - auto value2_outs = ov::TensorVector{{input2.get_element_type(), input2.get_shape()}}; + auto value2_outs = TensorVector{{input2.get_element_type(), input2.get_shape()}}; auto& value2 = value2_outs.front(); if (!is_upper) { - status = op::v1::Select().evaluate(value1_outs, {input2_positive_up_mask, input1_low, input1_up}); + status = Select().evaluate(value1_outs, {input2_positive_up_mask, input1_low, input1_up}); if (!status) return status; - status = op::v1::Select().evaluate(value2_outs, {input2_positive_up_mask, input2_up, input2_low}); + status = Select().evaluate(value2_outs, {input2_positive_up_mask, input2_up, input2_low}); if (!status) return status; - status = node->evaluate(output_values, ov::TensorVector{value1, value2}); 
+ status = node->evaluate(output_values, TensorVector{value1, value2}); if (!status) return status; // replace values where zeros inside range of second arg to maximum values - OPENVINO_SUPPRESS_DEPRECATED_START - auto output_minimum_value = get_constant_min_of_type(output_values[0].get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (output_minimum_value == nullptr) + const auto output_min_value = ov::util::make_tensor_of_min_value(output_values[0].get_element_type()); + if (!output_min_value) return false; - auto out_min_v = ov::Tensor(output_minimum_value->get_element_type(), output_minimum_value->get_shape()); - memcpy(out_min_v.data(), output_minimum_value->get_data_ptr(), out_min_v.get_byte_size()); - - status = op::v1::Select().evaluate(output_values, - {input2_low_negative_up_positive_mask, out_min_v, output_values[0]}); + status = Select().evaluate(output_values, + {input2_low_negative_up_positive_mask, output_min_value, output_values[0]}); if (!status) return status; - status = op::v1::Select().evaluate(output_values, {dynamic_mask, zero_t, output_values[0]}); + status = Select().evaluate(output_values, {dynamic_mask, zero_t, output_values[0]}); if (!status) return status; } else { - status = op::v1::Select().evaluate(value1_outs, {input2_positive_up_mask, input1_up, input1_low}); + status = Select().evaluate(value1_outs, {input2_positive_up_mask, input1_up, input1_low}); if (!status) return status; - status = op::v1::Select().evaluate(value2_outs, {input2_positive_up_mask, input2_low, input2_up}); + status = Select().evaluate(value2_outs, {input2_positive_up_mask, input2_low, input2_up}); if (!status) return status; // create mask where zeros in the second argument are placed - auto eq_zero_mask = ov::TensorVector{{element::boolean, input2.get_shape()}}; + auto eq_zero_mask = TensorVector{{element::boolean, input2.get_shape()}}; auto& input2_zeros_mask = eq_zero_mask.front(); - bool status = op::v1::Equal().evaluate(eq_zero_mask, {value2, 
zero_t}); + bool status = Equal().evaluate(eq_zero_mask, {value2, zero_t}); if (!status) return status; // replace zeros by 1 values to get result of divide for other values of arguments - auto ones = op::Constant::create(input2.get_element_type(), input2.get_shape(), {1}); - auto ones_t = ov::Tensor(ones->get_element_type(), ones->get_shape()); + const auto ones = Constant::create(input2.get_element_type(), input2.get_shape(), {1}); + const auto ones_t = Tensor(ones->get_element_type(), ones->get_shape()); memcpy(ones_t.data(), ones->get_data_ptr(), ones_t.get_byte_size()); - status = op::v1::Select().evaluate(value2_outs, {input2_zeros_mask, ones_t, value2}); + status = Select().evaluate(value2_outs, {input2_zeros_mask, ones_t, value2}); if (!status) return status; @@ -207,27 +184,22 @@ bool evaluate_bound(const Node* node, ov::TensorVector& output_values, bool is_u return status; // replace values where zeros were found in the second argument to maximum values - OPENVINO_SUPPRESS_DEPRECATED_START - auto output_maximum_value = get_constant_max_of_type(output_values[0].get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (output_maximum_value == nullptr) + const auto out_max_value = ov::util::make_tensor_of_max_value(output_values[0].get_element_type()); + if (!out_max_value) return false; - auto out_max_v = ov::Tensor(output_maximum_value->get_element_type(), output_maximum_value->get_shape()); - memcpy(out_max_v.data(), output_maximum_value->get_data_ptr(), out_max_v.get_byte_size()); - - status = op::v1::Select().evaluate(output_values, {input2_zeros_mask, out_max_v, output_values[0]}); + status = Select().evaluate(output_values, {input2_zeros_mask, out_max_value, output_values[0]}); if (!status) return status; // replace values where zeros inside [low, ip] values range of second arg to maximum values - status = op::v1::Select().evaluate(output_values, - {input2_low_negative_up_positive_mask, out_max_v, output_values[0]}); + status = + 
Select().evaluate(output_values, {input2_low_negative_up_positive_mask, out_max_value, output_values[0]}); if (!status) return status; // in case input elements were dynamic we replace them with zero - status = op::v1::Select().evaluate(output_values, {dynamic_mask, out_max_v, output_values[0]}); + status = Select().evaluate(output_values, {dynamic_mask, out_max_value, output_values[0]}); if (!status) return status; } @@ -236,61 +208,74 @@ bool evaluate_bound(const Node* node, ov::TensorVector& output_values, bool is_u } // namespace } // namespace divide -// ------------------------------ v1 ------------------------------------------- - -op::v1::Divide::Divide(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +Divide::Divide(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -op::v1::Divide::Divide(const Output& arg0, - const Output& arg1, - bool pythondiv, - const AutoBroadcastSpec& auto_broadcast) +Divide::Divide(const Output& arg0, + const Output& arg1, + bool pythondiv, + const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast), m_pythondiv(pythondiv) { constructor_validate_and_infer_types(); } -bool op::v1::Divide::visit_attributes(AttributeVisitor& visitor) { +bool Divide::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_Divide_visit_attributes); BinaryElementwiseArithmetic::visit_attributes(visitor); visitor.on_attribute("m_pythondiv", m_pythondiv); return true; } -shared_ptr op::v1::Divide::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Divide::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Divide_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->is_pythondiv(), this->get_autob()); + return 
std::make_shared(new_args.at(0), new_args.at(1), this->is_pythondiv(), this->get_autob()); } -bool op::v1::Divide::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Divide::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Divide_evaluate); - return divide::evaluate_divide(inputs[0], inputs[1], outputs[0], get_autob(), is_pythondiv()); + + OPENVINO_ASSERT(outputs.size() == 1); + + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob(), + is_pythondiv()); + return true; } -bool op::v1::Divide::has_evaluate() const { +bool Divide::has_evaluate() const { OV_OP_SCOPE(v1_Divide_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::bf16: - case ngraph::element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: + case element::f16: + case element::bf16: + case element::f32: return true; default: - break; + return false; } - return false; } -bool ov::op::v1::Divide::evaluate_lower(TensorVector& outputs) const { +bool Divide::evaluate_lower(TensorVector& outputs) const { return divide::evaluate_bound(this, outputs, false); } -bool ov::op::v1::Divide::evaluate_upper(TensorVector& outputs) const { +bool Divide::evaluate_upper(TensorVector& outputs) const { return divide::evaluate_bound(this, outputs, true); } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 7662229f2fa701..13db184c04b3ee 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -916,32 +916,8 @@ std::shared_ptr 
get_constant_max_of_type(element::Type_t t) { } std::shared_ptr get_constant_min_of_type(element::Type_t t) { -#define OPENVINO_TYPE_TO_MIN_CONST(t) \ - case t: \ - return ov::op::v0::Constant::create( \ - t, \ - {}, \ - {std::numeric_limits::value_type>::min()}); \ - break - - switch (t) { - OPENVINO_TYPE_TO_MIN_CONST(element::boolean); - OPENVINO_TYPE_TO_MIN_CONST(element::bf16); - OPENVINO_TYPE_TO_MIN_CONST(element::f16); - OPENVINO_TYPE_TO_MIN_CONST(element::f32); - OPENVINO_TYPE_TO_MIN_CONST(element::f64); - OPENVINO_TYPE_TO_MIN_CONST(element::i8); - OPENVINO_TYPE_TO_MIN_CONST(element::i16); - OPENVINO_TYPE_TO_MIN_CONST(element::i32); - OPENVINO_TYPE_TO_MIN_CONST(element::i64); - OPENVINO_TYPE_TO_MIN_CONST(element::u1); - OPENVINO_TYPE_TO_MIN_CONST(element::u8); - OPENVINO_TYPE_TO_MIN_CONST(element::u16); - OPENVINO_TYPE_TO_MIN_CONST(element::u32); - OPENVINO_TYPE_TO_MIN_CONST(element::u64); - default: - return nullptr; - } + auto tensor = ov::util::make_tensor_of_min_value(t); + return tensor ? 
std::make_shared(tensor) : nullptr; } std::shared_ptr get_constant_lowest_of_type(element::Type_t t) { @@ -1407,6 +1383,48 @@ Tensor make_tensor_of_max_value(const element::Type_t et) { } } +template +Tensor make_tensor_of_min_value(const element::Type_t et) { + Tensor t{et, Shape{}}; + *t.data() = std::numeric_limits::min(); + return t; +} + +Tensor make_tensor_of_min_value(const element::Type_t et) { + switch (et) { + case element::boolean: + return make_tensor_of_min_value>(et); + case element::bf16: + return make_tensor_of_min_value>(et); + case element::f16: + return make_tensor_of_min_value>(et); + case element::f32: + return make_tensor_of_min_value>(et); + case element::f64: + return make_tensor_of_min_value>(et); + case element::i8: + return make_tensor_of_min_value>(et); + case element::i16: + return make_tensor_of_min_value>(et); + case element::i32: + return make_tensor_of_min_value>(et); + case element::i64: + return make_tensor_of_min_value>(et); + case element::u1: + return make_tensor_of_min_value>(et); + case element::u8: + return make_tensor_of_min_value>(et); + case element::u16: + return make_tensor_of_min_value>(et); + case element::u32: + return make_tensor_of_min_value>(et); + case element::u64: + return make_tensor_of_min_value>(et); + default: + return {}; + } +} + std::vector get_tensors_partial_shapes(const TensorVector& tensors) { std::vector shapes; shapes.reserve(tensors.size()); From 1960536e8ec68e89a10d16a45c0fd6d582f4884f Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Fri, 3 Nov 2023 13:47:51 +0400 Subject: [PATCH 190/275] Fix GPTQ model conversion after two breaking changes (#20823) * Fix GPTQ model conversion after two breaking changes * Code style fix * Remove redundant check --- .../pytorch/src/transforms/u4_block_repack.cpp | 3 +-- src/frontends/pytorch/src/utils_quantize.cpp | 13 ++++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp 
b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp index 9dcd4569ea8f66..ed0e5b6bbf5a7f 100644 --- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -85,8 +85,7 @@ U4BlockRepack::U4BlockRepack() { } } - copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), std::move(reshape2)}, - new_const); + copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), reshape2}, new_const); replace_node(reshape2, new_const); return true; diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 1346fd76971fcc..70253b7f75799e 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -5,6 +5,7 @@ #include "utils_quantize.hpp" #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/bitwise_and.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" @@ -175,9 +176,15 @@ std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64 if (list_elems.size() != 2) return nullptr; - auto bitwise_and = cast_fw_node(list_elems[0].get_node_shared_ptr(), "aten::bitwise_and"); - if (!bitwise_and) - return nullptr; + + auto bitwise_and_candidate = list_elems[0].get_node_shared_ptr(); + std::shared_ptr bitwise_and = cast_fw_node(bitwise_and_candidate, "aten::bitwise_and"); + if (!bitwise_and) { + bitwise_and = std::dynamic_pointer_cast(bitwise_and_candidate); + if (!bitwise_and) + return nullptr; + } + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); if (!bitwise_shift) return nullptr; From 09010657e21081eaf2df596e8358f7ac8dcc3042 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Fri, 3 Nov 2023 13:16:51 +0100 Subject: [PATCH 191/275] [core] Migrate Gelu operator to new API (#20833) * Drop HostTensor * Remove useless 
overwrite method --- src/core/include/openvino/op/gelu.hpp | 6 +- src/core/src/op/gelu.cpp | 132 ++++++++++++-------------- 2 files changed, 60 insertions(+), 78 deletions(-) diff --git a/src/core/include/openvino/op/gelu.hpp b/src/core/include/openvino/op/gelu.hpp index ae868e3909bbfd..9a47b1437aae68 100644 --- a/src/core/include/openvino/op/gelu.hpp +++ b/src/core/include/openvino/op/gelu.hpp @@ -23,8 +23,6 @@ class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { /// \param data Input tensor Gelu(const Output& data); - bool visit_attributes(AttributeVisitor& visitor) override; - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; @@ -56,9 +54,7 @@ class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { void validate_and_infer_types() override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git a/src/core/src/op/gelu.cpp b/src/core/src/op/gelu.cpp index f7c974af77c7e3..cc261ca1650a1b 100644 --- a/src/core/src/op/gelu.cpp +++ b/src/core/src/op/gelu.cpp @@ -2,41 +2,36 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/gelu.hpp" +#include "openvino/op/gelu.hpp" #include -#include +#include "element_visitor.hpp" #include "itt.hpp" +#include "openvino/core/type.hpp" #include "openvino/reference/gelu.hpp" -using namespace std; -using namespace ngraph; - -// ------------------------------ V0 ------------------------------ -op::v0::Gelu::Gelu() : UnaryElementwiseArithmetic() {} +namespace ov { +namespace op { +namespace v0 { +Gelu::Gelu() : UnaryElementwiseArithmetic() {} -op::v0::Gelu::Gelu(const Output& data) : 
UnaryElementwiseArithmetic(data) { +Gelu::Gelu(const Output& data) : UnaryElementwiseArithmetic(data) { constructor_validate_and_infer_types(); } -bool op::v0::Gelu::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Gelu_visit_attributes); - return true; -} - -shared_ptr op::v0::Gelu::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Gelu::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Gelu_clone_with_new_inputs); if (new_args.size() != 1) { OPENVINO_THROW("Incorrect number of new arguments"); } - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -void op::v0::Gelu::validate_and_infer_types() { +void Gelu::validate_and_infer_types() { OV_OP_SCOPE(v0_Gelu_validate_and_infer_types); element::Type input_element_type = get_input_element_type(0); - ov::PartialShape input_pshape = get_input_partial_shape(0); + PartialShape input_pshape = get_input_partial_shape(0); NODE_VALIDATION_CHECK(this, input_element_type.is_dynamic() || input_element_type.is_real(), @@ -46,47 +41,33 @@ void op::v0::Gelu::validate_and_infer_types() { set_output_type(0, input_element_type, input_pshape); } +} // namespace v0 -// ------------------------------ V7 ------------------------------ - -namespace ov { -template <> -NGRAPH_API EnumNames& EnumNames::get() { - static auto enum_names = EnumNames( - "op::GeluApproximationMode", - {{"TANH", ngraph::op::GeluApproximationMode::TANH}, {"ERF", ngraph::op::GeluApproximationMode::ERF}}); - return enum_names; -} - -std::ostream& op::operator<<(std::ostream& s, const op::GeluApproximationMode& type) { - return s << as_string(type); -} -} // namespace ov - -op::v7::Gelu::Gelu(const Output& data, GeluApproximationMode mode) +namespace v7 { +Gelu::Gelu(const Output& data, GeluApproximationMode mode) : UnaryElementwiseArithmetic(data), m_approximation_mode(mode) { constructor_validate_and_infer_types(); } -bool op::v7::Gelu::visit_attributes(AttributeVisitor& 
visitor) { +bool Gelu::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v7_Gelu_visit_attributes); visitor.on_attribute("approximation_mode", m_approximation_mode); return true; } -shared_ptr op::v7::Gelu::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Gelu::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v7_Gelu_clone_with_new_inputs); if (new_args.size() != 1) { OPENVINO_THROW("Incorrect number of new arguments"); } - return make_shared(new_args.at(0), m_approximation_mode); + return std::make_shared(new_args.at(0), m_approximation_mode); } -void op::v7::Gelu::validate_and_infer_types() { +void Gelu::validate_and_infer_types() { OV_OP_SCOPE(v7_Gelu_validate_and_infer_types); element::Type input_element_type = get_input_element_type(0); - ov::PartialShape input_pshape = get_input_partial_shape(0); + PartialShape input_pshape = get_input_partial_shape(0); NODE_VALIDATION_CHECK(this, input_element_type.is_dynamic() || input_element_type.is_real(), @@ -97,56 +78,61 @@ void op::v7::Gelu::validate_and_infer_types() { set_output_type(0, input_element_type, input_pshape); } -op::GeluApproximationMode op::v7::Gelu::get_approximation_mode() const { +op::GeluApproximationMode Gelu::get_approximation_mode() const { return m_approximation_mode; } -OPENVINO_SUPPRESS_DEPRECATED_START namespace gelu { namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& out, - op::GeluApproximationMode mode, - const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::gelu(arg0->get_data_ptr(), out->get_data_ptr(), mode, count); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_gelu(const HostTensorPtr& arg0, const HostTensorPtr& out, op::GeluApproximationMode mode) { - bool rc = true; - size_t count = shape_size(arg0->get_shape()); - out->set_unary(arg0); - - switch (arg0->get_element_type()) { - 
OPENVINO_TYPE_CASE(evaluate_gelu, f16, arg0, out, mode, count); - OPENVINO_TYPE_CASE(evaluate_gelu, f32, arg0, out, mode, count); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const op::GeluApproximationMode mode, const size_t count) { + reference::gelu(in.data(), out.data(), mode, count); + return true; } - return rc; -} +}; } // namespace } // namespace gelu -bool op::v7::Gelu::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Gelu::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v7_Gelu_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return gelu::evaluate_gelu(inputs[0], outputs[0], m_approximation_mode); + OPENVINO_ASSERT(inputs.size() == 1 && outputs.size() == 1); + + const auto& input_shape = inputs[0].get_shape(); + const auto count = shape_size(input_shape); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + m_approximation_mode, + count); } -bool op::v7::Gelu::has_evaluate() const { +bool Gelu::has_evaluate() const { OV_OP_SCOPE(v7_Gelu_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v7 +} // namespace op + +template <> +OPENVINO_API EnumNames& EnumNames::get() { + static auto enum_names = EnumNames( + "op::GeluApproximationMode", + {{"TANH", op::GeluApproximationMode::TANH}, {"ERF", op::GeluApproximationMode::ERF}}); + return enum_names; +} + +std::ostream& op::operator<<(std::ostream& s, const op::GeluApproximationMode& type) { + return s << as_string(type); +} +} // namespace ov From 
86c638a595a5e9305f6c4be8517788c463c1055c Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Fri, 3 Nov 2023 16:59:47 +0100 Subject: [PATCH 192/275] Temporarily restrict flake8_builtins version (#20864) --- src/bindings/python/requirements_test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index d05ebd2ec7e74f..539feaa2f12efa 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -21,7 +21,7 @@ flake8-rst-docstrings pygments flake8-string-format flake8-variables-names -flake8_builtins +flake8_builtins<2.2.0 # Issue 124276 flake8_coding flake8_commas flake8_pep3101 From cc389c23cacf9f4117695ad92be21169047561af Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Fri, 3 Nov 2023 17:45:34 +0100 Subject: [PATCH 193/275] Removed logic of building example_input by shape. (#20859) --- .../src/openvino/frontend/tensorflow/utils.py | 31 ------------------- .../mo_python_api_tests/test_mo_convert_tf.py | 28 ++++++++++++----- .../ovc_python_api_tests/test_tf.py | 23 +++++++++----- 3 files changed, 36 insertions(+), 46 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py index b75f371d0c16f2..298914ffdbd27d 100644 --- a/src/bindings/python/src/openvino/frontend/tensorflow/utils.py +++ b/src/bindings/python/src/openvino/frontend/tensorflow/utils.py @@ -118,33 +118,6 @@ def get_input_spec_from_model(model): return input_spec -def create_example_input_by_user_shapes(input_shapes, input_types): - import tensorflow as tf - if input_shapes is None: - return None - if isinstance(input_shapes, dict): - res = {} - for name, shape in input_shapes.items(): - shape = get_static_shape(shape, 1) - args = {} - if name in input_types: - args["dtype"] = input_types[name] - tensor = tf.zeros(shape=shape, **args) - res[name] = tensor - 
return res - elif isinstance(input_shapes, list): - res = [] - for idx, shape in enumerate(input_shapes): - shape = get_static_shape(shape, 1) - args = {} - if idx < len(input_types): - args["dtype"] = input_types[idx] - tensor = tf.zeros(shape=shape, **args) - res.append(tensor) - return res - raise Exception("Could not create example input by provided shape {}".format(input_shapes)) - - def get_concrete_func(tf_function, example_input, input_needs_packing, error_message, use_example_input=True): """ Runs tracing of TF function and returns a concrete function. @@ -281,10 +254,6 @@ def are_shapes_defined(shape: Union[List, Dict]): if example_input is not None: concrete_func = get_concrete_func(tf_function, example_input, input_needs_packing, "Could not trace the TF model with the following error: {}") - elif are_shapes_defined(input_shapes): - inp = create_example_input_by_user_shapes(input_shapes, input_types) - concrete_func = get_concrete_func(tf_function, inp, input_needs_packing, - "Could not trace the TF model with the following error: {}") else: if isinstance(tf_function, tf.types.experimental.GenericFunction) and \ tf_function.input_signature is not None: diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_tf.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_tf.py index 59c90cd7b3d415..dbc36c9fcd3c2d 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_tf.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_tf.py @@ -139,7 +139,8 @@ def __call__(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = Net() - return net, model_ref, {'input': [PartialShape([1, 2, 3]), PartialShape([1, 2, 3])]} + return net, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_tf_module_layout_list(tmp_dir): @@ -166,7 +167,8 @@ def __call__(self, x, y): model_ref.inputs[1].node.layout = Layout('NHC') net = Net() - return net, model_ref, 
{'input_shape': [PartialShape([1, 2, 3]), PartialShape([1, 2, 3])], 'layout': ["NCH", "NHC"], + return net, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32)), 'layout': ["NCH", "NHC"], 'use_convert_model_from_mo': True} @@ -193,7 +195,10 @@ def __call__(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = Net() - return net, model_ref, {'input': input_shapes} + return net, model_ref, {'input': input_shapes, + 'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32)) + } def create_keras_layer(tmp_dir): @@ -217,7 +222,9 @@ def call(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = LayerModel() - return net, model_ref, {'input': [PartialShape([1, 2, 3]), PartialShape([1, 2, 3])]} + return net, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32)) + } def create_keras_layer_dynamic(tmp_dir): @@ -243,7 +250,10 @@ def call(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = LayerModel() - return net, model_ref, {'input': input_shapes} + return net, model_ref, {'input': input_shapes, + 'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32)) + } def create_tf_checkpoint(tmp_dir): @@ -531,17 +541,19 @@ def create_keras_layer_with_example_input_2(tmp_dir): def create_keras_layer_with_input_shapes_case1(tmp_dir): model, model_ref = create_keras_layer_input_list() - return model, model_ref, {'input': [[1, 2, 3], [1, 2, 3]]} + return model, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer_with_input_shapes_case2(tmp_dir): model, model_ref = create_keras_layer_input_list() - return model, model_ref, {'input': [([1, 2, 3], np.float32), ([1, 2, 3], np.float32)]} + return model, model_ref, 
{'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer_with_input_shapes_case3(tmp_dir): model, model_ref = create_keras_layer_input_dict_one_inp() - return model, model_ref, {'input': [('args', [1, 2, 3])]} + return model, model_ref, {'example_input': {"args": np.random.rand(1, 2, 3).astype(np.float32)}} def create_keras_layer_with_input_shapes_case4(tmp_dir): diff --git a/tests/layer_tests/ovc_python_api_tests/test_tf.py b/tests/layer_tests/ovc_python_api_tests/test_tf.py index b894ec7153e910..8657ca1c8bb3ce 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_tf.py +++ b/tests/layer_tests/ovc_python_api_tests/test_tf.py @@ -131,7 +131,8 @@ def __call__(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = Net() - return net, model_ref, {'input': [PartialShape([1, 2, 3]), PartialShape([1, 2, 3])]} + return net, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_tf_module_dynamic(tmp_dir): @@ -155,7 +156,9 @@ def __call__(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = Net() - return net, model_ref, {'input': input_shapes} + return net, model_ref, {'input': input_shapes, + 'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer(tmp_dir): @@ -178,7 +181,8 @@ def call(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = LayerModel() - return net, model_ref, {'input': [PartialShape([1, 2, 3]), PartialShape([1, 2, 3])]} + return net, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer_dynamic(tmp_dir): @@ -203,7 +207,10 @@ def call(self, x, y): model_ref = Model([sigm], parameter_list, "test") net = LayerModel() - return net, model_ref, {'input': input_shapes} + return net, model_ref, 
{'input': input_shapes, + 'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32)) + } def create_tf_checkpoint(tmp_dir): @@ -478,17 +485,19 @@ def create_keras_layer_with_example_input_2(tmp_dir): def create_keras_layer_with_input_shapes_case1(tmp_dir): model, model_ref = create_keras_layer_input_list() - return model, model_ref, {'input': [[1, 2, 3], [1, 2, 3]]} + return model, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer_with_input_shapes_case2(tmp_dir): model, model_ref = create_keras_layer_input_list() - return model, model_ref, {'input': [([1, 2, 3], np.float32), ([1, 2, 3], np.float32)]} + return model, model_ref, {'example_input': (np.random.rand(1, 2, 3).astype(np.float32), + np.random.rand(1, 2, 3).astype(np.float32))} def create_keras_layer_with_input_shapes_case3(tmp_dir): model, model_ref = create_keras_layer_input_dict_one_inp() - return model, model_ref, {'input': [('args', [1, 2, 3])]} + return model, model_ref, {'example_input': {'args': np.random.rand(1, 2, 3).astype(np.float32)}} def create_keras_layer_with_input_shapes_case4(tmp_dir): From fda1fd9dc1f7213cf170b9c7e64cc1763815574f Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Sat, 4 Nov 2023 07:58:01 +0000 Subject: [PATCH 194/275] [CI] [GHA] Add missing `setup-python` action checkout; use custom action across all pipelines (#20863) * use unified setup-python actions across all pipelines * rm triggers --- .../linux_conditional_compilation.yml | 16 +++++-- .github/workflows/windows.yml | 47 ++++++++++++++++--- .../windows_conditional_compilation.yml | 9 ++++ 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index a264bea8436ee3..bb9252060e88f2 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ 
b/.github/workflows/linux_conditional_compilation.yml @@ -308,6 +308,7 @@ jobs: container: image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:22.04 env: + OPENVINO_REPO: /__w/openvino/openvino/openvino DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests PARALLEL_TEST_SCRIPT: /__w/openvino/openvino/install/tests/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py @@ -326,12 +327,19 @@ jobs: - name: Install OpenVINO dependencies run: bash ${INSTALL_TEST_DIR}/scripts/install_dependencies/install_openvino_dependencies.sh -c=core -c=gpu -y - - name: Install 'actions/setup-python@v4' dependencies - run: apt-get install -y libssl3 + - name: Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: ${{ env.OPENVINO_REPO }} - - uses: actions/setup-python@v4 + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' - name: Install python dependencies for run_parallel.py run: python3 -m pip install -r ${INSTALL_TEST_DIR}/src/tests/test_utils/functional_test_utils/layer_tests_summary/requirements.txt diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e36f2f0204a489..35274b5cccdf75 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -197,6 +197,7 @@ jobs: shell: pwsh runs-on: windows-2019 env: + OPENVINO_REPO: "${{ github.workspace }}\\openvino" INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples" @@ -224,9 +225,20 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" popd - - uses: 
actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 with: - python-version: ${{ env.PYTHON_VERSION }} + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Build cpp samples run: | @@ -295,9 +307,20 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" popd - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 with: - python-version: ${{ env.PYTHON_VERSION }} + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install OpenVINO Python wheels run: | @@ -660,6 +683,7 @@ jobs: shell: pwsh runs-on: windows-2019-8-core env: + OPENVINO_REPO: "${{ github.workspace }}\\openvino" INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" PARALLEL_TEST_SCRIPT: "${{ github.workspace }}\\install\\tests\\functional_test_utils\\layer_tests_summary\\run_parallel.py" @@ -687,9 +711,20 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" popd - - uses: actions/setup-python@v4 + - name: Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python with: - python-version: ${{ env.PYTHON_VERSION }} + version: ${{ env.PYTHON_VERSION }} + 
should-setup-pip-paths: 'false' + self-hosted-runner: 'false' - name: Install python dependencies shell: cmd diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 0304cbfe92bf7e..e2155ab06997f3 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -288,6 +288,7 @@ jobs: shell: pwsh runs-on: windows-latest-8-cores env: + OPENVINO_REPO: "${{ github.workspace }}\\openvino" INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" PARALLEL_TEST_SCRIPT: "${{ github.workspace }}\\tests_install\\layer_tests_summary\\run_parallel.py" PARALLEL_TEST_CACHE: "${{ github.workspace }}\\tests_install\\test_cache.lst" @@ -302,6 +303,14 @@ jobs: - name: Extract OpenVINO tests package run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" + - name: Fetch setup_python action + uses: actions/checkout@v4 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + - name: Setup Python ${{ env.PYTHON_VERSION }} uses: ./openvino/.github/actions/setup_python with: From 47cdbb9df5b5d86208dfdcb206a6998de8f6880b Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Sat, 4 Nov 2023 12:36:16 +0400 Subject: [PATCH 195/275] Template plugin folder to API2.0 (#20862) * Remove unused IE namespace from template plugin * Remove unused `ngraph::HostTensor` * Template `ConvolutionLayerTest` to API2.0 * Template `ReshapeLayerTest` to API2.0 * Template `SplitLayerTest` to API2.0 * Remove extra `InferenceEngine::PluginConfigParams` * CLangFormat --- src/plugins/template/backend/ops/convert.cpp | 7 +- .../op_reference/convert_color_i420.cpp | 1 - .../op_reference/convert_color_nv12.cpp | 1 - .../op_reference/embedding_segments_sum.cpp | 1 - .../op_reference/embeddingbag_offsetssum.cpp | 1 - 
.../op_reference/embeddingbag_packedsum.cpp | 1 - .../tests/functional/op_reference/exp.cpp | 1 - .../tests/functional/op_reference/gelu.cpp | 1 - .../tests/functional/op_reference/hswish.cpp | 1 - .../tests/functional/op_reference/if.cpp | 1 - .../functional/op_reference/log_softmax.cpp | 1 - .../ov_executable_network/get_metric.cpp | 2 - .../single_layer_tests/convolution.cpp | 88 ++++++++----------- .../single_layer_tests/reshape.cpp | 35 +++----- .../single_layer_tests/split.cpp | 27 +++--- .../subgraph_reference/preprocess.cpp | 4 - 16 files changed, 63 insertions(+), 110 deletions(-) diff --git a/src/plugins/template/backend/ops/convert.cpp b/src/plugins/template/backend/ops/convert.cpp index 4563e0081fd4bb..5a2b4ea79c0c7d 100644 --- a/src/plugins/template/backend/ops/convert.cpp +++ b/src/plugins/template/backend/ops/convert.cpp @@ -16,10 +16,9 @@ inline void evaluate(const std::shared_ptr& op, outputs[0].set_shape(inputs[0].get_shape()); size_t element_count = ov::shape_size(outputs[0].get_shape()); - if (((ti == ngraph::element::u1) || (to == ngraph::element::u1)) || - ((ti == ngraph::element::u4) || (to == ngraph::element::u4)) || - ((ti == ngraph::element::i4) || (to == ngraph::element::i4)) || - ((ti == ngraph::element::nf4) || (to == ngraph::element::nf4))) { + if (((ti == ov::element::u1) || (to == ov::element::u1)) || ((ti == ov::element::u4) || (to == ov::element::u4)) || + ((ti == ov::element::i4) || (to == ov::element::i4)) || + ((ti == ov::element::nf4) || (to == ov::element::nf4))) { ov::reference::detail::lp_convert(inputs[0].data(), outputs[0].data(), element_count, ti, to); } else { ov::reference::convert(inputs[0].data(), outputs[0].data(), element_count); diff --git a/src/plugins/template/tests/functional/op_reference/convert_color_i420.cpp b/src/plugins/template/tests/functional/op_reference/convert_color_i420.cpp index 2e9747596965b6..d58da252eaff6b 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_color_i420.cpp +++ 
b/src/plugins/template/tests/functional/op_reference/convert_color_i420.cpp @@ -13,7 +13,6 @@ #include "openvino/op/i420_to_rgb.hpp" using namespace ov; -using namespace InferenceEngine; using namespace reference_tests; class ReferenceConvertColorI420LayerTest : public testing::Test, public CommonReferenceTest { diff --git a/src/plugins/template/tests/functional/op_reference/convert_color_nv12.cpp b/src/plugins/template/tests/functional/op_reference/convert_color_nv12.cpp index 77781766478765..902d72b96ca7fc 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_color_nv12.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_color_nv12.cpp @@ -13,7 +13,6 @@ #include "openvino/op/nv12_to_rgb.hpp" using namespace ov; -using namespace InferenceEngine; using namespace reference_tests; class ReferenceConvertColorNV12LayerTest : public testing::Test, public CommonReferenceTest { diff --git a/src/plugins/template/tests/functional/op_reference/embedding_segments_sum.cpp b/src/plugins/template/tests/functional/op_reference/embedding_segments_sum.cpp index 4726ccdffd6a51..a9fc92d6df87fc 100644 --- a/src/plugins/template/tests/functional/op_reference/embedding_segments_sum.cpp +++ b/src/plugins/template/tests/functional/op_reference/embedding_segments_sum.cpp @@ -9,7 +9,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; struct EmbeddingSegmentsSumParams { template diff --git a/src/plugins/template/tests/functional/op_reference/embeddingbag_offsetssum.cpp b/src/plugins/template/tests/functional/op_reference/embeddingbag_offsetssum.cpp index 9967e99b8e2317..ed12500db0c50d 100644 --- a/src/plugins/template/tests/functional/op_reference/embeddingbag_offsetssum.cpp +++ b/src/plugins/template/tests/functional/op_reference/embeddingbag_offsetssum.cpp @@ -9,7 +9,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; struct EmbeddingBagOffsetsSumParams { template diff --git 
a/src/plugins/template/tests/functional/op_reference/embeddingbag_packedsum.cpp b/src/plugins/template/tests/functional/op_reference/embeddingbag_packedsum.cpp index ea9dfe34decb22..f2bb7946fb9603 100644 --- a/src/plugins/template/tests/functional/op_reference/embeddingbag_packedsum.cpp +++ b/src/plugins/template/tests/functional/op_reference/embeddingbag_packedsum.cpp @@ -9,7 +9,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; struct EmbeddingBagPackedSumParams { template diff --git a/src/plugins/template/tests/functional/op_reference/exp.cpp b/src/plugins/template/tests/functional/op_reference/exp.cpp index 4e06b1dc2a8be5..5267c473927e30 100644 --- a/src/plugins/template/tests/functional/op_reference/exp.cpp +++ b/src/plugins/template/tests/functional/op_reference/exp.cpp @@ -11,7 +11,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; namespace { struct ExpParams { diff --git a/src/plugins/template/tests/functional/op_reference/gelu.cpp b/src/plugins/template/tests/functional/op_reference/gelu.cpp index 08624796c06658..2f87fbe1832b41 100644 --- a/src/plugins/template/tests/functional/op_reference/gelu.cpp +++ b/src/plugins/template/tests/functional/op_reference/gelu.cpp @@ -10,7 +10,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; namespace { struct GeluParams { diff --git a/src/plugins/template/tests/functional/op_reference/hswish.cpp b/src/plugins/template/tests/functional/op_reference/hswish.cpp index 0da8582df0bb3b..8deb3553df3f55 100644 --- a/src/plugins/template/tests/functional/op_reference/hswish.cpp +++ b/src/plugins/template/tests/functional/op_reference/hswish.cpp @@ -10,7 +10,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; namespace { struct HSwishParams { diff --git a/src/plugins/template/tests/functional/op_reference/if.cpp 
b/src/plugins/template/tests/functional/op_reference/if.cpp index eef66bb3e5ad61..5f51a03f02d427 100644 --- a/src/plugins/template/tests/functional/op_reference/if.cpp +++ b/src/plugins/template/tests/functional/op_reference/if.cpp @@ -13,7 +13,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; struct IfFunctionalBase { virtual std::shared_ptr create_function(const std::vector& if_inputs, diff --git a/src/plugins/template/tests/functional/op_reference/log_softmax.cpp b/src/plugins/template/tests/functional/op_reference/log_softmax.cpp index 1d834f6ff71cfb..0b929c418612a1 100644 --- a/src/plugins/template/tests/functional/op_reference/log_softmax.cpp +++ b/src/plugins/template/tests/functional/op_reference/log_softmax.cpp @@ -10,7 +10,6 @@ using namespace reference_tests; using namespace ov; -using namespace InferenceEngine; namespace { struct LogSoftmaxParams { diff --git a/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp b/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp index 6f1c98f11bc00c..2e525c84cc60d0 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/behavior/ov_executable_network/get_metric.cpp @@ -9,8 +9,6 @@ using namespace ov::test::behavior; -using namespace InferenceEngine::PluginConfigParams; - namespace { // diff --git a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp index f1d1b2c31e2ec5..43f44cd68c8587 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp @@ 
-2,20 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/convolution.hpp" +#include "single_op_tests/convolution.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ConvolutionLayerTest; namespace { // ! [test_convolution:declare_parameters] -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector model_types = { + ov::element::f32, + ov::element::f16, }; /* ============= 2D Convolution ============= */ @@ -46,30 +46,24 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine(::testing::ValuesIn(ke ::testing::Values(ov::op::PadType::VALID)); // ! [test_convolution:instantiate] -INSTANTIATE_TEST_SUITE_P(Convolution2D_ExplicitPadding, - ConvolutionLayerTest, - ::testing::Combine(conv2DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), - ConvolutionLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + Convolution2D_ExplicitPadding, + ConvolutionLayerTest, + ::testing::Combine(conv2DParams_ExplicitPadding, + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), + ConvolutionLayerTest::getTestCaseName); // ! 
[test_convolution:instantiate] -INSTANTIATE_TEST_SUITE_P(Convolution2D_AutoPadValid, - ConvolutionLayerTest, - ::testing::Combine(conv2DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), - ConvolutionLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + Convolution2D_AutoPadValid, + ConvolutionLayerTest, + ::testing::Combine(conv2DParams_AutoPadValid, + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 30, 30}})), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), + ConvolutionLayerTest::getTestCaseName); /* ============= 3D Convolution ============= */ @@ -95,28 +89,22 @@ const auto conv3DParams_AutoPadValid = ::testing::Combine(::testing::ValuesIn(ke ::testing::Values(5), ::testing::Values(ov::op::PadType::VALID)); -INSTANTIATE_TEST_SUITE_P(smoke_Convolution3D_ExplicitPadding, - ConvolutionLayerTest, - ::testing::Combine(conv3DParams_ExplicitPadding, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 10, 10, 10})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), - ConvolutionLayerTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(nightly_Convolution3D_AutoPadValid, - ConvolutionLayerTest, - ::testing::Combine(conv3DParams_AutoPadValid, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 10, 10, 10})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), - ConvolutionLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_Convolution3D_ExplicitPadding, + ConvolutionLayerTest, + ::testing::Combine(conv3DParams_ExplicitPadding, + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 10, 10, 10}})), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), + ConvolutionLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + nightly_Convolution3D_AutoPadValid, + ConvolutionLayerTest, + ::testing::Combine(conv3DParams_AutoPadValid, + ::testing::ValuesIn(model_types), + ::testing::Values(ov::test::static_shapes_to_test_representation({{1, 3, 10, 10, 10}})), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), + ConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp index 576490fa79d8de..f6c17028cafd0f 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp @@ -2,58 +2,43 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/reshape.hpp" +#include "single_op_tests/reshape.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ReshapeLayerTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +const std::vector model_types = { + ov::element::f32, }; INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest, ::testing::Combine(::testing::Values(true), - 
::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector({30, 30, 30, 30})), ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), ReshapeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheck, ReshapeLayerTest, ::testing::Combine(::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector({10, 10, 10, 10})), ::testing::Values(std::vector({10, 0, 100})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), ReshapeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_ReshapeCheckNegative, ReshapeLayerTest, ::testing::Combine(::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(model_types), ::testing::Values(std::vector({10, 10, 10, 10})), ::testing::Values(std::vector({10, -1, 100})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE), - ::testing::Values(std::map({}))), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), ReshapeLayerTest::getTestCaseName); } // namespace diff --git 
a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/split.cpp b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/split.cpp index 064f1b5150274f..e2d84bad5e7f0b 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/split.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/single_layer_tests/split.cpp @@ -2,28 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/split.hpp" +#include "single_op_tests/split.hpp" #include #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::SplitLayerTest; namespace { -INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, - SplitLayerTest, - ::testing::Combine(::testing::Values(1, 2, 3, 5, 6, 10, 30), - ::testing::Values(0, 1, 2, 3), - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(std::vector({})), - ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), - SplitLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NumSplitsCheck, + SplitLayerTest, + ::testing::Combine(::testing::Values(1, 2, 3, 5, 6, 10, 30), + ::testing::Values(0, 1, 2, 3), + ::testing::Values(ov::element::f32), + ::testing::Values(ov::test::static_shapes_to_test_representation({{30, 30, 30, 30}})), + ::testing::Values(std::vector({})), + ::testing::Values(ov::test::utils::DEVICE_TEMPLATE)), + SplitLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/template/tests/functional/subgraph_reference/preprocess.cpp b/src/plugins/template/tests/functional/subgraph_reference/preprocess.cpp index cdd0cf5c2318dc..865d8c3d92b2f9 100644 --- 
a/src/plugins/template/tests/functional/subgraph_reference/preprocess.cpp +++ b/src/plugins/template/tests/functional/subgraph_reference/preprocess.cpp @@ -351,7 +351,6 @@ static RefPreprocessParams resize_to_network_width_height() { return f; }; - auto result = std::make_shared(); // clang-format off std::vector input = {0., 1., 2., 3., 4., 1., 2., 3., 4., 5., @@ -380,7 +379,6 @@ static RefPreprocessParams resize_to_specified_width_height() { return f; }; - auto result = std::make_shared(); // clang-format off std::vector input = {0., 1., 2., 3., 4., 1., 2., 3., 4., 5., @@ -747,7 +745,6 @@ static RefPreprocessParams resize_and_convert_layout() { return f; }; - auto result = std::make_shared(); // clang-format off std::vector input = { 1., 1., 1., 1., // channel 1 @@ -857,7 +854,6 @@ static RefPreprocessParams convert_color_nv12_layout_resize() { return f; }; - auto result = std::make_shared(); // clang-format off auto input = std::vector {81, 81, 145, 145, // RRGG 81, 81, 145, 145, // RRGG From ae343a0178afcb2326b6221136700d98afee36c3 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 6 Nov 2023 06:26:47 +0100 Subject: [PATCH 196/275] [core]Migrate FloorMod operator to new API (#20829) * Migrate FloorMod operator to new API * Remove `visit_attributes` is same as base class * Restore FloorMod calculation for signed values floating-point and integral --- src/core/include/openvino/op/floor_mod.hpp | 5 +- .../include/openvino/reference/floor_mod.hpp | 24 +++- src/core/src/op/floor_mod.cpp | 121 ++++++++---------- 3 files changed, 72 insertions(+), 78 deletions(-) diff --git a/src/core/include/openvino/op/floor_mod.hpp b/src/core/include/openvino/op/floor_mod.hpp index 4df54f3bcd7334..624914825cf924 100644 --- a/src/core/include/openvino/op/floor_mod.hpp +++ b/src/core/include/openvino/op/floor_mod.hpp @@ -34,10 +34,7 @@ class OPENVINO_API FloorMod : public util::BinaryElementwiseArithmetic { const AutoBroadcastSpec& auto_broadcast = AutoBroadcastType::NUMPY); 
std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool visit_attributes(AttributeVisitor& visitor) override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/floor_mod.hpp b/src/core/reference/include/openvino/reference/floor_mod.hpp index 2c63b92310cbaa..9868627dc183c3 100644 --- a/src/core/reference/include/openvino/reference/floor_mod.hpp +++ b/src/core/reference/include/openvino/reference/floor_mod.hpp @@ -8,9 +8,26 @@ #include #include "openvino/reference/autobroadcast_binop.hpp" +#include "openvino/reference/mod.hpp" namespace ov { namespace reference { +namespace func { + +template ::value>::type* = nullptr> +constexpr T floor_mod(const T x, const T y) { + return mod(x, y); +} + +template () || std::is_signed::value>::type* = nullptr> +T floor_mod(const T x, const T y) { + // Cast to double is needed for integer input (signed), + // otherwise std::floor will act like std::trunc + const double divisor = static_cast(y); + return static_cast(x - y * std::floor(x / divisor)); +} +} // namespace func + template void floor_mod(const T* arg0, const T* arg1, @@ -18,12 +35,7 @@ void floor_mod(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - // Cast to double is needed for integer input, - // otherwise std::floor will act like std::trunc - const double divisor = static_cast(y); - return static_cast(x - y * std::floor(x / divisor)); - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::floor_mod); } } // namespace reference 
} // namespace ov diff --git a/src/core/src/op/floor_mod.cpp b/src/core/src/op/floor_mod.cpp index 012d55a6f4c1da..225c70a5e5d5eb 100644 --- a/src/core/src/op/floor_mod.cpp +++ b/src/core/src/op/floor_mod.cpp @@ -2,94 +2,79 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/floor_mod.hpp" +#include "openvino/op/floor_mod.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/floor_mod.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace floor_mod { + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::floor_mod(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; + } +}; +} // namespace floor_mod -op::v1::FloorMod::FloorMod(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +FloorMod::FloorMod(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::FloorMod::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr FloorMod::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_FloorMod_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace floor_mod { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - 
ov::reference::floor_mod(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} +bool FloorMod::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_FloorMod_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); -bool evaluate_floor_mod(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_floor_mod, i8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, u32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, bf16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_floor_mod, f32, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace floor_mod + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); -bool op::v1::FloorMod::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_FloorMod_evaluate); - return floor_mod::evaluate_floor_mod(inputs[0], inputs[1], outputs[0], get_autob()); + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::FloorMod::has_evaluate() const { +bool FloorMod::has_evaluate() const { 
OV_OP_SCOPE(v1_FloorMod_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i8: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case element::f32: + case element::i8: + case element::i32: + case element::i64: + case element::u8: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; -} - -bool op::v1::FloorMod::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_FloorMod_visit_attributes); - BinaryElementwiseArithmetic::visit_attributes(visitor); - return true; } +} // namespace v1 +} // namespace op +} // namespace ov From 7d74dac3ee4127526a5003073d84a4f7b2e0a3ae Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 6 Nov 2023 06:31:02 +0100 Subject: [PATCH 197/275] [core]Migrate GridSample operator to new API (#20852) * MIgrate GridSample to new API * Refactor GridSample to reduce binary size - use function pointer instead std::function (simpler less code size) - use RoundingGuard instead manual set/restore rounding mode - move interpolate selection outside main data processing loop --- src/core/include/openvino/op/grid_sample.hpp | 4 +- .../openvino/reference/grid_sample.hpp | 58 ++++--- src/core/src/op/grid_sample.cpp | 157 +++++++++--------- 3 files changed, 112 insertions(+), 107 deletions(-) diff --git a/src/core/include/openvino/op/grid_sample.hpp b/src/core/include/openvino/op/grid_sample.hpp index 74c172fcb57d9b..13f2fab78d6505 100644 --- a/src/core/include/openvino/op/grid_sample.hpp +++ b/src/core/include/openvino/op/grid_sample.hpp @@ -59,9 +59,7 @@ class OPENVINO_API GridSample : public Op { m_attributes = attributes; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const 
override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; private: diff --git a/src/core/reference/include/openvino/reference/grid_sample.hpp b/src/core/reference/include/openvino/reference/grid_sample.hpp index 88c071538cc1cc..6c765881e536db 100644 --- a/src/core/reference/include/openvino/reference/grid_sample.hpp +++ b/src/core/reference/include/openvino/reference/grid_sample.hpp @@ -12,6 +12,7 @@ #include "openvino/core/shape.hpp" #include "openvino/op/grid_sample.hpp" +#include "openvino/reference/rounding_guard.hpp" namespace ov { namespace reference { @@ -20,10 +21,20 @@ namespace { using index_4D_t = typename std::array; template -using denormalize_fn_t = typename std::function; +using denormalize_fn_t = GRID_ET (*)(GRID_ET, size_t); template -using get_padded_fn_t = typename std::function; +using get_padded_fn_t = DATA_ET (*)(const DATA_ET*, const Shape&, size_t, size_t, long, long); + +template +using interpolate_fn_t = DATA_ET (*)(const DATA_ET* data, + const Shape&, + const size_t n, + const size_t c, + const GRID_ET, + const GRID_ET, + const get_padded_fn_t&, + const denormalize_fn_t&); template T& get_single_value(T* buffer, const Shape& shape, const index_4D_t& index) { @@ -240,8 +251,7 @@ void grid_sample(DATA_ET* output, const auto W_out = grid_shape[2]; const Shape output_shape{N, C, H_out, W_out}; - const auto prev_rounding_mode = std::fegetround(); - std::fesetround(FE_TONEAREST); + const RoundingGuard rounding_guard{FE_TONEAREST}; get_padded_fn_t get_padded_fn; switch (padding_mode) { @@ -253,18 +263,25 @@ void grid_sample(DATA_ET* output, get_padded_fn = border_padding; break; case ov::op::v9::GridSample::PaddingMode::REFLECTION: - if (align_corners) - get_padded_fn = reflection_data_with_align; - else - get_padded_fn = reflection_data_no_align; + get_padded_fn = align_corners ? 
reflection_data_with_align : reflection_data_no_align; break; } - denormalize_fn_t denormalize_fn; - if (align_corners) - denormalize_fn = rescale_align; - else - denormalize_fn = rescale_noalign; + const auto denormalize_fn = align_corners ? rescale_align : rescale_noalign; + + interpolate_fn_t interpolate_fn; + switch (interpolation_mode) { + default: + case ov::op::v9::GridSample::InterpolationMode::BILINEAR: + interpolate_fn = bilinear; + break; + case ov::op::v9::GridSample::InterpolationMode::NEAREST: + interpolate_fn = nearest; + break; + case ov::op::v9::GridSample::InterpolationMode::BICUBIC: + interpolate_fn = bicubic; + break; + } for (size_t n = 0; n < N; ++n) { for (size_t c = 0; c < C; ++c) { @@ -274,24 +291,11 @@ void grid_sample(DATA_ET* output, const auto x_n = get_single_value(grid, grid_shape, index_4D_t{n, y, x, 0}); auto& out = get_single_value(output, output_shape, index_4D_t{n, c, y, x}); - - switch (interpolation_mode) { - case ov::op::v9::GridSample::InterpolationMode::BILINEAR: - out = bilinear(data, data_shape, n, c, y_n, x_n, get_padded_fn, denormalize_fn); - break; - case ov::op::v9::GridSample::InterpolationMode::NEAREST: - out = nearest(data, data_shape, n, c, y_n, x_n, get_padded_fn, denormalize_fn); - break; - case ov::op::v9::GridSample::InterpolationMode::BICUBIC: - out = bicubic(data, data_shape, n, c, y_n, x_n, get_padded_fn, denormalize_fn); - break; - } + out = interpolate_fn(data, data_shape, n, c, y_n, x_n, get_padded_fn, denormalize_fn); } } } } - - std::fesetround(prev_rounding_mode); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/grid_sample.cpp b/src/core/src/op/grid_sample.cpp index 000b38cfbdc363..65ffb365c3e618 100644 --- a/src/core/src/op/grid_sample.cpp +++ b/src/core/src/op/grid_sample.cpp @@ -4,19 +4,67 @@ #include "openvino/op/grid_sample.hpp" +#include "element_visitor.hpp" #include "grid_sample_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/validation_util.hpp" #include 
"openvino/reference/grid_sample.hpp" +#include "validation_util.hpp" namespace ov { -op::v9::GridSample::GridSample(const Output& data, const Output& grid, const Attributes& attributes) +namespace op { +namespace v9 { + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(Tensor& output, + const Tensor& data, + const Tensor& grid, + const Shape& data_shape, + const Shape& grid_shape, + const GridSample::Attributes& attributes) { + using namespace ov::element; + return IfTypeOf::apply(grid.get_element_type(), + output.data(), + data.data(), + grid, + data_shape, + grid_shape, + attributes); + } + +private: + struct EvalByGridType : public element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(T* output, + const T* data, + const Tensor& grid, + const Shape& data_shape, + const Shape& grid_shape, + const GridSample::Attributes& attributes) { + reference::grid_sample(output, + data, + grid.data(), + data_shape, + grid_shape, + attributes.align_corners, + attributes.mode, + attributes.padding_mode); + return true; + } + }; +}; + +GridSample::GridSample(const Output& data, const Output& grid, const Attributes& attributes) : op::Op{{data, grid}}, m_attributes{attributes} { constructor_validate_and_infer_types(); } -bool op::v9::GridSample::visit_attributes(AttributeVisitor& visitor) { +bool GridSample::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v9_GridSample_visit_attributes); visitor.on_attribute("align_corners", m_attributes.align_corners); visitor.on_attribute("mode", m_attributes.mode); @@ -24,7 +72,7 @@ bool op::v9::GridSample::visit_attributes(AttributeVisitor& visitor) { return true; } -void op::v9::GridSample::validate_and_infer_types() { +void GridSample::validate_and_infer_types() { OV_OP_SCOPE(v9_GridSample_validate_and_infer_types); if (!get_input_element_type(1).is_dynamic()) { NODE_VALIDATION_CHECK(this, @@ -39,12 +87,36 @@ void 
op::v9::GridSample::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), out_shapes[0]); } -std::shared_ptr op::v9::GridSample::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr GridSample::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v9_GridSample_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), this->get_attributes()); + return std::make_shared(new_args.at(0), new_args.at(1), get_attributes()); +} + +bool GridSample::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v9_GridSample_evaluate); + + OPENVINO_ASSERT(outputs.size() == 1); + + const auto& out_shape = shape_infer(this, ov::util::get_tensors_partial_shapes(inputs)).front().to_shape(); + outputs[0].set_shape(out_shape); + + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + outputs[0], + inputs[0], + inputs[1], + inputs[0].get_shape(), + inputs[1].get_shape(), + m_attributes); } +bool GridSample::has_evaluate() const { + return get_input_element_type(0) == element::f32 && get_input_element_type(1) == element::f32; +} +} // namespace v9 +} // namespace op + std::ostream& operator<<(std::ostream& s, const op::v9::GridSample::InterpolationMode& mode) { return s << as_string(mode); } @@ -54,7 +126,7 @@ std::ostream& operator<<(std::ostream& s, const op::v9::GridSample::PaddingMode& } template <> -NGRAPH_API EnumNames& EnumNames::get() { +OPENVINO_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames("op::v9::GridSample::InterpolationMode", {{"bilinear", op::v9::GridSample::InterpolationMode::BILINEAR}, @@ -64,7 +136,7 @@ NGRAPH_API EnumNames& EnumNames -NGRAPH_API EnumNames& EnumNames::get() { +OPENVINO_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames("op::v9::GridSample::PaddingMode", {{"zeros", op::v9::GridSample::PaddingMode::ZEROS}, @@ -72,73 +144,4 @@ NGRAPH_API 
EnumNames& EnumNames -bool evaluate_exec(const ngraph::HostTensorPtr& output, - const ngraph::HostTensorPtr& data, - const ngraph::HostTensorPtr& grid, - const op::v9::GridSample::Attributes& attributes) { - ov::reference::grid_sample(output->get_data_ptr(), - data->get_data_ptr(), - grid->get_data_ptr(), - data->get_shape(), - grid->get_shape(), - attributes.align_corners, - attributes.mode, - attributes.padding_mode); - return true; -} - -#define GRID_SAMPLE_TYPE_CASE(a, ...) \ - case element::Type_t::a: { \ - OV_OP_SCOPE(OV_PP_CAT3(evaluate_exec_grid_sample, _, a)); \ - rc = evaluate_exec(__VA_ARGS__); \ - } break - -template -bool evaluate(const ngraph::HostTensorPtr& output, - const ngraph::HostTensorPtr& data, - const ngraph::HostTensorPtr& grid, - const op::v9::GridSample::Attributes& attributes) { - auto rc = true; - switch (grid->get_element_type()) { - GRID_SAMPLE_TYPE_CASE(f32, output, data, grid, attributes); - default: - rc = false; - break; - } - return rc; -} - -bool evaluate_grid_sample(const ngraph::HostTensorPtr& output, - const ngraph::HostTensorPtr& data, - const ngraph::HostTensorPtr& grid, - const op::v9::GridSample::Attributes& attributes) { - auto rc = true; - switch (output->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_grid_sample, f32, output, data, grid, attributes); - default: - rc = false; - break; - } - return rc; -} -} // namespace - -bool op::v9::GridSample::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v9_GridSample_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(ngraph::validate_host_tensor_vector(inputs, 2), "Invalid GridSample input TensorVector."); - OPENVINO_ASSERT(ngraph::validate_host_tensor_vector(outputs, 1), "Invalid GridSample output TensorVector."); - OPENVINO_SUPPRESS_DEPRECATED_END - - return evaluate_grid_sample(outputs[0], inputs[0], inputs[1], m_attributes); -} - -bool op::v9::GridSample::has_evaluate() const { - return 
get_input_element_type(0) == element::f32 && get_input_element_type(1) == element::f32; -} } // namespace ov From a554611644b09e2d488e8686cbbd7fef16c99253 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Mon, 6 Nov 2023 10:08:58 +0100 Subject: [PATCH 198/275] Updating notebooks (#20865) --- docs/articles_en/learn_openvino/tutorials.md | 517 +++-- docs/nbdoc/consts.py | 2 +- .../notebooks/001-hello-world-with-output.rst | 90 +- ...g => 001-hello-world-with-output_11_1.png} | 0 .../index.html | 6 +- .../002-openvino-api-with-output.rst | 128 +- .../003-hello-segmentation-with-output.rst | 94 +- ...3-hello-segmentation-with-output_11_2.png} | 0 .../index.html | 10 +- .../004-hello-detection-with-output.rst | 83 +- ... 004-hello-detection-with-output_11_1.png} | 0 .../index.html | 8 +- ...classification-to-openvino-with-output.rst | 159 +- ...fication-to-openvino-with-output_19_1.png} | 0 .../index.html | 6 +- ...2-pytorch-onnx-to-openvino-with-output.rst | 156 +- .../index.html | 10 +- .../102-pytorch-to-openvino-with-output.rst | 208 +- .../index.html | 32 +- ...to-openvino-classification-with-output.rst | 149 +- ...nvino-classification-with-output_16_3.png} | 0 ...nvino-classification-with-output_24_1.png} | 0 ...nvino-classification-with-output_28_1.png} | 0 ...nvino-classification-with-output_31_1.png} | 0 ...envino-classification-with-output_9_1.png} | 0 .../index.html | 14 +- .../notebooks/104-model-tools-with-output.rst | 114 +- ...105-language-quantize-bert-with-output.rst | 324 +-- .../notebooks/106-auto-device-with-output.rst | 202 +- .../106-auto-device-with-output_13_0.jpg | 3 - .../106-auto-device-with-output_13_0.png | 3 - ...g => 106-auto-device-with-output_14_1.jpg} | 0 ...g => 106-auto-device-with-output_14_1.png} | 0 .../106-auto-device-with-output_25_0.png | 3 - .../106-auto-device-with-output_26_0.png | 3 - .../106-auto-device-with-output_27_0.png | 4 +- .../106-auto-device-with-output_28_0.png | 3 + .../index.html | 12 +- 
...tion-quantization-data2vec-with-output.rst | 686 ++++--- ...tion-quantization-wav2vec2-with-output.rst | 925 --------- docs/notebooks/108-gpu-device-with-output.rst | 330 ++-- .../109-latency-tricks-with-output.rst | 175 +- .../109-latency-tricks-with-output_30_0.png | 4 +- .../index.html | 20 +- .../109-throughput-tricks-with-output.rst | 172 +- ...109-throughput-tricks-with-output_33_0.png | 4 +- .../index.html | 21 +- ...110-ct-scan-live-inference-with-output.rst | 114 +- .../index.html | 6 +- ...segmentation-quantize-nncf-with-output.rst | 252 +-- ...ntation-quantize-nncf-with-output_37_1.png | 4 +- .../index.html | 10 +- ...ov5-quantization-migration-with-output.rst | 479 +++-- .../index.html | 8 +- ...training-quantization-nncf-with-output.rst | 267 +-- ...lassification-quantization-with-output.rst | 293 ++- .../index.html | 6 +- docs/notebooks/115-async-api-with-output.rst | 116 +- .../115-async-api-with-output_21_0.png | 4 +- .../index.html | 12 +- .../116-sparsity-optimization-with-output.rst | 226 ++- .../117-model-server-with-output.rst | 125 +- ... => 117-model-server-with-output_21_1.png} | 0 ... 
=> 117-model-server-with-output_26_1.png} | 0 .../index.html | 8 +- ...118-optimize-preprocessing-with-output.rst | 210 +- .../index.html | 6 +- .../119-tflite-to-openvino-with-output.rst | 142 +- .../index.html | 8 +- ...e-segmentation-to-openvino-with-output.rst | 664 +++++++ ...mentation-to-openvino-with-output_25_1.png | 3 + ...mentation-to-openvino-with-output_39_0.png | 3 + .../index.html | 8 + ...ject-detection-to-openvino-with-output.rst | 233 +-- ...detection-to-openvino-with-output_38_0.png | 4 +- .../index.html | 8 +- .../121-convert-to-openvino-with-output.rst | 333 ++-- ...tion-quantization-wav2vec2-with-output.rst | 1733 +---------------- ...tion-with-accuracy-control-with-output.rst | 779 ++------ ...123-detectron2-to-openvino-with-output.rst | 433 ++++ ...etectron2-to-openvino-with-output_22_0.jpg | 3 + ...etectron2-to-openvino-with-output_22_0.png | 3 + ...etectron2-to-openvino-with-output_32_0.jpg | 3 + ...etectron2-to-openvino-with-output_32_0.png | 3 + ...detectron2-to-openvino-with-output_8_0.jpg | 3 + ...detectron2-to-openvino-with-output_8_0.png | 3 + .../index.html | 12 + .../124-hugging-face-hub-with-output.rst | 405 ++++ ...25-convnext-classification-with-output.rst | 268 +++ ...onvnext-classification-with-output_4_0.jpg | 3 + ...onvnext-classification-with-output_4_0.png | 3 + .../index.html | 8 + .../126-tensorflow-hub-with-output.rst | 447 +++++ .../126-tensorflow-hub-with-output_11_0.jpg | 3 + .../126-tensorflow-hub-with-output_11_0.png | 3 + .../126-tensorflow-hub-with-output_26_0.png | 3 + .../126-tensorflow-hub-with-output_45_0.png | 3 + .../index.html | 10 + .../201-vision-monodepth-with-output.rst | 119 +- .../index.html | 6 +- ...sion-superresolution-image-with-output.rst | 155 +- .../index.html | 12 +- ...sion-superresolution-video-with-output.rst | 104 +- .../203-meter-reader-with-output.rst | 88 +- .../index.html | 14 +- ...nter-semantic-segmentation-with-output.rst | 262 +-- ...emantic-segmentation-with-output_32_0.jpg} | 0 
...emantic-segmentation-with-output_32_0.png} | 0 .../index.html | 12 +- ...-vision-background-removal-with-output.rst | 131 +- .../index.html | 8 +- ...206-vision-paddlegan-anime-with-output.rst | 136 +- ...ision-paddlegan-anime-with-output_37_0.png | 4 +- .../index.html | 8 +- ...-paddlegan-superresolution-with-output.rst | 176 +- ...egan-superresolution-with-output_26_1.png} | 0 ...legan-superresolution-with-output_29_1.png | 3 - ...legan-superresolution-with-output_30_1.png | 3 + ...legan-superresolution-with-output_31_0.png | 3 - ...legan-superresolution-with-output_32_0.png | 3 + .../index.html | 10 +- ...ical-character-recognition-with-output.rst | 135 +- .../index.html | 32 +- .../209-handwritten-ocr-with-output.rst | 160 +- ... 209-handwritten-ocr-with-output_32_1.png} | 0 .../index.html | 8 +- ...slowfast-video-recognition-with-output.rst | 273 +-- .../211-speech-to-text-with-output.rst | 214 +- .../211-speech-to-text-with-output_20_0.png | 3 + .../211-speech-to-text-with-output_20_2.png | 3 + .../211-speech-to-text-with-output_21_1.png | 3 - .../211-speech-to-text-with-output_21_3.png | 3 - .../211-speech-to-text-with-output_28_0.png | 3 + .../211-speech-to-text-with-output_28_1.png | 3 + .../211-speech-to-text-with-output_29_0.png | 3 - .../211-speech-to-text-with-output_29_1.png | 3 - .../index.html | 12 +- ...annote-speaker-diarization-with-output.rst | 109 +- ...e-speaker-diarization-with-output_27_0.png | 4 +- .../index.html | 10 +- .../213-question-answering-with-output.rst | 211 +- .../214-grammar-correction-with-output.rst | 391 +++- .../215-image-inpainting-with-output.rst | 79 +- ...215-image-inpainting-with-output_15_0.png} | 0 ...215-image-inpainting-with-output_17_0.png} | 0 ...215-image-inpainting-with-output_19_0.png} | 0 ...215-image-inpainting-with-output_23_0.png} | 0 .../index.html | 12 +- .../216-attention-center-with-output.rst | 309 --- .../216-attention-center-with-output_14_1.png | 3 - .../216-attention-center-with-output_16_1.png 
| 3 - .../index.html | 8 - .../217-vision-deblur-with-output.rst | 125 +- ...=> 217-vision-deblur-with-output_25_0.png} | 0 .../217-vision-deblur-with-output_27_0.png | 3 - .../217-vision-deblur-with-output_28_0.png | 3 + .../217-vision-deblur-with-output_29_0.png | 3 - .../217-vision-deblur-with-output_30_0.png | 3 + .../index.html | 10 +- ...-detection-and-recognition-with-output.rst | 170 +- ...tion-and-recognition-with-output_14_0.png} | 0 ...tion-and-recognition-with-output_21_0.png} | 0 ...tion-and-recognition-with-output_27_0.png} | 0 .../index.html | 10 +- ...219-knowledge-graphs-conve-with-output.rst | 276 ++- ...ss-lingual-books-alignment-with-output.rst | 236 +-- ...ngual-books-alignment-with-output_31_0.png | 2 +- ...ngual-books-alignment-with-output_48_0.png | 4 +- .../index.html | 8 +- .../221-machine-translation-with-output.rst | 159 +- ...-vision-image-colorization-with-output.rst | 111 +- ...on-image-colorization-with-output_20_0.png | 3 - ...on-image-colorization-with-output_21_0.png | 4 +- ...on-image-colorization-with-output_22_0.png | 3 + .../index.html | 8 +- .../223-text-prediction-with-output.rst | 679 ------- ...-segmentation-point-clouds-with-output.rst | 96 +- ...ntation-point-clouds-with-output_11_1.png} | 0 ...ntation-point-clouds-with-output_16_0.png} | 0 .../index.html | 8 +- ...le-diffusion-text-to-image-with-output.rst | 233 +-- ...fusion-text-to-image-with-output_38_1.png} | 0 ...fusion-text-to-image-with-output_40_1.png} | 0 .../index.html | 10 +- .../226-yolov7-optimization-with-output.rst | 331 ++-- ...6-yolov7-optimization-with-output_10_0.jpg | 3 + ...6-yolov7-optimization-with-output_10_0.png | 3 + ...6-yolov7-optimization-with-output_26_0.jpg | 3 - ...6-yolov7-optimization-with-output_26_0.png | 3 - ...6-yolov7-optimization-with-output_27_0.jpg | 3 + ...6-yolov7-optimization-with-output_27_0.png | 3 + ...6-yolov7-optimization-with-output_43_0.jpg | 3 - ...6-yolov7-optimization-with-output_43_0.png | 3 - 
...6-yolov7-optimization-with-output_44_0.jpg | 3 + ...6-yolov7-optimization-with-output_44_0.png | 3 + ...26-yolov7-optimization-with-output_9_0.jpg | 3 - ...26-yolov7-optimization-with-output_9_0.png | 3 - .../index.html | 16 +- .../227-whisper-convert-with-output.rst | 537 +++++ .../227-whisper-nncf-quantize-with-output.rst | 611 ++++++ ...isper-subtitles-generation-with-output.rst | 669 ------- ...228-clip-zero-shot-convert-with-output.rst | 294 ++- ...lip-zero-shot-convert-with-output_12_0.png | 2 +- ...lip-zero-shot-convert-with-output_17_0.png | 3 - ...clip-zero-shot-convert-with-output_4_0.png | 3 - ...clip-zero-shot-convert-with-output_5_0.png | 3 + .../index.html | 9 +- ...28-clip-zero-shot-quantize-with-output.rst | 145 +- ...ip-zero-shot-quantize-with-output_16_0.png | 4 +- .../index.html | 6 +- ...rt-sequence-classification-with-output.rst | 152 +- ...lov8-instance-segmentation-with-output.rst | 1393 +++++++++++++ ...instance-segmentation-with-output_11_1.jpg | 3 + ...instance-segmentation-with-output_11_1.png | 3 + ...instance-segmentation-with-output_22_0.jpg | 3 + ...instance-segmentation-with-output_22_0.png | 3 + ...instance-segmentation-with-output_44_0.jpg | 3 + ...instance-segmentation-with-output_44_0.png | 3 + ...instance-segmentation-with-output_60_0.png | 3 + .../index.html | 13 + ...-yolov8-keypoint-detection-with-output.rst | 1361 +++++++++++++ ...v8-keypoint-detection-with-output_11_1.jpg | 3 + ...v8-keypoint-detection-with-output_11_1.png | 3 + ...v8-keypoint-detection-with-output_22_0.jpg | 3 + ...v8-keypoint-detection-with-output_22_0.png | 3 + ...v8-keypoint-detection-with-output_46_0.jpg | 3 + ...v8-keypoint-detection-with-output_46_0.png | 3 + ...v8-keypoint-detection-with-output_62_0.png | 3 + .../index.html | 13 + ...0-yolov8-object-detection-with-output.rst} | 1078 +++------- ...lov8-object-detection-with-output_11_1.jpg | 3 + ...lov8-object-detection-with-output_11_1.png | 3 + ...lov8-object-detection-with-output_22_0.jpg | 3 + 
...lov8-object-detection-with-output_22_0.png | 3 + ...lov8-object-detection-with-output_45_0.jpg | 3 + ...lov8-object-detection-with-output_45_0.png | 3 + ...lov8-object-detection-with-output_68_0.jpg | 3 + ...lov8-object-detection-with-output_68_0.png | 3 + ...lov8-object-detection-with-output_74_0.png | 3 + .../index.html | 15 + ...0-yolov8-optimization-with-output_13_1.png | 3 - ...0-yolov8-optimization-with-output_15_1.png | 3 - ...0-yolov8-optimization-with-output_27_0.png | 3 - ...0-yolov8-optimization-with-output_29_0.png | 3 - ...0-yolov8-optimization-with-output_59_0.png | 3 - ...0-yolov8-optimization-with-output_61_0.png | 3 - ...0-yolov8-optimization-with-output_91_0.png | 3 - ...0-yolov8-optimization-with-output_97_0.png | 3 - ...0-yolov8-optimization-with-output_99_0.png | 3 - .../index.html | 15 - ...ruct-pix2pix-image-editing-with-output.rst | 815 +++++--- ...pix2pix-image-editing-with-output_24_0.png | 3 + ...pix2pix-image-editing-with-output_25_0.png | 3 - ...pix2pix-image-editing-with-output_36_2.png | 3 + .../index.html | 7 +- ...clip-language-saliency-map-with-output.rst | 445 +++-- ...language-saliency-map-with-output_15_0.png | 4 +- ...language-saliency-map-with-output_17_0.png | 4 +- ...language-saliency-map-with-output_19_1.png | 4 +- ...language-saliency-map-with-output_29_1.png | 3 + ...language-saliency-map-with-output_31_1.png | 3 - ...language-saliency-map-with-output_35_1.png | 3 + ...language-saliency-map-with-output_37_1.png | 3 - .../index.html | 11 + .../233-blip-convert-with-output.rst | 680 +++++++ .../233-blip-convert-with-output_25_0.png | 3 + .../233-blip-convert-with-output_27_0.png | 3 + .../233-blip-convert-with-output_7_0.png | 3 + .../index.html | 9 + .../233-blip-optimize-with-output.rst | 447 +++++ .../233-blip-optimize-with-output_23_0.png | 3 + .../233-blip-optimize-with-output_25_0.png} | 0 .../index.html | 8 + ...visual-language-processing-with-output.rst | 943 --------- 
...l-language-processing-with-output_28_0.png | 3 - ...al-language-processing-with-output_8_0.png | 3 - .../index.html | 9 - ...-encodec-audio-compression-with-output.rst | 149 +- ...dec-audio-compression-with-output_38_1.png | 4 +- .../index.html | 10 +- ...ontrolnet-stable-diffusion-with-output.rst | 419 ++-- ...lnet-stable-diffusion-with-output_17_0.png | 4 +- ...olnet-stable-diffusion-with-output_8_0.png | 4 +- .../index.html | 8 +- ...diffusion-v2-infinite-zoom-with-output.rst | 482 ++--- ...v2-optimum-demo-comparison-with-output.rst | 80 +- ...timum-demo-comparison-with-output_13_1.png | 3 - .../index.html | 9 +- ...-diffusion-v2-optimum-demo-with-output.rst | 66 +- .../index.html | 6 +- ...sion-v2-text-to-image-demo-with-output.rst | 163 +- ...v2-text-to-image-demo-with-output_25_0.jpg | 3 + ...v2-text-to-image-demo-with-output_25_0.png | 4 +- .../index.html | 7 +- ...diffusion-v2-text-to-image-with-output.rst | 354 ++-- .../237-segment-anything-with-output.rst | 861 ++------ .../237-segment-anything-with-output_21_0.png | 2 +- .../237-segment-anything-with-output_28_0.png | 2 +- .../237-segment-anything-with-output_35_0.png | 2 +- .../237-segment-anything-with-output_39_0.png | 2 +- .../237-segment-anything-with-output_44_0.png | 4 +- .../237-segment-anything-with-output_48_0.png | 2 +- .../237-segment-anything-with-output_53_0.png | 4 +- .../237-segment-anything-with-output_68_1.jpg | 4 +- .../237-segment-anything-with-output_68_1.png | 4 +- .../237-segment-anything-with-output_80_0.png | 4 +- .../237-segment-anything-with-output_82_1.jpg | 4 +- .../237-segment-anything-with-output_82_1.png | 4 +- .../index.html | 28 +- ...238-deep-floyd-if-optimize-with-output.rst | 816 ++++++++ ...eep-floyd-if-optimize-with-output_23_6.jpg | 3 + ...eep-floyd-if-optimize-with-output_23_6.png | 3 + ...eep-floyd-if-optimize-with-output_24_5.jpg | 3 + ...eep-floyd-if-optimize-with-output_24_5.png | 3 + ...eep-floyd-if-optimize-with-output_25_1.jpg | 3 + 
...eep-floyd-if-optimize-with-output_25_1.png | 3 + .../index.html | 12 + .../238-deep-floyd-if-with-output.rst | 884 --------- .../238-deep-floyd-if-with-output_29_3.png | 3 - .../238-deep-floyd-if-with-output_31_3.png | 3 - .../238-deep-floyd-if-with-output_41_0.png | 3 - .../index.html | 9 - .../239-image-bind-convert-with-output.rst | 200 +- ...39-image-bind-convert-with-output_20_0.png | 4 +- ...39-image-bind-convert-with-output_22_0.png | 2 +- ...39-image-bind-convert-with-output_24_0.png | 4 +- .../index.html | 22 +- ...ly-2-instruction-following-with-output.rst | 270 +-- ...41-riffusion-text-to-music-with-output.rst | 285 +-- ...ffusion-text-to-music-with-output_14_0.jpg | 3 + ...ffusion-text-to-music-with-output_14_0.png | 3 + ...ffusion-text-to-music-with-output_15_0.jpg | 3 - ...ffusion-text-to-music-with-output_15_0.png | 3 - .../index.html | 8 +- ...42-freevc-voice-conversion-with-output.rst | 178 +- ...tflite-selfie-segmentation-with-output.rst | 130 +- ...e-selfie-segmentation-with-output_25_0.png | 4 +- ...e-selfie-segmentation-with-output_33_0.png | 4 +- .../index.html | 8 +- ...4-named-entity-recognition-with-output.rst | 254 +-- .../245-typo-detector-with-output.rst | 225 +-- ...6-depth-estimation-videpth-with-output.rst | 228 ++- .../index.html | 8 +- .../247-code-language-id-with-output.rst | 278 +-- .../248-stable-diffusion-xl-with-output.rst | 171 +- ...8-stable-diffusion-xl-with-output_10_3.jpg | 4 +- ...8-stable-diffusion-xl-with-output_10_3.png | 4 +- ...8-stable-diffusion-xl-with-output_18_3.jpg | 4 +- ...8-stable-diffusion-xl-with-output_18_3.png | 4 +- ...8-stable-diffusion-xl-with-output_29_2.jpg | 3 + ...8-stable-diffusion-xl-with-output_29_2.png | 3 + ...8-stable-diffusion-xl-with-output_29_3.jpg | 3 - ...8-stable-diffusion-xl-with-output_29_3.png | 3 - .../index.html | 16 +- ...249-oneformer-segmentation-with-output.rst | 467 ++++- ...eformer-segmentation-with-output_23_1.jpg} | 0 ...eformer-segmentation-with-output_23_1.png} | 0 
...neformer-segmentation-with-output_26_0.jpg | 3 - ...neformer-segmentation-with-output_26_0.png | 3 - ...neformer-segmentation-with-output_27_0.jpg | 3 + ...neformer-segmentation-with-output_27_0.png | 3 + ...neformer-segmentation-with-output_39_1.jpg | 3 + ...neformer-segmentation-with-output_39_1.png | 3 + ...neformer-segmentation-with-output_39_3.jpg | 3 + ...neformer-segmentation-with-output_39_3.png | 3 + .../index.html | 16 +- .../250-music-generation-with-output.rst | 177 +- ...1-tiny-sd-image-generation-with-output.rst | 266 ++- ...y-sd-image-generation-with-output_33_1.jpg | 3 + ...y-sd-image-generation-with-output_33_1.png | 3 + ...y-sd-image-generation-with-output_37_1.jpg | 3 + ...y-sd-image-generation-with-output_37_1.png | 3 + ...y-sd-image-generation-with-output_39_1.jpg | 3 + ...-sd-image-generation-with-output_39_1.png} | 0 .../251-tiny-sd-image-generation_33_1.png | 3 - .../251-tiny-sd-image-generation_37_1.png | 3 - .../index.html | 12 + ...tcomposer-image-generation-with-output.rst | 160 +- .../multi-subject.png | 3 - .../253-zeroscope-text2video-with-output.rst | 192 +- ...zeroscope-text2video-with-output_01_02.png | 3 - ...zeroscope-text2video-with-output_01_03.gif | 3 - .../notebooks/254-llm-chatbot-with-output.rst | 1040 ++++++++++ ...sively-multilingual-speech-with-output.rst | 966 +++++++++ .../256-bark-text-to-audio-with-output.rst | 1111 +++++++++++ ...7-llava-multimodal-chatbot-with-output.rst | 1206 ++++++++++++ ...va-multimodal-chatbot-with-output_19_1.jpg | 3 + ...va-multimodal-chatbot-with-output_19_1.png | 3 + .../index.html | 8 + ...ffusion-subject-generation-with-output.rst | 1429 ++++++++++++++ .../1c472f1f-1fce-4a13-9d44-b10f6f760ddb.png | 3 + ...on-subject-generation-with-output_12_0.png | 3 + ...on-subject-generation-with-output_16_0.png | 3 + ...on-subject-generation-with-output_19_0.png | 3 + ...on-subject-generation-with-output_52_0.png | 3 + ...on-subject-generation-with-output_55_0.png | 3 + 
...on-subject-generation-with-output_58_0.png | 3 + .../index.html | 12 + ...diffusion-image-generation-with-output.rst | 1173 +++++++++++ ...sion-image-generation-with-output_26_1.jpg | 3 + ...sion-image-generation-with-output_26_1.png | 3 + ...sion-image-generation-with-output_28_1.jpg | 3 + ...sion-image-generation-with-output_28_1.png | 3 + ...sion-image-generation-with-output_30_1.jpg | 3 + ...sion-image-generation-with-output_30_1.png | 3 + .../index.html | 12 + .../260-pix2struct-docvqa-with-output.rst | 319 +++ ...260-pix2struct-docvqa-with-output_11_0.jpg | 3 + ...260-pix2struct-docvqa-with-output_11_0.png | 3 + .../index.html | 8 + .../261-fast-segment-anything-with-output.rst | 598 ++++++ ...fast-segment-anything-with-output_21_0.jpg | 3 + ...fast-segment-anything-with-output_21_0.png | 3 + ...-fast-segment-anything-with-output_9_0.jpg | 3 + ...-fast-segment-anything-with-output_9_0.png | 3 + .../index.html | 10 + ...62-softvc-voice-conversion-with-output.rst | 269 +++ ...cy-models-image-generation-with-output.rst | 939 +++++++++ ...dels-image-generation-with-output_21_0.jpg | 3 + ...dels-image-generation-with-output_21_0.png | 3 + .../index.html | 8 + ...low-training-openvino-nncf-with-output.rst | 377 ++-- ...aining-openvino-nncf-with-output_26_1.png} | 0 ...aining-openvino-nncf-with-output_2_13.png} | 0 ...raining-openvino-nncf-with-output_2_15.png | 3 - ...raining-openvino-nncf-with-output_2_4.png} | 0 ...training-openvino-nncf-with-output_2_6.png | 3 + ...training-openvino-nncf-with-output_2_8.png | 3 + ...training-openvino-nncf-with-output_2_9.png | 3 - .../index.html | 11 + ...nsorflow-training-openvino-with-output.rst | 434 ++--- ...low-training-openvino-with-output_13_0.jpg | 3 - ...low-training-openvino-with-output_13_0.png | 3 - ...low-training-openvino-with-output_14_0.jpg | 4 +- ...low-training-openvino-with-output_14_0.png | 4 +- ...low-training-openvino-with-output_15_0.jpg | 3 + ...low-training-openvino-with-output_15_0.png | 3 + 
...low-training-openvino-with-output_16_0.jpg | 3 - ...low-training-openvino-with-output_16_0.png | 3 - ...low-training-openvino-with-output_17_0.jpg | 4 +- ...low-training-openvino-with-output_17_0.png | 4 +- ...low-training-openvino-with-output_18_0.jpg | 3 + ...low-training-openvino-with-output_18_0.png | 3 + ...ow-training-openvino-with-output_29_0.png} | 0 ...low-training-openvino-with-output_56_1.png | 3 - ...low-training-openvino-with-output_57_0.png | 3 + ...low-training-openvino-with-output_65_0.png | 3 - ...low-training-openvino-with-output_66_0.png | 3 + ...ow-training-openvino-with-output_79_1.png} | 0 .../index.html | 28 +- ...uantization-aware-training-with-output.rst | 283 ++- ...uantization-aware-training-with-output.rst | 224 +-- .../index.html | 6 +- .../401-object-detection-with-output.rst | 166 +- .../401-object-detection-with-output_21_0.png | 4 +- .../index.html | 6 +- .../402-pose-estimation-with-output.rst | 116 +- .../402-pose-estimation-with-output_21_0.png | 3 - .../402-pose-estimation-with-output_22_0.png | 3 + .../index.html | 6 +- ...-action-recognition-webcam-with-output.rst | 174 +- ...on-recognition-webcam-with-output_21_0.png | 3 - ...on-recognition-webcam-with-output_22_0.png | 3 + .../index.html | 6 +- .../404-style-transfer-with-output.rst | 149 +- .../index.html | 6 +- .../405-paddle-ocr-webcam-with-output.rst | 138 +- ...405-paddle-ocr-webcam-with-output_32_0.png | 4 +- .../index.html | 6 +- .../406-3D-pose-estimation-with-output.rst | 242 +-- .../407-person-tracking-with-output.rst | 180 +- .../407-person-tracking-with-output_27_0.png | 4 +- .../index.html | 8 +- docs/notebooks/index.html | 365 ++-- docs/notebooks/notebook_utils-with-output.rst | 26 +- .../index.html | 18 +- .../notebook_utils-with-output_26_0.png | 4 +- docs/notebooks/notebooks_tags.json | 121 +- .../notebooks_with_binder_buttons.txt | 6 + .../notebooks_with_colab_buttons.txt | 29 +- 480 files changed, 30170 insertions(+), 18377 deletions(-) rename 
docs/notebooks/001-hello-world-with-output_files/{001-hello-world-with-output_11_0.png => 001-hello-world-with-output_11_1.png} (100%) rename docs/notebooks/003-hello-segmentation-with-output_files/{003-hello-segmentation-with-output_11_1.png => 003-hello-segmentation-with-output_11_2.png} (100%) rename docs/notebooks/004-hello-detection-with-output_files/{004-hello-detection-with-output_11_0.png => 004-hello-detection-with-output_11_1.png} (100%) rename docs/notebooks/101-tensorflow-classification-to-openvino-with-output_files/{101-tensorflow-classification-to-openvino-with-output_19_0.png => 101-tensorflow-classification-to-openvino-with-output_19_1.png} (100%) rename docs/notebooks/103-paddle-to-openvino-classification-with-output_files/{103-paddle-to-openvino-classification-with-output_15_3.png => 103-paddle-to-openvino-classification-with-output_16_3.png} (100%) rename docs/notebooks/103-paddle-to-openvino-classification-with-output_files/{103-paddle-to-openvino-classification-with-output_23_1.png => 103-paddle-to-openvino-classification-with-output_24_1.png} (100%) rename docs/notebooks/103-paddle-to-openvino-classification-with-output_files/{103-paddle-to-openvino-classification-with-output_27_1.png => 103-paddle-to-openvino-classification-with-output_28_1.png} (100%) rename docs/notebooks/103-paddle-to-openvino-classification-with-output_files/{103-paddle-to-openvino-classification-with-output_30_1.png => 103-paddle-to-openvino-classification-with-output_31_1.png} (100%) rename docs/notebooks/103-paddle-to-openvino-classification-with-output_files/{103-paddle-to-openvino-classification-with-output_8_1.png => 103-paddle-to-openvino-classification-with-output_9_1.png} (100%) delete mode 100644 docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.jpg delete mode 100644 docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.png rename 
docs/notebooks/106-auto-device-with-output_files/{106-auto-device-with-output_12_0.jpg => 106-auto-device-with-output_14_1.jpg} (100%) rename docs/notebooks/106-auto-device-with-output_files/{106-auto-device-with-output_12_0.png => 106-auto-device-with-output_14_1.png} (100%) delete mode 100644 docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_25_0.png delete mode 100644 docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_26_0.png create mode 100644 docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_28_0.png delete mode 100644 docs/notebooks/107-speech-recognition-quantization-wav2vec2-with-output.rst rename docs/notebooks/117-model-server-with-output_files/{117-model-server-with-output_20_1.png => 117-model-server-with-output_21_1.png} (100%) rename docs/notebooks/117-model-server-with-output_files/{117-model-server-with-output_25_1.png => 117-model-server-with-output_26_1.png} (100%) create mode 100644 docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output.rst create mode 100644 docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_25_1.png create mode 100644 docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_39_0.png create mode 100644 docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/index.html create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output.rst create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.jpg create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.png create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.jpg create mode 
100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.png create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.jpg create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.png create mode 100644 docs/notebooks/123-detectron2-to-openvino-with-output_files/index.html create mode 100644 docs/notebooks/124-hugging-face-hub-with-output.rst create mode 100644 docs/notebooks/125-convnext-classification-with-output.rst create mode 100644 docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.jpg create mode 100644 docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.png create mode 100644 docs/notebooks/125-convnext-classification-with-output_files/index.html create mode 100644 docs/notebooks/126-tensorflow-hub-with-output.rst create mode 100644 docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.jpg create mode 100644 docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.png create mode 100644 docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_26_0.png create mode 100644 docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_45_0.png create mode 100644 docs/notebooks/126-tensorflow-hub-with-output_files/index.html rename docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/{204-segmenter-semantic-segmentation-with-output_34_0.jpg => 204-segmenter-semantic-segmentation-with-output_32_0.jpg} (100%) rename docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/{204-segmenter-semantic-segmentation-with-output_34_0.png => 204-segmenter-semantic-segmentation-with-output_32_0.png} (100%) rename 
docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/{207-vision-paddlegan-superresolution-with-output_25_1.png => 207-vision-paddlegan-superresolution-with-output_26_1.png} (100%) delete mode 100644 docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_29_1.png create mode 100644 docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_30_1.png delete mode 100644 docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_31_0.png create mode 100644 docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_32_0.png rename docs/notebooks/209-handwritten-ocr-with-output_files/{209-handwritten-ocr-with-output_31_1.png => 209-handwritten-ocr-with-output_32_1.png} (100%) create mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_0.png create mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_2.png delete mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_1.png delete mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_3.png create mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_0.png create mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_1.png delete mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_0.png delete mode 100644 docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_1.png rename docs/notebooks/215-image-inpainting-with-output_files/{215-image-inpainting-with-output_14_0.png => 215-image-inpainting-with-output_15_0.png} (100%) rename 
docs/notebooks/215-image-inpainting-with-output_files/{215-image-inpainting-with-output_16_0.png => 215-image-inpainting-with-output_17_0.png} (100%) rename docs/notebooks/215-image-inpainting-with-output_files/{215-image-inpainting-with-output_18_0.png => 215-image-inpainting-with-output_19_0.png} (100%) rename docs/notebooks/215-image-inpainting-with-output_files/{215-image-inpainting-with-output_22_0.png => 215-image-inpainting-with-output_23_0.png} (100%) delete mode 100644 docs/notebooks/216-attention-center-with-output.rst delete mode 100644 docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_14_1.png delete mode 100644 docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_16_1.png delete mode 100644 docs/notebooks/216-attention-center-with-output_files/index.html rename docs/notebooks/217-vision-deblur-with-output_files/{217-vision-deblur-with-output_24_0.png => 217-vision-deblur-with-output_25_0.png} (100%) delete mode 100644 docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_27_0.png create mode 100644 docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_28_0.png delete mode 100644 docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_29_0.png create mode 100644 docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_30_0.png rename docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/{218-vehicle-detection-and-recognition-with-output_13_0.png => 218-vehicle-detection-and-recognition-with-output_14_0.png} (100%) rename docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/{218-vehicle-detection-and-recognition-with-output_20_0.png => 218-vehicle-detection-and-recognition-with-output_21_0.png} (100%) rename docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/{218-vehicle-detection-and-recognition-with-output_26_0.png => 
218-vehicle-detection-and-recognition-with-output_27_0.png} (100%) delete mode 100644 docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_20_0.png create mode 100644 docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_22_0.png delete mode 100644 docs/notebooks/223-text-prediction-with-output.rst rename docs/notebooks/224-3D-segmentation-point-clouds-with-output_files/{224-3D-segmentation-point-clouds-with-output_10_0.png => 224-3D-segmentation-point-clouds-with-output_11_1.png} (100%) rename docs/notebooks/224-3D-segmentation-point-clouds-with-output_files/{224-3D-segmentation-point-clouds-with-output_15_0.png => 224-3D-segmentation-point-clouds-with-output_16_0.png} (100%) rename docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/{225-stable-diffusion-text-to-image-with-output_37_1.png => 225-stable-diffusion-text-to-image-with-output_38_1.png} (100%) rename docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/{225-stable-diffusion-text-to-image-with-output_39_1.png => 225-stable-diffusion-text-to-image-with-output_40_1.png} (100%) create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.jpg create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.png delete mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.jpg delete mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.png create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.jpg create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.png delete mode 100644 
docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.jpg delete mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.png create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.jpg create mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.png delete mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.jpg delete mode 100644 docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.png create mode 100644 docs/notebooks/227-whisper-convert-with-output.rst create mode 100644 docs/notebooks/227-whisper-nncf-quantize-with-output.rst delete mode 100644 docs/notebooks/227-whisper-subtitles-generation-with-output.rst delete mode 100644 docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_17_0.png delete mode 100644 docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_4_0.png create mode 100644 docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_5_0.png create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output.rst create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.jpg create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.png create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.jpg create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.png create mode 100644 
docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.jpg create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.png create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_60_0.png create mode 100644 docs/notebooks/230-yolov8-instance-segmentation-with-output_files/index.html create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output.rst create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.jpg create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.png create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.jpg create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.png create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.jpg create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.png create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_62_0.png create mode 100644 docs/notebooks/230-yolov8-keypoint-detection-with-output_files/index.html rename docs/notebooks/{230-yolov8-optimization-with-output.rst => 230-yolov8-object-detection-with-output.rst} (55%) create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.jpg create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.png create mode 100644 
docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.jpg create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.png create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.jpg create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.png create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.jpg create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.png create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_74_0.png create mode 100644 docs/notebooks/230-yolov8-object-detection-with-output_files/index.html delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_13_1.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_15_1.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_27_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_29_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_59_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_61_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_91_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_97_0.png delete mode 100644 
docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_99_0.png delete mode 100644 docs/notebooks/230-yolov8-optimization-with-output_files/index.html create mode 100644 docs/notebooks/231-instruct-pix2pix-image-editing-with-output_files/231-instruct-pix2pix-image-editing-with-output_24_0.png delete mode 100644 docs/notebooks/231-instruct-pix2pix-image-editing-with-output_files/231-instruct-pix2pix-image-editing-with-output_25_0.png create mode 100644 docs/notebooks/231-instruct-pix2pix-image-editing-with-output_files/231-instruct-pix2pix-image-editing-with-output_36_2.png create mode 100644 docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_29_1.png delete mode 100644 docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_31_1.png create mode 100644 docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_35_1.png delete mode 100644 docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_37_1.png create mode 100644 docs/notebooks/232-clip-language-saliency-map-with-output_files/index.html create mode 100644 docs/notebooks/233-blip-convert-with-output.rst create mode 100644 docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_25_0.png create mode 100644 docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_27_0.png create mode 100644 docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_7_0.png create mode 100644 docs/notebooks/233-blip-convert-with-output_files/index.html create mode 100644 docs/notebooks/233-blip-optimize-with-output.rst create mode 100644 docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_23_0.png rename 
docs/notebooks/{233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_30_0.png => 233-blip-optimize-with-output_files/233-blip-optimize-with-output_25_0.png} (100%) create mode 100644 docs/notebooks/233-blip-optimize-with-output_files/index.html delete mode 100644 docs/notebooks/233-blip-visual-language-processing-with-output.rst delete mode 100644 docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_28_0.png delete mode 100644 docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_8_0.png delete mode 100644 docs/notebooks/233-blip-visual-language-processing-with-output_files/index.html delete mode 100644 docs/notebooks/236-stable-diffusion-v2-optimum-demo-comparison-with-output_files/236-stable-diffusion-v2-optimum-demo-comparison-with-output_13_1.png create mode 100644 docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.jpg create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output.rst create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.jpg create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.png create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.jpg create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.png create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.jpg create mode 100644 docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.png create mode 100644 
docs/notebooks/238-deep-floyd-if-optimize-with-output_files/index.html delete mode 100644 docs/notebooks/238-deep-floyd-if-with-output.rst delete mode 100644 docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_29_3.png delete mode 100644 docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_31_3.png delete mode 100644 docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_41_0.png delete mode 100644 docs/notebooks/238-deep-floyd-if-with-output_files/index.html create mode 100644 docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.jpg create mode 100644 docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.png delete mode 100644 docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.jpg delete mode 100644 docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.png create mode 100644 docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.jpg create mode 100644 docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.png delete mode 100644 docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.jpg delete mode 100644 docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.png rename docs/notebooks/249-oneformer-segmentation-with-output_files/{249-oneformer-segmentation-with-output_22_1.jpg => 249-oneformer-segmentation-with-output_23_1.jpg} (100%) rename docs/notebooks/249-oneformer-segmentation-with-output_files/{249-oneformer-segmentation-with-output_22_1.png => 249-oneformer-segmentation-with-output_23_1.png} (100%) delete mode 100644 
docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.jpg delete mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.png create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.jpg create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.png create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.jpg create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.png create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.jpg create mode 100644 docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.png create mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.jpg create mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.png create mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.jpg create mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.png create mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.jpg rename docs/notebooks/251-tiny-sd-image-generation-with-output_files/{251-tiny-sd-image-generation_39_1.png => 251-tiny-sd-image-generation-with-output_39_1.png} (100%) delete mode 100644 docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_33_1.png delete mode 100644 
docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_37_1.png delete mode 100644 docs/notebooks/252-fastcomposer-image-generation-with-output_files/multi-subject.png delete mode 100644 docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_02.png delete mode 100644 docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_03.gif create mode 100644 docs/notebooks/254-llm-chatbot-with-output.rst create mode 100644 docs/notebooks/255-mms-massively-multilingual-speech-with-output.rst create mode 100644 docs/notebooks/256-bark-text-to-audio-with-output.rst create mode 100644 docs/notebooks/257-llava-multimodal-chatbot-with-output.rst create mode 100644 docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.jpg create mode 100644 docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.png create mode 100644 docs/notebooks/257-llava-multimodal-chatbot-with-output_files/index.html create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output.rst create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/1c472f1f-1fce-4a13-9d44-b10f6f760ddb.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_12_0.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_16_0.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_19_0.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_52_0.png create mode 100644 
docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_55_0.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/258-blip-diffusion-subject-generation-with-output_58_0.png create mode 100644 docs/notebooks/258-blip-diffusion-subject-generation-with-output_files/index.html create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output.rst create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_26_1.jpg create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_26_1.png create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_28_1.jpg create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_28_1.png create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_30_1.jpg create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/259-decidiffusion-image-generation-with-output_30_1.png create mode 100644 docs/notebooks/259-decidiffusion-image-generation-with-output_files/index.html create mode 100644 docs/notebooks/260-pix2struct-docvqa-with-output.rst create mode 100644 docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.jpg create mode 100644 docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.png create mode 100644 docs/notebooks/260-pix2struct-docvqa-with-output_files/index.html create mode 100644 docs/notebooks/261-fast-segment-anything-with-output.rst create mode 100644 
docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.jpg create mode 100644 docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.png create mode 100644 docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.jpg create mode 100644 docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.png create mode 100644 docs/notebooks/261-fast-segment-anything-with-output_files/index.html create mode 100644 docs/notebooks/262-softvc-voice-conversion-with-output.rst create mode 100644 docs/notebooks/263-latent-consistency-models-image-generation-with-output.rst create mode 100644 docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.jpg create mode 100644 docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.png create mode 100644 docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/index.html rename docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/{301-tensorflow-training-openvino-nncf-with-output_24_1.png => 301-tensorflow-training-openvino-nncf-with-output_26_1.png} (100%) rename docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/{301-tensorflow-training-openvino-nncf-with-output_2_22.png => 301-tensorflow-training-openvino-nncf-with-output_2_13.png} (100%) delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_15.png rename docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/{301-tensorflow-training-openvino-nncf-with-output_2_5.png => 301-tensorflow-training-openvino-nncf-with-output_2_4.png} (100%) create mode 100644 
docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_6.png create mode 100644 docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_8.png delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_9.png create mode 100644 docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/index.html delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.jpg delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.png create mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.jpg create mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.png delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.jpg delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.png create mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.jpg create mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.png rename docs/notebooks/301-tensorflow-training-openvino-with-output_files/{301-tensorflow-training-openvino-with-output_28_1.png => 301-tensorflow-training-openvino-with-output_29_0.png} (100%) delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_56_1.png create mode 100644 
docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_57_0.png delete mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_65_0.png create mode 100644 docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_66_0.png rename docs/notebooks/301-tensorflow-training-openvino-with-output_files/{301-tensorflow-training-openvino-with-output_78_1.png => 301-tensorflow-training-openvino-with-output_79_1.png} (100%) delete mode 100644 docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_21_0.png create mode 100644 docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_22_0.png delete mode 100644 docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_21_0.png create mode 100644 docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_22_0.png diff --git a/docs/articles_en/learn_openvino/tutorials.md b/docs/articles_en/learn_openvino/tutorials.md index 80d939bf162013..3fa4a11ee8b330 100644 --- a/docs/articles_en/learn_openvino/tutorials.md +++ b/docs/articles_en/learn_openvino/tutorials.md @@ -46,7 +46,65 @@ The Jupyter notebooks are categorized into following classes: - `Model Demos <#model-demos>`__ - `Model Training <#model-training>`__ - `Live Demos <#live-demos>`__ -- `Recommended Tutorials <#recommended-tutorials>`__ + + +Recommended Tutorials +###################### + +The following tutorials are guaranteed to provide a great experience with inference in OpenVINO: + 
++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| Notebook | Description | Preview | ++==============================================================================================================================================+============================================================================================================================================+====================================================+ +| `YOLOv8 - Optimization `__ |br| |c230c| | Convert and Optimize YOLOv8 real-time object detection with OpenVINO™. | |n230-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `SAM - Segment Anything Model `__ | Prompt based object segmentation mask generation, using Segment Anything and OpenVINO™. | |n237-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `ControlNet - Stable-Diffusion `__ | A text-to-image generation with ControlNet Conditioning and OpenVINO™. 
| |n235-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Stable Diffusion v2 `__ | Text-to-image generation and Infinite Zoom with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Whisper - Subtitles generation `__ |br| |c227| | Generate subtitles for video with OpenAI Whisper and OpenVINO. | |n227-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `CLIP - zero-shot-image-classification `__ | Perform Zero-shot image classification with CLIP and OpenVINO. | |n228-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `BLIP - Visual-language-processing `__ | Visual question answering and image captioning using BLIP and OpenVINO™. 
| |n233-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Instruct pix2pix - Image-editing `__ | Image editing with InstructPix2Pix. | |n231-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `DeepFloyd IF - Text-to-Image generation `__ | Text-to-image generation with DeepFloyd IF and OpenVINO™. | |n238-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `ImageBind `__ | Binding multimodal data, using ImageBind and OpenVINO™. | |n239-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Dolly v2 `__ | Instruction following using Databricks Dolly 2.0 and OpenVINO™. 
| |n240-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Stable Diffusion XL `__ | Image generation with Stable Diffusion XL and OpenVINO™. | |n248-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `MusicGen `__ |br| |n250| |br| |c250| | Controllable Music Generation with MusicGen and OpenVINO™. | |n250-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `Tiny SD `__ |br| |c251| | Image Generation with Tiny-SD and OpenVINO™. | |n251-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `253-zeroscope-text2video `__ | Text-to-video synthesis with ZeroScope and OpenVINO™. | A panda eating bamboo on a rock. 
|br| |n253-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `254-llm-chatbot `__ | Create LLM-powered Chatbot using OpenVINO. | |n254-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `256-bark-text-to-audio `__ | Text-to-Speech generation with BARK and OpenVINO. | |n256-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `257-llava-multimodal-chatbot `__ | Visual-language assistant with LLaVA and OpenVINO. | |n257-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `258-blip-diffusion-subject-generation `__ | Subject-driven image generation and editing using BLIP Diffusion and OpenVINO. 
| |n258-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `259-decidiffusion-image-generation `__ | Image Generation with DeciDiffusion. | |n259-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `261-fast-segment-anything `__ |br| |n261| |br| |c261| | Object segmentations with FastSAM and OpenVINO™. | |n261-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `262-softvc-voice-conversion `__ |br| |c262| | Voice conversion with SoftVC VITS and OpenVINO™. | | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ +| `263-latent-consistency-models-image-generation `__ | Image generation with Latent Consistency Model and OpenVINO. 
||n263-img1| | ++----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + + + First steps with OpenVINO ########################## @@ -84,53 +142,63 @@ Tutorials that explain how to optimize and quantize models with OpenVINO tools. .. dropdown:: Explore more notebooks here. - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | Notebook | Description | - +====================================================================================================================================================+==================================================================================================================================+ - | `102-pytorch-onnx-to-openvino `__ | Convert PyTorch models to OpenVINO IR. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `104-model-tools `__ |br| |n104| | Download, convert and benchmark models from Open Model Zoo. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `105-language-quantize-bert `__ | Optimize and quantize a pre-trained BERT model. 
| - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `106-auto-device `__ |br| |n106| | Demonstrates how to use AUTO Device. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `107-speech-recognition-quantization `__ |br| |c107| | Optimize and quantize a pre-trained Data2Vec speech model. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `107-speech-recognition-quantization `__ | Optimize and quantize a pre-trained Wav2Vec2 speech model. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `108-gpu-device `__ | Working with GPUs in OpenVINO™ | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `109-latency-tricks `__ | Performance tricks for latency mode in OpenVINO™. 
| - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `109-throughput-tricks `__ | Performance tricks for throughput mode in OpenVINO™. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `110-ct-segmentation-quantize `__ |br| |n110| | Live inference of a kidney segmentation model and benchmark CT-scan data with OpenVINO. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `110-ct-segmentation-quantize `__ | Quantize a kidney segmentation model and show live inference. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `111-yolov5-quantization-migration `__ |br| |c111| | Migrate YOLOv5 POT API based quantization pipeline on Neural Network Compression Framework (NNCF). 
| - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `112-pytorch-post-training-quantization-nncf `__ | Use Neural Network Compression Framework (NNCF) to quantize PyTorch model in post-training mode (without model fine-tuning). | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `113-image-classification-quantization `__ |br| |n113| | Quantize MobileNet image classification. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `115-async-api `__ |br| |n115| |br| |c115| | Use asynchronous execution to improve data pipelining. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `116-sparsity-optimization `__ |br| |c116| | Improve performance of sparse Transformer models. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `117-model-server `__ | Improve performance of sparse Transformer models. 
| - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `118-optimize-preprocessing `__ | Improve performance of image preprocessing step. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `119-tflite-to-openvino `__ |br| |c119| | Convert TensorFlow Lite models to OpenVINO IR. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `120-tensorflow-object-detection-to-openvino `__ |br| |n120| |br| |c120| | Convert TensorFlow Object Detection models to OpenVINO IR | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `122-speech-recognition-quantization-wav2vec2 `__ | Quantize Speech Recognition Models with accuracy control using NNCF PTQ API. | - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ - | `122-yolov8-quantization-with-accuracy-control `__ | Convert and Optimize YOLOv8 with OpenVINO™. 
| - +----------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | Notebook | Description | + +================================================================================================================================================================+=============================================================================================================================================+ + | `102-pytorch-onnx-to-openvino `__ | Convert PyTorch models to OpenVINO IR. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `104-model-tools `__ |br| |n104| | Download, convert and benchmark models from Open Model Zoo. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `105-language-quantize-bert `__ | Optimize and quantize a pre-trained BERT model. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `106-auto-device `__ |br| |n106| | Demonstrates how to use AUTO Device. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `107-speech-recognition-quantization `__ |br| |c107| | Optimize and quantize a pre-trained Data2Vec speech model. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `107-speech-recognition-quantization `__ | Optimize and quantize a pre-trained Wav2Vec2 speech model. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `108-gpu-device `__ | Working with GPUs in OpenVINO™ | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `109-latency-tricks `__ | Performance tricks for latency mode in OpenVINO™. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `109-throughput-tricks `__ | Performance tricks for throughput mode in OpenVINO™. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `110-ct-segmentation-quantize `__ |br| |n110| | Live inference of a kidney segmentation model and benchmark CT-scan data with OpenVINO. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `110-ct-segmentation-quantize `__ | Quantize a kidney segmentation model and show live inference. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `111-yolov5-quantization-migration `__ |br| |c111| | Migrate YOLOv5 POT API based quantization pipeline on Neural Network Compression Framework (NNCF). 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `112-pytorch-post-training-quantization-nncf `__ | Use Neural Network Compression Framework (NNCF) to quantize PyTorch model in post-training mode (without model fine-tuning). | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `113-image-classification-quantization `__ |br| |n113| | Quantize MobileNet image classification. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `115-async-api `__ |br| |n115| |br| |c115| | Use asynchronous execution to improve data pipelining. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `116-sparsity-optimization `__ |br| |c116| | Improve performance of sparse Transformer models. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `117-model-server `__ | Introduction to model serving with OpenVINO™ Model Server (OVMS). | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `118-optimize-preprocessing `__ | Improve performance of image preprocessing step. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `119-tflite-to-openvino `__ |br| |c119| | Convert TensorFlow Lite models to OpenVINO IR. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `120-tensorflow-object-detection-to-openvino `__ |br| |n120| |br| |c120| | Convert TensorFlow Object Detection models to OpenVINO IR | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `120-tensorflow-instance-segmentation-to-openvino `__ |br| |n120a| |br| |c120a| | Convert the Mask R-CNN with Inception ResNet V2 Instance Segmentation model and then segment instances in an image using OpenVINO Runtime. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `122-speech-recognition-quantization-wav2vec2 `__ | Quantize Speech Recognition Models with accuracy control using NNCF PTQ API. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `122-yolov8-quantization-with-accuracy-control `__ | Convert and Optimize YOLOv8 with OpenVINO™. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `123-detectron2-to-openvino `__ |br| |n123| |br| |c123| | Convert Detectron2 Models to OpenVINO™. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `124-hugging-face-hub `__ |br| |n124| |br| |c124| | Hugging Face Model Hub with OpenVINO™. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `125-convnext-classification `__ | Convert TorchVision ConvNext classification model to OpenVINO IR. | + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ + | `126-tensorflow-hub `__ |br| |n126| |br| |c126| | Convert TensorFlow Hub models to OpenVINO IR. 
| + +----------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------+ Model Demos @@ -154,117 +222,148 @@ Demos that demonstrate inference on a particular model. .. dropdown:: Explore more notebooks below. - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | Notebook | Description | Preview | - +===============================================================================================================================+============================================================================================================================================+====================================================+ - | `201-vision-monodepth `__ |br| |n201| |br| |c201| | Monocular depth estimation with images and video. | |n201-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `202-vision-superresolution-image `__ |br| |n202i| |br| |c202i| | Upscale raw images with a super resolution model. 
| |n202i-img1| → |n202i-img2| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `202-vision-superresolution-video `__ |br| |n202v| |br| |c202v| | Turn 360p into 1080p video using a super resolution model. | |n202v-img1| → |n202v-img2| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `203-meter-reader `__ |br| |n203| | PaddlePaddle pre-trained models to read industrial meter's value. | |n203-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `204-segmenter-semantic-segmentation `__ |br| |c204| | Semantic segmentation with OpenVINO™ using Segmenter. | |n204-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `206-vision-paddlegan-anime `__ | Turn an image into anime using a GAN. 
| |n206-img1| → |n206-img2| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `207-vision-paddlegan-superresolution `__ | Upscale small images with superresolution using a PaddleGAN model. | |n207-img1| → |n207-img2| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `208-optical-character-recognition `__ | Annotate text on images using text recognition resnet. | |n208-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `212-pyannote-speaker-diarization `__ | Run inference on speaker diarization pipeline. 
| |n212-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `210-slowfast-video-recognition `__ |br| |n210| | Video Recognition using SlowFast and OpenVINO™ | |n210-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `213-question-answering `__ |br| |n213| | Answer your questions basing on a context. | |n213-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `214-grammar-correction `__ | Grammatical error correction with OpenVINO. 
| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `216-attention-center `__ | The attention center model with OpenVINO™ | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `217-vision-deblur `__ |br| |n217| | Deblur images with DeblurGAN-v2. | |n217-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `219-knowledge-graphs-conve `__ |br| |n219| | Optimize the knowledge graph embeddings model (ConvE) with OpenVINO. 
| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `220-cross-lingual-books-alignment `__ |br| |n220| |br| |c220| | Cross-lingual Books Alignment With Transformers and OpenVINO™ | |n220-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `221-machine-translation `__ |br| |n221| |br| |c221| | Real-time translation from English to German. | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `222-vision-image-colorization `__ |br| |n222| | Use pre-trained models to colorize black & white images using OpenVINO. | |n222-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `223-text-prediction `__ |br| |c223| | Use pre-trained models to perform text prediction on an input sequence. 
| |n223-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `224-3D-segmentation-point-clouds `__ | Process point cloud data and run 3D Part Segmentation with OpenVINO. | |n224-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `225-stable-diffusion-text-to-image `__ | Text-to-image generation with Stable Diffusion method. | |n225-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `226-yolov7-optimization `__ | Optimize YOLOv7, using NNCF PTQ API. | |n226-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `227-whisper-subtitles-generation `__ |br| |c227| | Generate subtitles for video with OpenAI Whisper and OpenVINO. 
| |n227-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `228-clip-zero-shot-convert `__ | Zero-shot Image Classification with OpenAI CLIP and OpenVINO™ | |n228-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `228-clip-zero-shot-quantize `__ | Post-Training Quantization of OpenAI CLIP model with NNCF | |n228-img2| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `229-distilbert-sequence-classification `__ |br| |n229| | Sequence classification with OpenVINO. | |n229-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `230-yolov8-optimization `__ |br| |c230| | Optimize YOLOv8, using NNCF PTQ API. 
| |n230-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `231-instruct-pix2pix-image-editing `__ | Image editing with InstructPix2Pix. | |n231-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `232-clip-language-saliency-map `__ |br| |c232| | Language-visual saliency with CLIP and OpenVINO™. | |n232-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `233-blip-visual-language-processing `__ | Visual question answering and image captioning using BLIP and OpenVINO™. | |n233-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `234-encodec-audio-compression `__ | Audio compression with EnCodec and OpenVINO™. 
| |n234-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `235-controlnet-stable-diffusion `__ | A text-to-image generation with ControlNet Conditioning and OpenVINO™. | |n235-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `236-stable-diffusion-v2 `__ | Text-to-image generation and Infinite Zoom with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `236-stable-diffusion-v2 `__ | Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware. | |n236-img4| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `236-stable-diffusion-v2 `__ | Stable Diffusion v2.1 using Optimum-Intel OpenVINO. 
| |n236-img4| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `236-stable-diffusion-v2 `__ | Stable Diffusion Text-to-Image Demo. | |n236-img4| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `236-stable-diffusion-v2 `__ | Text-to-image generation with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `237-segment-anything `__ | Prompt based object segmentation mask generation, using Segment Anything and OpenVINO™. | |n237-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `238-deep-floyd-if `__ | Text-to-image generation with DeepFloyd IF and OpenVINO™. 
| |n238-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `239-image-bind `__ | Binding multimodal data, using ImageBind and OpenVINO™. | |n239-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `240-dolly-2-instruction-following `__ | Instruction following using Databricks Dolly 2.0 and OpenVINO™. | |n240-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `241-riffusion-text-to-music `__ | Text-to-Music generation using Riffusion and OpenVINO™. 
| |n241-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `242-freevc-voice-conversion `__ | High-Quality Text-Free One-Shot Voice Conversion with FreeVC and OpenVINO™ | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `243-tflite-selfie-segmentation `__ |br| |n243| |br| |c243| | Selfie Segmentation using TFLite and OpenVINO™. | |n243-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `244-named-entity-recognition `__ |br| |c244| | Named entity recognition with OpenVINO™. | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `245-typo-detector `__ | English Typo Detection in sentences with OpenVINO™. 
| |n245-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `246-depth-estimation-videpth `__ | Monocular Visual-Inertial Depth Estimation with OpenVINO™. | |n246-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `247-code-language-id `__ |br| |n247| | Identify the programming language used in an arbitrary code snippet. | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `248-stable-diffusion-xl `__ | Image generation with Stable Diffusion XL and OpenVINO™. | |n248-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `249-oneformer-segmentation `__ | Universal segmentation with OneFormer and OpenVINO™. 
| |n249-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `250-music-generation `__ |br| |n250| |br| |c250| | Controllable Music Generation with MusicGen and OpenVINO™. | |n250-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `251-tiny-sd-image-generation `__ |br| |c251| | Image Generation with Tiny-SD and OpenVINO™. | |n251-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `252-fastcomposer-image-generation `__ | Image generation with FastComposer and OpenVINO™. | | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `253-zeroscope-text2video `__ | Text-to video synthesis with ZeroScope and OpenVINO™. | A panda eating bamboo on a rock. 
|br| |n253-img1| | - +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | Notebook | Description | Preview | + +==============================================================================================================================================+============================================================================================================================================+====================================================+ + | `201-vision-monodepth `__ |br| |n201| |br| |c201| | Monocular depth estimation with images and video. | |n201-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `202-vision-superresolution-image `__ |br| |n202i| |br| |c202i| | Upscale raw images with a super resolution model. 
| |n202i-img1| → |n202i-img2| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `202-vision-superresolution-video `__ |br| |n202v| |br| |c202v| | Turn 360p into 1080p video using a super resolution model. | |n202v-img1| → |n202v-img2| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `203-meter-reader `__ |br| |n203| | PaddlePaddle pre-trained models to read industrial meter's value. | |n203-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `204-segmenter-semantic-segmentation `__ |br| |c204| | Semantic segmentation with OpenVINO™ using Segmenter. | |n204-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `206-vision-paddlegan-anime `__ | Turn an image into anime using a GAN. 
| |n206-img1| → |n206-img2| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `207-vision-paddlegan-superresolution `__ | Upscale small images with superresolution using a PaddleGAN model. | |n207-img1| → |n207-img2| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `208-optical-character-recognition `__ | Annotate text on images using text recognition resnet. | |n208-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `212-pyannote-speaker-diarization `__ | Run inference on speaker diarization pipeline. 
| |n212-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `210-slowfast-video-recognition `__ |br| |n210| | Video Recognition using SlowFast and OpenVINO™ | |n210-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `213-question-answering `__ |br| |n213| | Answer your questions based on a context. | |n213-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `214-grammar-correction `__ | Grammatical error correction with OpenVINO.
| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `216-attention-center `__ | The attention center model with OpenVINO™ | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `217-vision-deblur `__ |br| |n217| | Deblur images with DeblurGAN-v2. | |n217-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `219-knowledge-graphs-conve `__ |br| |n219| | Optimize the knowledge graph embeddings model (ConvE) with OpenVINO. 
| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `220-cross-lingual-books-alignment `__ |br| |n220| |br| |c220| | Cross-lingual Books Alignment With Transformers and OpenVINO™ | |n220-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `221-machine-translation `__ |br| |n221| |br| |c221| | Real-time translation from English to German. | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `222-vision-image-colorization `__ |br| |n222| | Use pre-trained models to colorize black & white images using OpenVINO. | |n222-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `223-text-prediction `__ |br| |c223| | Use pre-trained models to perform text prediction on an input sequence. 
| |n223-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `224-3D-segmentation-point-clouds `__ |br| |n224| |br| |c224| | Process point cloud data and run 3D Part Segmentation with OpenVINO. | |n224-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `225-stable-diffusion-text-to-image `__ |br| |c225| | Text-to-image generation with Stable Diffusion method. | |n225-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `226-yolov7-optimization `__ | Optimize YOLOv7, using NNCF PTQ API. | |n226-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `227-whisper-subtitles-generation `__ |br| |c227| | Generate subtitles for video with OpenAI Whisper and OpenVINO. 
| |n227-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `228-clip-zero-shot-convert `__ | Zero-shot Image Classification with OpenAI CLIP and OpenVINO™ | |n228-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `228-clip-zero-shot-quantize `__ | Post-Training Quantization of OpenAI CLIP model with NNCF | |n228-img2| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `229-distilbert-sequence-classification `__ |br| |n229| | Sequence classification with OpenVINO. | |n229-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `230-yolov8-instance-segmentation `__ |br| |c230a| | Convert and Optimize YOLOv8 instance segmentation model with OpenVINO™. 
| |n230-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `230-yolov8-keypoint-detection `__ |br| |c230b| | Convert and Optimize YOLOv8 keypoint detection model with OpenVINO™. | |n230-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `230-yolov8-object-detection `__ |br| |c230c| | Convert and Optimize YOLOv8 real-time object detection with OpenVINO™. | |n230-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `231-instruct-pix2pix-image-editing `__ | Image editing with InstructPix2Pix. | |n231-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `232-clip-language-saliency-map `__ |br| |c232| | Language-visual saliency with CLIP and OpenVINO™. 
| |n232-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `233-blip-convert `__ | Visual Question Answering and Image Captioning using BLIP and OpenVINO. | |n233-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `233-blip-optimize `__ | Post-Training Quantization and Weights Compression of OpenAI BLIP model with NNCF. | |n233-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `234-encodec-audio-compression `__ | Audio compression with EnCodec and OpenVINO™. | |n234-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `235-controlnet-stable-diffusion `__ | A text-to-image generation with ControlNet Conditioning and OpenVINO™. 
| |n235-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `236-stable-diffusion-v2 `__ | Text-to-image generation and Infinite Zoom with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `236-stable-diffusion-v2 `__ | Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware. | |n236-img4| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `236-stable-diffusion-v2 `__ | Stable Diffusion v2.1 using Optimum-Intel OpenVINO. | |n236-img4| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `236-stable-diffusion-v2 `__ | Stable Diffusion Text-to-Image Demo. 
| |n236-img4| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `236-stable-diffusion-v2 `__ | Text-to-image generation with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `237-segment-anything `__ | Prompt based object segmentation mask generation, using Segment Anything and OpenVINO™. | |n237-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `238-deep-floyd-if-optimize `__ | Text-to-image generation with DeepFloyd IF and OpenVINO™. | |n238-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `239-image-bind `__ | Binding multimodal data, using ImageBind and OpenVINO™. 
| |n239-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `240-dolly-2-instruction-following `__ | Instruction following using Databricks Dolly 2.0 and OpenVINO™. | |n240-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `241-riffusion-text-to-music `__ | Text-to-Music generation using Riffusion and OpenVINO™. | |n241-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `242-freevc-voice-conversion `__ | High-Quality Text-Free One-Shot Voice Conversion with FreeVC and OpenVINO™ | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `243-tflite-selfie-segmentation `__ |br| |n243| |br| |c243| | Selfie Segmentation using TFLite and OpenVINO™. 
| |n243-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `244-named-entity-recognition `__ |br| |c244| | Named entity recognition with OpenVINO™. | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `245-typo-detector `__ | English Typo Detection in sentences with OpenVINO™. | |n245-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `246-depth-estimation-videpth `__ | Monocular Visual-Inertial Depth Estimation with OpenVINO™. | |n246-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `247-code-language-id `__ |br| |n247| | Identify the programming language used in an arbitrary code snippet. 
| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `248-stable-diffusion-xl `__ | Image generation with Stable Diffusion XL and OpenVINO™. | |n248-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `249-oneformer-segmentation `__ | Universal segmentation with OneFormer and OpenVINO™. | |n249-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `250-music-generation `__ |br| |n250| |br| |c250| | Controllable Music Generation with MusicGen and OpenVINO™. | |n250-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `251-tiny-sd-image-generation `__ |br| |c251| | Image Generation with Tiny-SD and OpenVINO™. 
| |n251-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `252-fastcomposer-image-generation `__ | Image generation with FastComposer and OpenVINO™. | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `253-zeroscope-text2video `__ | Text-to-video synthesis with ZeroScope and OpenVINO™. | A panda eating bamboo on a rock. |br| |n253-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `254-llm-chatbot `__ | Create LLM-powered Chatbot using OpenVINO. | |n254-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `255-mms-massively-multilingual-speech `__ | MMS: Scaling Speech Technology to 1000+ languages with OpenVINO™.
| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `256-bark-text-to-audio `__ | Text-to-Speech generation with BARK and OpenVINO. | |n256-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `257-llava-multimodal-chatbot `__ | Visual-language assistant with LLaVA and OpenVINO. | |n257-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `258-blip-diffusion-subject-generation `__ | Subject-driven image generation and editing using BLIP Diffusion and OpenVINO. | |n258-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `259-decidiffusion-image-generation `__ | Image Generation with DeciDiffusion. 
| |n259-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `260-pix2struct-docvqa `__ |br| |c260| | Document Visual Question Answering Using Pix2Struct and OpenVINO. | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `261-fast-segment-anything `__ |br| |n261| |br| |c261| | Object segmentations with FastSAM and OpenVINO™. | |n261-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `262-softvc-voice-conversion `__ |br| |c262| | SoftVC VITS Singing Voice Conversion and OpenVINO™. | | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + | `263-latent-consistency-models-image-generation `__ | Image generation with Latent Consistency Model and OpenVINO.
| |n263-img1| | + +----------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+ + + + + + Model Training @@ -308,44 +407,6 @@ Live inference demos that run on a webcam or video files. +-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -Recommended Tutorials -###################### - -The following tutorials are guaranteed to provide a great experience with inference in OpenVINO: - -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| Notebook | | Preview | -+===============================================================================================================================+============================================================================================================================================+===========================================+ -| `YOLOv8 - Optimization `__ |br| |c230| | Optimize YOLOv8, using NNCF PTQ API.
| |n230-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `SAM - Segment Anything Model `__ | Prompt based object segmentation mask generation, using Segment Anything and OpenVINO™. | |n237-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `ControlNet - Stable-Diffusion `__ | A text-to-image generation with ControlNet Conditioning and OpenVINO™. | |n235-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Stable Diffusion v2 `__ | Text-to-image generation and Infinite Zoom with Stable Diffusion v2 and OpenVINO™. | |n236-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Whisper - Subtitles generation `__ |br| |c227| | Generate subtitles for video with OpenAI Whisper and OpenVINO. 
| |n227-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `CLIP - zero-shot-image-classification `__ | Perform Zero-shot image classification with CLIP and OpenVINO. | |n228-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `BLIP - Visual-language-processing `__ | Visual question answering and image captioning using BLIP and OpenVINO™. | |n233-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Instruct pix2pix - Image-editing `__ | Image editing with InstructPix2Pix. | |n231-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `DeepFloyd IF - Text-to-Image generation `__ | Text-to-image generation with DeepFloyd IF and OpenVINO™. 
| |n238-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `ImageBind `__ | Binding multimodal data, using ImageBind and OpenVINO™. | |n239-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Dolly v2 `__ | Instruction following using Databricks Dolly 2.0 and OpenVINO™. | |n240-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Stable Diffusion XL `__ | Image generation with Stable Diffusion XL and OpenVINO™. | |n248-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `MusicGen `__ |br| |n250| |br| |c250| | Controllable Music Generation with MusicGen and OpenVINO™. 
| |n250-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ -| `Tiny SD `__ |br| |c251| | Image Generation with Tiny-SD and OpenVINO™. | |n251-img1| | -+-------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+ - - .. note:: If there are any issues while running the notebooks, refer to the **Troubleshooting** and **FAQ** sections in the :doc:`Installation Guide ` or start a GitHub `discussion `__. @@ -495,6 +556,20 @@ Additional Resources :target: https://user-images.githubusercontent.com/29454499/260904650-274fc2f9-24d2-46a3-ac3d-d660ec3c9a19.png .. |n253-img1| image:: https://user-images.githubusercontent.com/76161256/261102399-500956d5-4aac-4710-a77c-4df34bcda3be.gif :target: https://user-images.githubusercontent.com/76161256/261102399-500956d5-4aac-4710-a77c-4df34bcda3be.gif +.. |n254-img1| image:: https://user-images.githubusercontent.com/29454499/255799218-611e7189-8979-4ef5-8a80-5a75e0136b50.png + :target: https://user-images.githubusercontent.com/29454499/255799218-611e7189-8979-4ef5-8a80-5a75e0136b50.png +.. |n256-img1| image:: https://user-images.githubusercontent.com/29454499/269278630-9a770279-0045-480e-95f2-1a2f2d0a5115.png + :target: https://user-images.githubusercontent.com/29454499/269278630-9a770279-0045-480e-95f2-1a2f2d0a5115.png +.. |n257-img1| image:: https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png + :target: https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png +.. 
|n258-img1| image:: https://user-images.githubusercontent.com/76161256/275485611-0ecf621f-b544-44ae-8258-8a49be704989.png + :target: https://user-images.githubusercontent.com/76161256/275485611-0ecf621f-b544-44ae-8258-8a49be704989.png +.. |n259-img1| image:: https://user-images.githubusercontent.com/29454499/274927904-cd734349-9954-4656-ab96-08a903e846ef.png + :target: https://user-images.githubusercontent.com/29454499/274927904-cd734349-9954-4656-ab96-08a903e846ef.png +.. |n261-img1| image:: https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg + :target: https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg +.. |n263-img1| image:: https://user-images.githubusercontent.com/29454499/277367065-13a8f622-8ea7-4d12-b3f8-241d4499305e.png + :target: https://user-images.githubusercontent.com/29454499/277367065-13a8f622-8ea7-4d12-b3f8-241d4499305e.png .. |n301-img1| image:: https://user-images.githubusercontent.com/15709723/127779607-8fa34947-1c35-4260-8d04-981c41a2a2cc.png :target: https://user-images.githubusercontent.com/15709723/127779607-8fa34947-1c35-4260-8d04-981c41a2a2cc.png .. |n401-img1| image:: https://user-images.githubusercontent.com/4547501/141471665-82b28c86-cf64-4bfe-98b3-c314658f2d96.gif @@ -570,11 +645,31 @@ Additional Resources .. |c120| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/120-tensorflow-object-detection-to-openvino/120-tensorflow-object-detection-to-openvino.ipynb +.. 
|n120a| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F120-tensorflow-object-detection-to-openvino%2F120-tensorflow-instance-segmentation-to-openvino.ipynb +.. |c120a| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/120-tensorflow-object-detection-to-openvino/120-tensorflow-instance-segmentation-to-openvino.ipynb .. |n121| image:: https://mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F121-convert-to-openvino%2F121-convert-to-openvino.ipynb .. |c121| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/121-convert-to-openvino/121-convert-to-openvino.ipynb +.. |n123| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F123-detectron2-to-openvino%2F123-detectron2-to-openvino.ipynb +.. |c123| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/123-detectron2-to-openvino/123-detectron2-to-openvino.ipynb +.. 
|n124| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F124-hugging-face-hub%2F124-hugging-face-hub.ipynb +.. |c124| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/124-hugging-face-hub/124-hugging-face-hub.ipynb +.. |n126| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F126-tensorflow-hub%2F126-tensorflow-hub.ipynb +.. |c126| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/126-tensorflow-hub/126-tensorflow-hub.ipynb .. |n209| image:: https://mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F209-handwritten-ocr%2F209-handwritten-ocr.ipynb .. |n201| image:: https://mybinder.org/badge_logo.svg @@ -636,14 +731,28 @@ Additional Resources .. |c223| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/223-text-prediction/223-text-prediction.ipynb +.. 
|n224| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?labpath=notebooks%2F224-3D-segmentation-point-clouds%2F224-3D-segmentation-point-clouds.ipynb +.. |c224| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/224-3D-segmentation-point-clouds/224-3D-segmentation-point-clouds.ipynb +.. |c225| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/225-stable-diffusion-text-to-image/225-stable-diffusion-text-to-image.ipynb .. |c227| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/227-whisper-subtitles-generation/227-whisper-subtitles-generation.ipynb .. |n229| image:: https://mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?labpath=notebooks%2F229-distilbert-sequence-classification%2F229-distilbert-sequence-classification.ipynb -.. |c230| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 +.. 
|c230a| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-instance-segmentation.ipynb +.. |c230b| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 - :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-optimization.ipynb + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-keypoint-detection.ipynb +.. |c230c| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-object-detection.ipynb .. 
|c232| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/232-clip-language-saliency-map/232-clip-language-saliency-map.ipynb @@ -661,6 +770,16 @@ Additional Resources :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/250-music-generation/250-music-generation.ipynb .. |c251| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/251-tiny-sd-image-generation/251-tiny-sd-image-generation.ipynb +.. |c260| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/260-pix2struct-docvqa/260-pix2struct-docvqa.ipynb +.. |n261| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F261-fast-segment-anything%2F261-fast-segment-anything.ipynb +.. 
|c261| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/261-fast-segment-anything/261-fast-segment-anything.ipynb +.. |c262| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 + :width: 109 + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/262-softvc-voice-conversion/262-softvc-voice-conversion.ipynb .. |c305| image:: https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667 :width: 109 :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 32478632b2c65a..779f0f8baf3a8a 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -8,7 +8,7 @@ repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] diff --git a/docs/notebooks/001-hello-world-with-output.rst b/docs/notebooks/001-hello-world-with-output.rst index b938d435b16cc4..685b163de8868b 100644 --- a/docs/notebooks/001-hello-world-with-output.rst +++ 
b/docs/notebooks/001-hello-world-with-output.rst @@ -11,47 +11,52 @@ Zoo `__ is used in this tutorial. For more information about how OpenVINO IR models are created, refer to the `TensorFlow to OpenVINO <101-tensorflow-classification-to-openvino-with-output.html>`__ -tutorial. +tutorial. **Table of contents:** -- `Imports <#imports>`__ -- `Download the Model and data samples <#download-the-model-and-data-samples>`__ -- `Select inference device <#select-inference-device>`__ -- `Load the Model <#load-the-model>`__ -- `Load an Image <#load-an-image>`__ -- `Do Inference <#do-inference>`__ + +- `Imports <#imports>`__ +- `Download the Model and data samples <#download-the-model-and-data-samples>`__ +- `Select inference device <#select-inference-device>`__ +- `Load the Model <#load-the-model>`__ +- `Load an Image <#load-an-image>`__ +- `Do Inference <#do-inference>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - openvino-dev 2023.0.0 requires openvino==2023.0.0, but you have openvino 2023.1.0.dev20230811 which is incompatible. - + Note: you may need to restart the kernel to use updated packages. + -Imports -############################################ +Imports +------------------------------------------------- .. 
code:: ipython3 from pathlib import Path - import sys import cv2 import matplotlib.pyplot as plt import numpy as np import openvino as ov - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file -Download the Model and data samples -######################################################################## +Download the Model and data samples +----------------------------------------------------------------------------- .. code:: ipython3 @@ -84,10 +89,10 @@ Download the Model and data samples artifacts/v3-small_224_1.0_float.bin: 0%| | 0.00/4.84M [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/001-hello-world-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/001-hello-world-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/001-hello-world-with-output_files/


../
-001-hello-world-with-output_10_0.png               16-Aug-2023 01:31              387941
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/001-hello-world-with-output_files/


../
+001-hello-world-with-output_11_1.png               31-Oct-2023 00:35              387941
 

diff --git a/docs/notebooks/002-openvino-api-with-output.rst b/docs/notebooks/002-openvino-api-with-output.rst index d7ae0c6fe91bc9..1810a90a6ee60d 100644 --- a/docs/notebooks/002-openvino-api-with-output.rst +++ b/docs/notebooks/002-openvino-api-with-output.rst @@ -4,27 +4,29 @@ OpenVINO™ Runtime API Tutorial This notebook explains the basics of the OpenVINO Runtime API. It covers: -- `Loading OpenVINO Runtime and Showing Info <#loading-openvino-runtime-and-showing-info>`__ -- `Loading a Model <#loading-a-model>`__ +- `Loading OpenVINO Runtime and Showing + Info <#loading-openvino-runtime-and-showing-info>`__ +- `Loading a Model <#loading-a-model>`__ - - `OpenVINO IR Model <#openvino-ir-model>`__ - - `ONNX Model <#onnx-model>`__ - - `PaddlePaddle Model <#paddlepaddle-model>`__ - - `TensorFlow Model <#tensorflow-model>`__ - - `TensorFlow Lite Model <#tensorflow-lite-model>`__ + - `OpenVINO IR Model <#openvino-ir-model>`__ + - `ONNX Model <#onnx-model>`__ + - `PaddlePaddle Model <#paddlepaddle-model>`__ + - `TensorFlow Model <#tensorflow-model>`__ + - `TensorFlow Lite Model <#tensorflow-lite-model>`__ -- `Getting Information about a Model <#getting-information-about-a-model>`__ +- `Getting Information about a + Model <#getting-information-about-a-model>`__ - - `Model Inputs <#model-inputs>`__ - - `Model Outputs <#model-outputs>`__ + - `Model Inputs <#model-inputs>`__ + - `Model Outputs <#model-outputs>`__ -- `Doing Inference on a Model <#doing-inference-on-a-model>`__ -- `Reshaping and Resizing <#reshaping-and-resizing>`__ +- `Doing Inference on a Model <#doing-inference-on-a-model>`__ +- `Reshaping and Resizing <#reshaping-and-resizing>`__ - - `Change Image Size <#change-image-size>`__ - - `Change Batch Size <#change-batch-size>`__ + - `Change Image Size <#change-image-size>`__ + - `Change Batch Size <#change-batch-size>`__ -- `Caching a Model <#caching-a-model>`__ +- `Caching a Model <#caching-a-model>`__ The notebook is divided into sections with headers. 
The next cell contains global requirements installation and imports. Each section is @@ -37,8 +39,8 @@ same. .. code:: ipython3 # Required imports. Please execute this cell first. - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install requests tqdm + %pip install -q "openvino>=2023.1.0" + %pip install requests tqdm # Fetch `notebook_utils` module import urllib.request @@ -52,16 +54,18 @@ same. .. parsed-literal:: - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.31.0) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.1) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (3.2.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (3.4) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (1.26.16) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (2023.7.22) + Note: you may need to restart the kernel to use updated packages. 
+ Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.31.0) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.1) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (3.3.1) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (3.4) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (2.0.7) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests) (2023.7.22) + Note: you may need to restart the kernel to use updated packages. Loading OpenVINO Runtime and Showing Info -############################################################################################################################# +----------------------------------------- Initialize OpenVINO Runtime with Core() @@ -97,18 +101,19 @@ be faster. Loading a Model -############################################################################################################################# +--------------- After initializing OpenVINO Runtime, first read the model file with ``read_model()``, then compile it to the specified device with the ``compile_model()`` method. 
-`OpenVINO™ supports several model formats `__ +`OpenVINO™ supports several model +formats `__ and enables developers to convert them to its own OpenVINO IR format using a tool dedicated to this task. OpenVINO IR Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~ An OpenVINO IR (Intermediate Representation) model consists of an ``.xml`` file, containing information about network topology, and a @@ -121,7 +126,8 @@ is the case, specifying the weights file is optional. If the weights file has a different filename, it can be specified using the ``weights`` parameter in ``read_model()``. -The OpenVINO `Model Conversion API `__ +The OpenVINO `Model Conversion +API `__ tool is used to convert models to OpenVINO IR format. Model conversion API reads the original model and creates an OpenVINO IR model (``.xml`` and ``.bin`` files) so inference can be performed without delays due to @@ -161,7 +167,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') @@ -176,7 +182,7 @@ notebooks. compiled_model = core.compile_model(model=model, device_name="CPU") ONNX Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~ `ONNX `__ is an open format built to represent machine learning models. ONNX defines a common set of operators - the building @@ -208,7 +214,7 @@ points to the filename of an ONNX model. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/segmentation.onnx') @@ -229,7 +235,7 @@ The ONNX model can be exported to OpenVINO IR with ``save_model()``: ov.save_model(model_onnx, output_model="model/exported_onnx_model.xml") PaddlePaddle Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~ `PaddlePaddle `__ models saved for inference can also be passed to OpenVINO Runtime @@ -262,7 +268,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/inference.pdiparams') @@ -281,19 +287,18 @@ without any conversion step. Pass the filename with extension to ov.save_model(model_paddle, output_model="model/exported_paddle_model.xml") TensorFlow Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~ TensorFlow models saved in frozen graph format can also be passed to ``read_model`` starting in OpenVINO 2022.3. -.. note:: - - Directly loading TensorFlow models is available as a + **NOTE**: Directly loading TensorFlow models is available as a preview feature in the OpenVINO 2022.3 release. Fully functional support will be provided in the upcoming 2023 releases. Currently support is limited to only frozen graph inference format. 
Other TensorFlow model formats must be converted to OpenVINO IR using - `model conversion API `__. + `model conversion + API `__. .. code:: ipython3 @@ -313,7 +318,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.pb') @@ -332,7 +337,7 @@ TensorFlow models saved in frozen graph format can also be passed to ov.save_model(model_tf, output_model="model/exported_tf_model.xml") TensorFlow Lite Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~~ `TFLite `__ models saved for inference can also be passed to OpenVINO Runtime. Pass the filename with extension @@ -358,9 +363,14 @@ It is pre-trained model optimized to work with TensorFlow Lite. model/classification.tflite: 0%| | 0.00/40.9M [00:00`__ + **NOTE** this notebook demonstrates only the basic synchronous + inference API. For an async inference example, please refer to `Async + API notebook <115-async-api-with-output.html>`__ The diagram below shows a typical inference pipeline with OpenVINO @@ -600,7 +608,7 @@ input, wrapping to a dictionary or list can be omitted. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') @@ -746,10 +754,10 @@ To learn more about this notion, refer to the `hello world notebook <001-hello-world-with-output.html>`__. 
Reshaping and Resizing -############################################################################################################################# +---------------------- Change Image Size -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~ Instead of reshaping the image to fit the model, it is also possible to reshape the model to fit the image. Be aware that not all models support @@ -785,7 +793,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/segmentation.bin') @@ -837,7 +845,7 @@ setting the input dimensions to 544x544 also modifies the output dimensions. After reshaping, compile the network once again. Change Batch Size -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~ Use the ``.reshape()`` method to set the batch size, by increasing the first element of ``new_shape``. For example, to set a batch size of two, @@ -897,7 +905,7 @@ input image through the network to see the result: Caching a Model -############################################################################################################################# +--------------- For some devices, like GPU, loading a model can take some time. Model Caching solves this issue by caching the model in a cache directory. If @@ -914,9 +922,7 @@ model will be loaded to the GPU. After running this cell once, the model will be cached, so subsequent runs of this cell will load the model from the cache. -.. note:: - - Model Caching is also available on CPU devices +*Note: Model Caching is also available on CPU devices* .. 
code:: ipython3 @@ -938,7 +944,7 @@ the cache. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/002-openvino-api/model/classification.bin') diff --git a/docs/notebooks/003-hello-segmentation-with-output.rst b/docs/notebooks/003-hello-segmentation-with-output.rst index f19f6a9429005e..dc80c1733045ea 100644 --- a/docs/notebooks/003-hello-segmentation-with-output.rst +++ b/docs/notebooks/003-hello-segmentation-with-output.rst @@ -5,28 +5,36 @@ A very basic introduction to using segmentation models with OpenVINO™. In this tutorial, a pre-trained `road-segmentation-adas-0001 `__ -model from the `Open Model Zoo `__ is used. +model from the `Open Model +Zoo `__ is used. ADAS stands for Advanced Driver Assistance Services. The model recognizes four classes: background, road, curb and mark. **Table of contents:** -- `Imports <#imports>`__ -- `Download model weights <#download-model-weights>`__ -- `Select inference device <#select-inference-device>`__ -- `Load the Model <#load-the-model>`__ -- `Load an Image <#load-an-image>`__ -- `Do Inference <#do-inference>`__ -- `Prepare Data for Visualization <#prepare-data-for-visualization>`__ -- `Visualize data <#visualize-data>`__ + +- `Imports <#imports>`__ +- `Download model weights <#download-model-weights>`__ +- `Select inference device <#select-inference-device>`__ +- `Load the Model <#load-the-model>`__ +- `Load an Image <#load-an-image>`__ +- `Do Inference <#do-inference>`__ +- `Prepare Data for Visualization <#prepare-data-for-visualization>`__ +- `Visualize data <#visualize-data>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" + + +.. 
parsed-literal:: -Imports -######################################### + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 @@ -34,13 +42,18 @@ Imports import matplotlib.pyplot as plt import numpy as np import openvino as ov - import sys - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import segmentation_map_to_image, download_file -Download model weights -############################################################################################################################# +Download model weights +---------------------------------------------------------------- .. code:: ipython3 @@ -76,10 +89,10 @@ Download model weights model/road-segmentation-adas-0001.bin: 0%| | 0.00/720k [00:00`__ dataset is +A sample image from the `Mapillary +Vistas `__ dataset is provided. .. code:: ipython3 + # Download the image from the openvino_notebooks storage + image_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg", + directory="data" + ) + # The segmentation network expects images in BGR format. - image = cv2.imread("../data/image/empty_road_mapillary.jpg") + image = cv2.imread(str(image_filename)) rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) image_h, image_w, _ = image.shape @@ -145,19 +165,25 @@ provided. +.. parsed-literal:: + + data/empty_road_mapillary.jpg: 0%| | 0.00/227k [00:00 + -.. image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_1.png +.. 
image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_2.png -Do Inference -############################################################################################################################# +Do Inference +------------------------------------------------------ .. code:: ipython3 @@ -173,7 +199,7 @@ Do Inference .. parsed-literal:: - + @@ -181,8 +207,8 @@ Do Inference .. image:: 003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_13_1.png -Prepare Data for Visualization -############################################################################################################################# +Prepare Data for Visualization +------------------------------------------------------------------------ .. code:: ipython3 @@ -199,8 +225,8 @@ Prepare Data for Visualization # Create an image with mask. image_with_mask = cv2.addWeighted(resized_mask, alpha, rgb_image, 1 - alpha, 0) -Visualize data -############################################################################################################################# +Visualize data +-------------------------------------------------------- .. 
code:: ipython3 diff --git a/docs/notebooks/003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_1.png b/docs/notebooks/003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_2.png similarity index 100% rename from docs/notebooks/003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_1.png rename to docs/notebooks/003-hello-segmentation-with-output_files/003-hello-segmentation-with-output_11_2.png diff --git a/docs/notebooks/003-hello-segmentation-with-output_files/index.html b/docs/notebooks/003-hello-segmentation-with-output_files/index.html index 2502da6e4073cf..150c980285e1be 100644 --- a/docs/notebooks/003-hello-segmentation-with-output_files/index.html +++ b/docs/notebooks/003-hello-segmentation-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/003-hello-segmentation-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/003-hello-segmentation-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/003-hello-segmentation-with-output_files/


../
-003-hello-segmentation-with-output_10_1.png        16-Aug-2023 01:31              249032
-003-hello-segmentation-with-output_12_1.png        16-Aug-2023 01:31               20550
-003-hello-segmentation-with-output_16_0.png        16-Aug-2023 01:31              260045
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/003-hello-segmentation-with-output_files/


../
+003-hello-segmentation-with-output_11_2.png        31-Oct-2023 00:35              249032
+003-hello-segmentation-with-output_13_1.png        31-Oct-2023 00:35               20550
+003-hello-segmentation-with-output_17_0.png        31-Oct-2023 00:35              260045
 

diff --git a/docs/notebooks/004-hello-detection-with-output.rst b/docs/notebooks/004-hello-detection-with-output.rst index 13a38e8e20d373..b9daf8d04adcaa 100644 --- a/docs/notebooks/004-hello-detection-with-output.rst +++ b/docs/notebooks/004-hello-detection-with-output.rst @@ -6,32 +6,40 @@ OpenVINO™. The `horizontal-text-detection-0001 `__ -model from `Open Model Zoo `__ is used. It +model from `Open Model +Zoo `__ is used. It detects horizontal text in images and returns a blob of data in the shape of ``[100, 5]``. Each detected text box is stored in the ``[x_min, y_min, x_max, y_max, conf]`` format, where the ``(x_min, y_min)`` are the coordinates of the top left bounding box corner, ``(x_max, y_max)`` are the coordinates of the bottom right bounding box corner and ``conf`` is the confidence for the predicted -class. +class. **Table of contents:** -- `Imports <#imports>`__ -- `Download model weights <#download-model-weights>`__ -- `Select inference device <#select-inference-device>`__ -- `Load the Model <#load-the-model>`__ -- `Load an Image <#load-an-image>`__ -- `Do Inference <#do-inference>`__ -- `Visualize Results <#visualize-results>`__ + +- `Imports <#imports>`__ +- `Download model weights <#download-model-weights>`__ +- `Select inference device <#select-inference-device>`__ +- `Load the Model <#load-the-model>`__ +- `Load an Image <#load-an-image>`__ +- `Do Inference <#do-inference>`__ +- `Visualize Results <#visualize-results>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: -Imports -######################################## + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. 
code:: ipython3 @@ -40,13 +48,18 @@ Imports import numpy as np import openvino as ov from pathlib import Path - import sys - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file -Download model weights -####################################################### +Download model weights +---------------------------------------------------------------- .. code:: ipython3 @@ -81,10 +94,10 @@ Download model weights model/horizontal-text-detection-0001.bin: 0%| | 0.00/7.39M [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/004-hello-detection-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/004-hello-detection-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/004-hello-detection-with-output_files/


../
-004-hello-detection-with-output_10_0.png           16-Aug-2023 01:31              305482
-004-hello-detection-with-output_15_0.png           16-Aug-2023 01:31              457214
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/004-hello-detection-with-output_files/


../
+004-hello-detection-with-output_11_1.png           31-Oct-2023 00:35              305482
+004-hello-detection-with-output_16_0.png           31-Oct-2023 00:35              457214
 

diff --git a/docs/notebooks/101-tensorflow-classification-to-openvino-with-output.rst b/docs/notebooks/101-tensorflow-classification-to-openvino-with-output.rst index 020bd5ff97d15e..654f9b4a72b1c4 100644 --- a/docs/notebooks/101-tensorflow-classification-to-openvino-with-output.rst +++ b/docs/notebooks/101-tensorflow-classification-to-openvino-with-output.rst @@ -3,39 +3,51 @@ Convert a TensorFlow Model to OpenVINO™ This short tutorial shows how to convert a TensorFlow `MobileNetV3 `__ -image classification model to OpenVINO `Intermediate Representation `__ -(OpenVINO IR) format, using `Model Conversion API `__. -After creating the OpenVINO IR, load the model in `OpenVINO Runtime `__ +image classification model to OpenVINO `Intermediate +Representation `__ +(OpenVINO IR) format, using `Model Conversion +API `__. +After creating the OpenVINO IR, load the model in `OpenVINO +Runtime `__ and do inference with a sample image. **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Download model <#download-model>`__ -- `Convert a Model to OpenVINO IR Format <#convert-a-model-to-openvino-ir-format>`__ - - `Convert a TensorFlow Model to OpenVINO IR Format <#convert-a-tensorflow-model-to-openvino-ir-format>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Download model <#download-model>`__ +- `Convert a Model to OpenVINO IR + Format <#convert-a-model-to-openvino-ir-format>`__ -- `Test Inference on the Converted Model <#test-inference-on-the-converted-model>`__ + - `Convert a TensorFlow Model to OpenVINO IR + Format <#convert-a-tensorflow-model-to-openvino-ir-format>`__ - - `Load the Model <#load-the-model>`__ +- `Test Inference on the Converted Model <#test-inference-on-the-converted-model>`__ -- `Select inference device <#select-inference-device>`__ + - `Load the Model <#load-the-model>`__ - - `Get Model Information <#get-model-information>`__ - - `Load an Image <#load-an-image>`__ - - `Do Inference <#do-inference>`__ +- `Select 
inference device <#select-inference-device>`__ -- `Timing <#timing>`__ + - `Get Model Information <#get-model-information>`__ + - `Load an Image <#load-an-image>`__ + - `Do Inference <#do-inference>`__ + +- `Timing <#timing>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: -Imports -############################################################################################################################### + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 @@ -47,18 +59,27 @@ Imports import numpy as np import openvino as ov import tensorflow as tf + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + + from notebook_utils import download_file .. parsed-literal:: - 2023-09-08 22:28:30.021569: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:28:30.056559: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 22:29:25.672741: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-10-30 22:29:25.706557: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:28:30.570158: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-30 22:29:26.218506: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Settings -############################################################################################################################### +Settings +-------------------------------------------------- .. code:: ipython3 @@ -70,10 +91,11 @@ Settings ir_path = Path("model/v3-small_224_1.0_float.xml") -Download model -############################################################################################################################### +Download model +-------------------------------------------------------- -Load model using `tf.keras.applications api `__ +Load model using `tf.keras.applications +api `__ and save it to the disk. .. code:: ipython3 @@ -89,26 +111,13 @@ and save it to the disk. .. parsed-literal:: - 2023-09-08 22:28:31.436088: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2023-10-30 22:29:27.284203: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. 
Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... .. parsed-literal:: WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model. - - -.. parsed-literal:: - - 2023-09-08 22:28:35.666551: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1,1,1024] - [[{{node inputs}}]] - 2023-09-08 22:28:38.807497: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1,1,1024] - [[{{node inputs}}]] - WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 54). These functions will not be directly callable after loading. - - -.. parsed-literal:: - INFO:tensorflow:Assets written to: model/v3-small_224_1.0_float/assets @@ -117,11 +126,11 @@ and save it to the disk. 
INFO:tensorflow:Assets written to: model/v3-small_224_1.0_float/assets -Convert a Model to OpenVINO IR Format -############################################################################################################################### +Convert a Model to OpenVINO IR Format +------------------------------------------------------------------------------- -Convert a TensorFlow Model to OpenVINO IR Format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert a TensorFlow Model to OpenVINO IR Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the model conversion Python API to convert the TensorFlow model to OpenVINO IR. The ``ov.convert_model`` function accept path to saved @@ -149,21 +158,21 @@ models. Exporting TensorFlow model to IR... This may take a few minutes. -Test Inference on the Converted Model -############################################################################################################################### +Test Inference on the Converted Model +------------------------------------------------------------------------------- -Load the Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 core = ov.Core() model = core.read_model(ir_path) -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -191,8 +200,8 @@ Select device from dropdown list for running inference using OpenVINO: compiled_model = core.compile_model(model=model, device_name=device.value) -Get Model Information -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get Model Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -200,16 +209,22 @@ Get Model Information output_key = compiled_model.output(0) network_input_shape = input_key.shape -Load an Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load an Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Load an image, resize it, and convert it to the input shape of the network. .. code:: ipython3 + # Download the image from the openvino_notebooks storage + image_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + directory="data" + ) + # The MobileNet network expects images in RGB format. - image = cv2.cvtColor(cv2.imread(filename="../data/image/coco.jpg"), code=cv2.COLOR_BGR2RGB) + image = cv2.cvtColor(cv2.imread(filename=str(image_filename)), code=cv2.COLOR_BGR2RGB) # Resize the image to the network input shape. resized_image = cv2.resize(src=image, dsize=(224, 224)) @@ -221,11 +236,17 @@ network. -.. image:: 101-tensorflow-classification-to-openvino-with-output_files/101-tensorflow-classification-to-openvino-with-output_19_0.png +.. parsed-literal:: + + data/coco.jpg: 0%| | 0.00/202k [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/101-tensorflow-classification-to-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/101-tensorflow-classification-to-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/101-tensorflow-classification-to-openvino-with-output_files/


../
-101-tensorflow-classification-to-openvino-with-..> 16-Aug-2023 01:31              387941
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/101-tensorflow-classification-to-openvino-with-output_files/


../
+101-tensorflow-classification-to-openvino-with-..> 31-Oct-2023 00:35              387941
 

diff --git a/docs/notebooks/102-pytorch-onnx-to-openvino-with-output.rst b/docs/notebooks/102-pytorch-onnx-to-openvino-with-output.rst index 1a1d6acf91179c..81805f59100eb5 100644 --- a/docs/notebooks/102-pytorch-onnx-to-openvino-with-output.rst +++ b/docs/notebooks/102-pytorch-onnx-to-openvino-with-output.rst @@ -35,45 +35,57 @@ documentation `__ **Table of contents:** -- `Preparation <#preparation>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ - - `Load Model <#load-model>`__ +- `Preparation <#preparation>`__ -- `ONNX Model Conversion <#onnx-model-conversion>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ + - `Load Model <#load-model>`__ - - `Convert PyTorch model to ONNX <#convert-pytorch-model-to-onnx>`__ - - `Convert ONNX Model to OpenVINO IR Format <#convert-onnx-model-to-openvino-ir-format>`__ +- `ONNX Model Conversion <#onnx-model-conversion>`__ -- `Show Results <#show-results>`__ + - `Convert PyTorch model to + ONNX <#convert-pytorch-model-to-onnx>`__ + - `Convert ONNX Model to OpenVINO IR + Format <#convert-onnx-model-to-openvino-ir-format>`__ - - `Load and Preprocess an Input Image <#load-and-preprocess-an-input-image>`__ - - `Load the OpenVINO IR Network and Run Inference on the ONNX model <#load-the-openvino-ir-network-and-run-inference-on-the-onnx-model>`__ +- `Show Results <#show-results>`__ - - `1. ONNX Model in OpenVINO Runtime <#onnx-model-in-openvino-runtime>`__ - - `Select an inference device <#select-an-inference-device>`__ - - `2. 
OpenVINO IR Model in OpenVINO Runtime <#openvino-ir-model-in-openvino-runtime>`__ - - `Select the inference device <#select-the-inference-device>`__ + - `Load and Preprocess an Input + Image <#load-and-preprocess-an-input-image>`__ + - `Load the OpenVINO IR Network and Run Inference on the ONNX + model <#load-the-openvino-ir-network-and-run-inference-on-the-onnx-model>`__ -- `PyTorch Comparison <#pytorch-comparison>`__ -- `Performance Comparison <#performance-comparison>`__ -- `References <#references>`__ + - `1. ONNX Model in OpenVINO + Runtime <#-onnx-model-in-openvino-runtime>`__ + - `Select inference device <#select-inference-device>`__ + - `2. OpenVINO IR Model in OpenVINO + Runtime <#-openvino-ir-model-in-openvino-runtime>`__ + - `Select inference device <#select-inference-device>`__ + +- `PyTorch Comparison <#pytorch-comparison>`__ +- `Performance Comparison <#performance-comparison>`__ +- `References <#references>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" onnx + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. -Preparation -######################################################################## -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 - import sys import time import warnings from pathlib import Path @@ -84,11 +96,17 @@ Imports import torch from torchvision.models.segmentation import lraspp_mobilenet_v3_large, LRASPP_MobileNet_V3_Large_Weights - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import segmentation_map_to_image, viz_result_image, SegmentationMap, Label, download_file -Settings -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Set a name for the model, then define width and height of the image that will be used by the network during inference. According to the input @@ -109,12 +127,14 @@ transforms function, the model is pre-trained on images with a height of onnx_path.parent.mkdir() ir_path = onnx_path.with_suffix(".xml") -Load Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Generally, PyTorch models represent an instance of ``torch.nn.Module`` class, initialized by a state dictionary with model weights. Typical -steps for getting a pre-trained model: 1. Create instance of model class +steps for getting a pre-trained model: + +1. Create instance of model class 2. Load checkpoint state dict, which contains pre-trained model weights 3. Turn model to evaluation for switching some operations to inference mode @@ -160,11 +180,11 @@ have not downloaded the model before. 
Loaded PyTorch LRASPP MobileNetV3 model -ONNX Model Conversion -################################################################################ +ONNX Model Conversion +--------------------------------------------------------------- -Convert PyTorch model to ONNX -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert PyTorch model to ONNX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO supports PyTorch models that are exported in ONNX format. We will use the ``torch.onnx.export`` function to obtain the ONNX model, @@ -203,8 +223,8 @@ line of the output will read: ONNX model exported to model/lraspp_mobilenet_v3_large.onnx. -Convert ONNX Model to OpenVINO IR Format -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert ONNX Model to OpenVINO IR Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To convert the ONNX model to OpenVINO IR with ``FP16`` precision, use model conversion API. The models are saved inside the current directory. @@ -226,14 +246,14 @@ For more information on how to convert models, see this Exporting ONNX model to IR... This may take a few minutes. -Show Results -###################################################################### +Show Results +------------------------------------------------------ Confirm that the segmentation results look as expected by comparing model predictions on the ONNX, OpenVINO IR and PyTorch models. -Load and Preprocess an Input Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load and Preprocess an Input Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Images need to be normalized before propagating through the network. @@ -254,8 +274,13 @@ Images need to be normalized before propagating through the network. .. 
code:: ipython3 - image_filename = "../data/image/coco.jpg" - image = cv2.cvtColor(cv2.imread(image_filename), cv2.COLOR_BGR2RGB) + # Download the image from the openvino_notebooks storage + image_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + directory="data" + ) + + image = cv2.cvtColor(cv2.imread(str(image_filename)), cv2.COLOR_BGR2RGB) resized_image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT)) normalized_image = normalize(resized_image) @@ -264,8 +289,15 @@ Images need to be normalized before propagating through the network. input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0) normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0) -Load the OpenVINO IR Network and Run Inference on the ONNX model -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +.. parsed-literal:: + + data/coco.jpg: 0%| | 0.00/202k [00:00`__ - `Pytorch ONNX diff --git a/docs/notebooks/102-pytorch-onnx-to-openvino-with-output_files/index.html b/docs/notebooks/102-pytorch-onnx-to-openvino-with-output_files/index.html index d28d774056a68c..0030b3e836c9de 100644 --- a/docs/notebooks/102-pytorch-onnx-to-openvino-with-output_files/index.html +++ b/docs/notebooks/102-pytorch-onnx-to-openvino-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/


../
-102-pytorch-onnx-to-openvino-with-output_21_0.png  16-Aug-2023 01:31              465692
-102-pytorch-onnx-to-openvino-with-output_26_0.png  16-Aug-2023 01:31              465695
-102-pytorch-onnx-to-openvino-with-output_28_0.png  16-Aug-2023 01:31              465692
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/102-pytorch-onnx-to-openvino-with-output_files/


../
+102-pytorch-onnx-to-openvino-with-output_22_0.png  31-Oct-2023 00:35              465692
+102-pytorch-onnx-to-openvino-with-output_27_0.png  31-Oct-2023 00:35              465695
+102-pytorch-onnx-to-openvino-with-output_29_0.png  31-Oct-2023 00:35              465692
 

diff --git a/docs/notebooks/102-pytorch-to-openvino-with-output.rst b/docs/notebooks/102-pytorch-to-openvino-with-output.rst index 6b962778c1404c..71a724522deb89 100644 --- a/docs/notebooks/102-pytorch-to-openvino-with-output.rst +++ b/docs/notebooks/102-pytorch-to-openvino-with-output.rst @@ -31,44 +31,66 @@ regimes. **Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Load PyTorch Model <#load-pytorch-model>`__ - - `Prepare Input Data <#prepare-input-data>`__ - - `Run PyTorch Model Inference <#run-pytorch-model-inference>`__ - - `Benchmark PyTorch Model Inference <#benchmark-pytorch-model-inference>`__ +- `Prerequisites <#prerequisites>`__ +- `Load PyTorch Model <#load-pytorch-model>`__ + + - `Prepare Input Data <#prepare-input-data>`__ + - `Run PyTorch Model + Inference <#run-pytorch-model-inference>`__ + - `Benchmark PyTorch Model + Inference <#benchmark-pytorch-model-inference>`__ + +- `Convert PyTorch Model to OpenVINO Intermediate + Representation <#convert-pytorch-model-to-openvino-intermediate-representation>`__ + + - `Select inference device <#select-inference-device>`__ + - `Run OpenVINO Model + Inference <#run-openvino-model-inference>`__ + - `Benchmark OpenVINO Model + Inference <#benchmark-openvino-model-inference>`__ + +- `Convert PyTorch Model with Static Input + Shape <#convert-pytorch-model-with-static-input-shape>`__ + + - `Select inference device <#select-inference-device>`__ + - `Run OpenVINO Model Inference with Static Input + Shape <#run-openvino-model-inference-with-static-input-shape>`__ + - `Benchmark OpenVINO Model Inference with Static Input + Shape <#benchmark-openvino-model-inference-with-static-input-shape>`__ + +- `Convert TorchScript Model to OpenVINO Intermediate + Representation <#convert-torchscript-model-to-openvino-intermediate-representation>`__ + + - `Scripted Model <#scripted-model>`__ + - `Benchmark Scripted Model + Inference <#benchmark-scripted-model-inference>`__ + - `Convert PyTorch Scripted Model to 
OpenVINO Intermediate + Representation <#convert-pytorch-scripted-model-to-openvino-intermediate-representation>`__ + - `Benchmark OpenVINO Model Inference Converted From Scripted + Model <#benchmark-openvino-model-inference-converted-from-scripted-model>`__ + - `Traced Model <#traced-model>`__ + - `Benchmark Traced Model + Inference <#benchmark-traced-model-inference>`__ + - `Convert PyTorch Traced Model to OpenVINO Intermediate + Representation <#convert-pytorch-traced-model-to-openvino-intermediate-representation>`__ + - `Benchmark OpenVINO Model Inference Converted From Traced + Model <#benchmark-openvino-model-inference-converted-from-traced-model>`__ + +Prerequisites +------------------------------------------------------- -- `Convert PyTorch Model to OpenVINO Intermediate Representation <#convert-pytorch-model-to-openvino-intermediate-representation>`__ - - - `Select inference device <#select-inference-device>`__ - - `Run OpenVINO Model Inference <#run-openvino-model-inference>`__ - - `Benchmark OpenVINO Model Inference <#benchmark-openvino-model-inference>`__ - -- `Convert PyTorch Model with Static Input Shape <#convert-pytorch-model-with-static-input-shape>`__ - - - `Select inference device <#select-inference-device>`__ - - `Run OpenVINO Model Inference with Static Input Shape <#run-openvino-model-inference-with-static-input-shape>`__ - - `Benchmark OpenVINO Model Inference with Static Input Shape <#benchmark-openvino-model-inference-with-static-input-shape>`__ +Install notebook dependencies -- `Convert TorchScript Model to OpenVINO Intermediate Representation <#convert-torchscript-model-to-openvino-intermediate-representation>`__ +.. 
code:: ipython3 - - `Scripted Model <#scripted-model>`__ - - `Benchmark Scripted Model Inference <#benchmark-scripted-model-inference>`__ - - `Convert PyTorch Scripted Model to OpenVINO Intermediate Representation <#convert-pytorch-scripted-model-to-openvino-intermediate-representation>`__ - - `Benchmark OpenVINO Model Inference Converted From Scripted Model <#benchmark-openvino-model-inference-converted-from-scripted-model>`__ - - `Traced Model <#traced-model>`__ - - `Benchmark Traced Model Inference <#benchmark-traced-model-inference>`__ - - `Convert PyTorch Traced Model to OpenVINO Intermediate Representation <#convert-pytorch-traced-model-to-openvino-intermediate-representation>`__ - - `Benchmark OpenVINO Model Inference Converted From Traced Model <#benchmark-openvino-model-inference-converted-from-traced-model>`__ + %pip install -q "openvino>=2023.1.0" scipy -Prerequisites -############################################################################################################################### -Install notebook dependencies +.. parsed-literal:: -.. code:: ipython3 + Note: you may need to restart the kernel to use updated packages. - !pip install -q "openvino==2023.1.0.dev20230811" scipy Download input data and label map @@ -96,8 +118,8 @@ Download input data and label map imagenet_classes = labels_file.open("r").read().splitlines() -Load PyTorch Model -############################################################################################################################### +Load PyTorch Model +------------------------------------------------------------ Generally, PyTorch models represent an instance of the ``torch.nn.Module`` class, initialized by a state dictionary with model @@ -127,8 +149,8 @@ enum ``RegNet_Y_800MF_Weights.DEFAULT``. 
# switch model to inference mode model.eval(); -Prepare Input Data -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prepare Input Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code below demonstrates how to preprocess input data using a model-specific transforms module from ``torchvision``. After @@ -149,8 +171,8 @@ the first dimension. # Add batch dimension to image tensor input_tensor = img_transformed.unsqueeze(0) -Run PyTorch Model Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run PyTorch Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model returns a vector of probabilities in raw logits format, softmax can be applied to get normalized values in the [0, 1] range. For @@ -205,8 +227,8 @@ can be reused later. 5: hamper - 2.35% -Benchmark PyTorch Model Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Benchmark PyTorch Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -218,11 +240,11 @@ Benchmark PyTorch Model Inference .. parsed-literal:: - 13.5 ms ± 5.61 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + 17.5 ms ± 9.66 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) -Convert PyTorch Model to OpenVINO Intermediate Representation -############################################################################################################################### +Convert PyTorch Model to OpenVINO Intermediate Representation +------------------------------------------------------------------------------------------------------- Starting from the 2023.0 release OpenVINO supports direct PyTorch models conversion to OpenVINO Intermediate Representation (IR) format. 
OpenVINO @@ -230,13 +252,11 @@ model conversion API should be used for these purposes. More details regarding PyTorch model conversion can be found in OpenVINO `documentation `__ -.. note:: - - Please, take into account that direct support PyTorch + **Note**: Please, take into account that direct support PyTorch models conversion is an experimental feature. Model coverage will be increased in the next releases. For cases, when PyTorch model conversion failed, you still can try to export the model to ONNX - format. Please, refer to this + format. Please refer to this `tutorial <102-pytorch-to-openvino-with-output.html>`__ which explains how to convert PyTorch model to ONNX, then to OpenVINO @@ -274,24 +294,6 @@ More details can be found on this ov_model -.. parsed-literal:: - - 2023-09-08 22:29:26.465675: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:29:26.497093: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:29:27.072823: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - - .. 
parsed-literal:: @@ -306,10 +308,10 @@ More details can be found on this -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 @@ -354,8 +356,8 @@ Select device from dropdown list for running inference using OpenVINO: -Run OpenVINO Model Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run OpenVINO Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -385,8 +387,8 @@ Run OpenVINO Model Inference 5: hamper - 2.35% -Benchmark OpenVINO Model Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Benchmark OpenVINO Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -397,11 +399,11 @@ Benchmark OpenVINO Model Inference .. parsed-literal:: - 3.16 ms ± 13.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + 3.21 ms ± 12 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each) -Convert PyTorch Model with Static Input Shape -############################################################################################################################### +Convert PyTorch Model with Static Input Shape +--------------------------------------------------------------------------------------- The default conversion path preserves dynamic input shapes, in order if you want to convert the model with static shapes, you can explicitly @@ -433,10 +435,10 @@ reshaping example please check the following -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 @@ -476,8 +478,8 @@ Now, we can see that input of our converted model is tensor of shape [1, 3, 224, 224] instead of [?, 3, ?, ?] reported by previously converted model. -Run OpenVINO Model Inference with Static Input Shape -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run OpenVINO Model Inference with Static Input Shape +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -507,8 +509,8 @@ Run OpenVINO Model Inference with Static Input Shape 5: hamper - 2.35% -Benchmark OpenVINO Model Inference with Static Input Shape -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Benchmark OpenVINO Model Inference with Static Input Shape +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -519,11 +521,11 @@ Benchmark OpenVINO Model Inference with Static Input Shape .. parsed-literal:: - 2.81 ms ± 20.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + 2.79 ms ± 12 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) -Convert TorchScript Model to OpenVINO Intermediate Representation -############################################################################################################################### +Convert TorchScript Model to OpenVINO Intermediate Representation +----------------------------------------------------------------------------------------------------------- TorchScript is a way to create serializable and optimizable models from PyTorch code. Any TorchScript program can be saved from a Python process @@ -543,8 +545,8 @@ There are 2 possible ways to convert the PyTorch model to TorchScript: Let’s consider both approaches and their conversion into OpenVINO IR. -Scripted Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Scripted Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``torch.jit.script`` inspects model source code and compiles it to ``ScriptModule``. After compilation model can be used for inference or @@ -596,8 +598,8 @@ Reference -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/102-pytorch-to-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/102-pytorch-to-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/102-pytorch-to-openvino-with-output_files/


../
-102-pytorch-to-openvino-with-output_11_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_11_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_20_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_20_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_31_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_31_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_35_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_35_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_39_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_39_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_43_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_43_0.png       16-Aug-2023 01:31              542516
-102-pytorch-to-openvino-with-output_47_0.jpg       16-Aug-2023 01:31               54874
-102-pytorch-to-openvino-with-output_47_0.png       16-Aug-2023 01:31              542516
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/102-pytorch-to-openvino-with-output_files/


../
+102-pytorch-to-openvino-with-output_11_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_11_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_20_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_20_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_31_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_31_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_35_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_35_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_39_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_39_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_43_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_43_0.png       31-Oct-2023 00:35              542516
+102-pytorch-to-openvino-with-output_47_0.jpg       31-Oct-2023 00:35               54874
+102-pytorch-to-openvino-with-output_47_0.png       31-Oct-2023 00:35              542516
 

diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output.rst b/docs/notebooks/103-paddle-to-openvino-classification-with-output.rst index c0315dfcc595df..50ec3305ec903d 100644 --- a/docs/notebooks/103-paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/103-paddle-to-openvino-classification-with-output.rst @@ -16,40 +16,50 @@ Source of the **Table of contents:** -- `Preparation <#preparation>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ +- `Preparation <#preparation>`__ -- `Show Inference on PaddlePaddle Model <#show-inference-on-paddlepaddle-model>`__ -- `Convert the Model to OpenVINO IR Format <#convert-the-model-to-openvino-ir-format>`__ -- `Select inference device <#select-inference-device>`__ -- `Show Inference on OpenVINO Model <#show-inference-on-openvino-model>`__ -- `Timing and Comparison <#timing-and-comparison>`__ -- `Select inference device <#select-inference-device>`__ -- `References <#references>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ -Preparation -############################################################################################################################### +- `Show Inference on PaddlePaddle + Model <#show-inference-on-paddlepaddle-model>`__ +- `Convert the Model to OpenVINO IR + Format <#convert-the-model-to-openvino-ir-format>`__ +- `Select inference device <#select-inference-device>`__ +- `Show Inference on OpenVINO + Model <#show-inference-on-openvino-model>`__ +- `Timing and Comparison <#timing-and-comparison>`__ +- `Select inference device <#select-inference-device>`__ +- `References <#references>`__ -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 import sys if sys.version_info.minor > 7: - !pip install -q "paddlepaddle>=2.5.0" + %pip install -q "paddlepaddle>=2.5.1" else: - !pip install -q "paddlepaddle==2.4.2" + %pip install -q "paddlepaddle==2.4.2" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 !pip install -q paddleclas --no-deps !pip install -q "prettytable" "ujson" "visualdl>=2.2.0" "faiss-cpu>=1.7.1" # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + !pip install -q "openvino>=2023.1.0" .. parsed-literal:: @@ -60,12 +70,31 @@ Imports paddleclas 2.5.1 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. +.. code:: ipython3 + + import platform + + if (platform.system() == "Linux"): + !wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + !sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + + +.. parsed-literal:: + + --2023-10-30 22:31:22-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb + Resolving proxy-mu.intel.com (proxy-mu.intel.com)... 10.217.247.236 + Connecting to proxy-mu.intel.com (proxy-mu.intel.com)|10.217.247.236|:911... connected. + Proxy request sent, awaiting response... 404 Not Found + 2023-10-30 22:31:22 ERROR 404: Not Found. + + dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory + + .. code:: ipython3 import time import tarfile from pathlib import Path - import sys import matplotlib.pyplot as plt import numpy as np @@ -73,18 +102,24 @@ Imports from paddleclas import PaddleClas from PIL import Image - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file .. 
parsed-literal:: - 2023-09-08 22:30:09 INFO: Loading faiss with AVX2 support. - 2023-09-08 22:30:09 INFO: Successfully loaded faiss with AVX2 support. + 2023-10-30 22:31:24 INFO: Loading faiss with AVX2 support. + 2023-10-30 22:31:24 INFO: Successfully loaded faiss with AVX2 support. -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Set ``IMAGE_FILENAME`` to the filename of an image to use. Set ``MODEL_NAME`` to the PaddlePaddle model to download from PaddleHub. @@ -101,7 +136,13 @@ PaddleHub. This may take a while. .. code:: ipython3 - IMAGE_FILENAME = "../data/image/coco_close.png" + # Download the image from the openvino_notebooks storage + img = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_close.png", + directory="data" + ) + + IMAGE_FILENAME = img.as_posix() MODEL_NAME = "MobileNetV3_large_x1_0" MODEL_DIR = Path("model") @@ -118,6 +159,12 @@ PaddleHub. This may take a while. +.. parsed-literal:: + + data/coco_close.png: 0%| | 0.00/133k [00:00 + -.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_15_3.png +.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_16_3.png To decode the labels predicted by the model to names of classes, we need @@ -250,8 +297,8 @@ OpenVINO model. 
partition = line.split("\n")[0].partition(" ") class_id_map[int(partition[0])] = str(partition[-1]) -Convert the Model to OpenVINO IR Format -############################################################################################################################### +Convert the Model to OpenVINO IR Format +--------------------------------------------------------------------------------- Call the OpenVINO Model Conversion API to convert the PaddlePaddle model to OpenVINO IR, with FP32 precision. ``ov.convert_model`` function @@ -271,10 +318,10 @@ for more information about the Model Conversion API. else: print(f"{model_xml} already exists.") -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -299,8 +346,8 @@ Select device from dropdown list for running inference using OpenVINO: -Show Inference on OpenVINO Model -############################################################################################################################### +Show Inference on OpenVINO Model +-------------------------------------------------------------------------- Load the IR model, get model information, load the image, do inference, convert the inference to a meaningful result, and show the output. See @@ -344,11 +391,11 @@ information. -.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_23_1.png +.. 
image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_24_1.png -Timing and Comparison -############################################################################################################################### +Timing and Comparison +--------------------------------------------------------------- Measure the time it takes to do inference on fifty images and compare the result. The timing information gives an indication of performance. @@ -401,7 +448,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0070 seconds per image, FPS: 143.05 + PaddlePaddle model on CPU: 0.0071 seconds per image, FPS: 141.73 PaddlePaddle result: Labrador retriever, 0.75138 @@ -412,13 +459,13 @@ Note that many optimizations are possible to improve the performance. -.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_27_1.png +.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_28_1.png -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -463,7 +510,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0030 seconds per image, FPS: 337.80 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0030 seconds per image, FPS: 328.24 OpenVINO result: Labrador retriever, 0.74909 @@ -474,11 +521,11 @@ Select device from dropdown list for running inference using OpenVINO: -.. 
image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_30_1.png +.. image:: 103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_31_1.png -References -############################################################################################################################### +References +---------------------------------------------------- - `PaddleClas `__ - `OpenVINO PaddlePaddle diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_15_3.png b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_16_3.png similarity index 100% rename from docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_15_3.png rename to docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_16_3.png diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_23_1.png b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_24_1.png similarity index 100% rename from docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_23_1.png rename to docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_24_1.png diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_27_1.png b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_28_1.png similarity index 100% rename from 
docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_27_1.png rename to docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_28_1.png diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_30_1.png b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_31_1.png similarity index 100% rename from docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_30_1.png rename to docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_31_1.png diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_8_1.png b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_9_1.png similarity index 100% rename from docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_8_1.png rename to docs/notebooks/103-paddle-to-openvino-classification-with-output_files/103-paddle-to-openvino-classification-with-output_9_1.png diff --git a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/index.html b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/index.html index ca87d2ee17e666..8b1207e9e33259 100644 --- a/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/index.html +++ b/docs/notebooks/103-paddle-to-openvino-classification-with-output_files/index.html @@ -1,11 +1,11 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/ +Index of 
/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/


../
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31              120883
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31              224886
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31              224886
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31              224886
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31              224886
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/103-paddle-to-openvino-classification-with-output_files/


../
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35              120883
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35              224886
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35              224886
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35              224886
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35              224886
 

diff --git a/docs/notebooks/104-model-tools-with-output.rst b/docs/notebooks/104-model-tools-with-output.rst index c098431d60caff..3b7b163a6fb07a 100644 --- a/docs/notebooks/104-model-tools-with-output.rst +++ b/docs/notebooks/104-model-tools-with-output.rst @@ -8,22 +8,28 @@ the model. **Table of contents:** -- `OpenVINO and Open Model Zoo Tools <#openvino-and-open-model-zoo-tools>`__ -- `Preparation <#preparation>`__ - - `Model Name <#model-name>`__ - - `Imports <#imports>`__ - - `Settings and Configuration <#settings-and-configuration>`__ +- `OpenVINO and Open Model Zoo + Tools <#openvino-and-open-model-zoo-tools>`__ +- `Preparation <#preparation>`__ -- `Download a Model from Open Model Zoo <#download-a-model-from-open-model-zoo>`__ -- `Convert a Model to OpenVINO IR format <#convert-a-model-to-openvino-ir-format>`__ -- `Get Model Information <#get-model-information>`__ -- `Run Benchmark Tool <#run-benchmark-tool>`__ + - `Model Name <#model-name>`__ + - `Imports <#imports>`__ + - `Settings and + Configuration <#settings-and-configuration>`__ - - `Benchmark with Different Settings <#benchmark-with-different-settings>`__ +- `Download a Model from Open Model + Zoo <#download-a-model-from-open-model-zoo>`__ +- `Convert a Model to OpenVINO IR + format <#convert-a-model-to-openvino-ir-format>`__ +- `Get Model Information <#get-model-information>`__ +- `Run Benchmark Tool <#run-benchmark-tool>`__ + + - `Benchmark with Different + Settings <#benchmark-with-different-settings>`__ OpenVINO and Open Model Zoo Tools -############################################################################################################################### +--------------------------------------------------------------------------- OpenVINO and Open Model Zoo tools are listed in the table below. @@ -46,13 +52,19 @@ OpenVINO and Open Model Zoo tools are listed in the table below. .. 
code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino-dev>=2023.1.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Preparation -############################################################################################################################### +----------------------------------------------------- Model Name -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Set ``model_name`` to the name of the Open Model Zoo model to use in this notebook. Refer to the list of @@ -68,22 +80,26 @@ pre-trained models for a full list of models that can be used. Set model_name = "mobilenet-v2-pytorch" Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 import json - import sys from pathlib import Path import openvino as ov from IPython.display import Markdown, display - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) from notebook_utils import DeviceNotFoundAlert, NotebookAlert Settings and Configuration -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Set the file and directory paths. 
By default, this notebook downloads models from Open Model Zoo to the ``open_model_zoo_models`` directory in @@ -122,7 +138,7 @@ The following settings can be changed: Download a Model from Open Model Zoo -############################################################################################################################### +------------------------------------------------------------------------------ Specify, display and run the Model Downloader command to download the model. @@ -162,7 +178,7 @@ Downloading mobilenet-v2-pytorch… Convert a Model to OpenVINO IR format -############################################################################################################################### +------------------------------------------------------------------------------- Specify, display and run the Model Converter command to convert the model to OpenVINO IR format. Model conversion may take a while. The @@ -198,25 +214,25 @@ Converting mobilenet-v2-pytorch… .. parsed-literal:: ========== Converting mobilenet-v2-pytorch to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-name=mobilenet_v2 --weights=model/public/mobilenet-v2-pytorch/mobilenet_v2-b0353104.pth --import-module=torchvision.models --input-shape=1,3,224,224 --output-file=model/public/mobilenet-v2-pytorch/mobilenet-v2.onnx --input-names=data --output-names=prob + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-name=mobilenet_v2 
--weights=model/public/mobilenet-v2-pytorch/mobilenet_v2-b0353104.pth --import-module=torchvision.models --input-shape=1,3,224,224 --output-file=model/public/mobilenet-v2-pytorch/mobilenet-v2.onnx --input-names=data --output-names=prob ONNX check passed successfully. ========== Converting mobilenet-v2-pytorch to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=/tmp/tmp9rrzi7ey --model_name=mobilenet-v2-pytorch --input=data '--mean_values=data[123.675,116.28,103.53]' '--scale_values=data[58.624,57.12,57.375]' --reverse_input_channels --output=prob --input_model=model/public/mobilenet-v2-pytorch/mobilenet-v2.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 224, 224]' --compress_to_fp16=True + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/mobilenet-v2-pytorch/FP16 --model_name=mobilenet-v2-pytorch --input=data '--mean_values=data[123.675,116.28,103.53]' '--scale_values=data[58.624,57.12,57.375]' --reverse_input_channels --output=prob --input_model=model/public/mobilenet-v2-pytorch/mobilenet-v2.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 224, 224]' --compress_to_fp16=True - [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression by removing argument --compress_to_fp16 or set it to false --compress_to_fp16=False. - Find more information about compression to FP16 at https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html + [ INFO ] Generated IR will be compressed to FP16. 
If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. + Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html + Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /tmp/tmp9rrzi7ey/mobilenet-v2-pytorch.xml - [ SUCCESS ] BIN file: /tmp/tmp9rrzi7ey/mobilenet-v2-pytorch.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/104-model-tools/model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.bin Get Model Information -############################################################################################################################### +--------------------------------------------------------------- The Info Dumper prints the following information for Open Model Zoo models: @@ -261,8 +277,8 @@ information in a dictionary. 'description': 'MobileNet V2 is image classification model pre-trained on ImageNet dataset. 
This is a PyTorch* implementation of MobileNetV2 architecture as described in the paper "Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation" .\nThe model input is a blob that consists of a single image of "1, 3, 224, 224" in "RGB" order.\nThe model output is typical object classifier for the 1000 different classifications matching with those in the ImageNet database.', 'framework': 'pytorch', 'license_url': 'https://raw.githubusercontent.com/pytorch/vision/master/LICENSE', - 'accuracy_config': '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/accuracy-check.yml', - 'model_config': '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/model.yml', + 'accuracy_config': '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/accuracy-check.yml', + 'model_config': '/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/mobilenet-v2-pytorch/model.yml', 'precisions': ['FP16', 'FP32'], 'quantization_output_precisions': ['FP16-INT8', 'FP32-INT8'], 'subdirectory': 'public/mobilenet-v2-pytorch', @@ -294,8 +310,8 @@ file. model/public/mobilenet-v2-pytorch/FP16/mobilenet-v2-pytorch.xml exists: True -Run Benchmark Tool -############################################################################################################################### +Run Benchmark Tool +------------------------------------------------------------ By default, Benchmark Tool runs inference for 60 seconds in asynchronous mode on CPU. 
It returns inference speed as latency (milliseconds per @@ -331,18 +347,18 @@ seconds… [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 23.78 ms + [ INFO ] Read model took 30.99 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] data (node: data) : f32 / [N,C,H,W] / [1,3,224,224] @@ -356,10 +372,10 @@ seconds… [ INFO ] Model outputs: [ INFO ] prob (node: prob) : f32 / [...] / [1,1000] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 127.94 ms + [ INFO ] Compile model took 148.02 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NETWORK_NAME: main_graph [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 [ INFO ] NUM_STREAMS: 6 [ INFO ] AFFINITY: Affinity.CORE @@ -380,21 +396,21 @@ seconds… [ INFO ] Fill input 'data' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 6.41 ms + [ INFO ] First inference took 6.55 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 20136 iterations - [ INFO ] Duration: 15005.77 ms + [ INFO ] Count: 20166 iterations + [ INFO ] Duration: 15006.87 ms [ INFO ] Latency: - [ INFO ] Median: 4.33 ms - [ INFO ] Average: 4.33 ms - [ INFO ] Min: 2.33 ms - [ INFO ] Max: 12.04 ms - [ INFO ] Throughput: 1341.88 FPS + [ INFO ] Median: 4.32 ms + [ INFO ] Average: 4.32 ms + [ INFO ] Min: 2.50 ms + [ INFO ] Max: 12.20 ms + [ INFO ] Throughput: 1343.78 FPS -Benchmark with Different Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Benchmark with Different Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``benchmark_app`` tool displays logging information that is not always necessary. A more compact result is achieved when the output is @@ -418,9 +434,7 @@ In the next cell, define the ``benchmark_model()`` function that calls ``benchmark_app``. This makes it easy to try different combinations. In the cell below that, you display available devices on the system. -.. note:: - - In this notebook, ``benchmark_app`` runs for 15 seconds to + **Note**: In this notebook, ``benchmark_app`` runs for 15 seconds to give a quick indication of performance. For more accurate performance, it is recommended to run inference for at least one minute by setting the ``t`` parameter to 60 or higher, and run diff --git a/docs/notebooks/105-language-quantize-bert-with-output.rst b/docs/notebooks/105-language-quantize-bert-with-output.rst index a81b93db98bb4f..e3fddbbfb9ca35 100644 --- a/docs/notebooks/105-language-quantize-bert-with-output.rst +++ b/docs/notebooks/105-language-quantize-bert-with-output.rst @@ -14,40 +14,53 @@ Research Paraphrase Corpus will be used. The tutorial is designed to be extendable to custom models and datasets. 
It consists of the following steps: -- Download and prepare the BERT model and MRPC dataset. -- Define data loading and accuracy validation functionality. -- Prepare the model for quantization. -- Run optimization pipeline. -- Load and test quantized model. -- Compare the performance of the original, converted and quantized models. +- Download and prepare the BERT model and MRPC dataset. +- Define data loading and accuracy validation functionality. +- Prepare the model for quantization. +- Run optimization pipeline. +- Load and test quantized model. +- Compare the performance of the original, converted and quantized + models. **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Prepare the Model <#prepare-the-model>`__ -- `Prepare the Dataset <#prepare-the-dataset>`__ -- `Optimize model using NNCF Post-training Quantization API <#optimize-model-using-nncf-post-training-quantization-api>`__ -- `Load and Test OpenVINO Model <#load-and-test-openvino-model>`__ - - `Select inference device <#select-inference-device>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Prepare the Model <#prepare-the-model>`__ +- `Prepare the Dataset <#prepare-the-dataset>`__ +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ +- `Load and Test OpenVINO + Model <#load-and-test-openvino-model>`__ -- `Compare F1-score of FP32 and INT8 models <#compare-f1-score-of-fp32-and-int8-models>`__ -- `Compare Performance of the Original, Converted and Quantized Models <#compare-performance-of-the-original-converted-and-quantized-models>`__ + - `Select inference device <#select-inference-device>`__ + +- `Compare F1-score of FP32 and INT8 + models <#compare-f1-score-of-fp32-and-int8-models>`__ +- `Compare Performance of the Original, Converted and Quantized + Models <#compare-performance-of-the-original-converted-and-quantized-models>`__ ..
code:: ipython3 - !pip install -q "nncf>=2.5.0" - !pip install -q transformers datasets evaluate - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "nncf>=2.5.0" + %pip install -q "transformers" datasets evaluate + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: -Imports -############################################################################################################################### + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 import os - import sys import time from pathlib import Path from zipfile import ZipFile @@ -63,16 +76,21 @@ Imports import torch from transformers import BertForSequenceClassification, BertTokenizer - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) from notebook_utils import download_file .. parsed-literal:: - 2023-09-08 22:31:58.502786: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:31:58.537414: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 22:33:08.247649: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:33:08.281400: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:31:59.115585: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-30 22:33:08.912908: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -80,8 +98,8 @@ Imports INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -Settings -############################################################################################################################### +Settings +-------------------------------------------------- .. code:: ipython3 @@ -95,8 +113,8 @@ Settings os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(MODEL_DIR, exist_ok=True) -Prepare the Model -############################################################################################################################### +Prepare the Model +----------------------------------------------------------- Perform the following: @@ -162,13 +180,22 @@ PyTorch model formats are supported: .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + + +.. parsed-literal:: + + WARNING:nncf:NNCF provides best results with torch==2.0.1, while current torch version is 2.1.0+cpu. If you encounter issues, consider switching to torch==2.0.1 + + +.. 
parsed-literal:: + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/annotations.py:309: UserWarning: TorchScript will treat type annotations of Tensor dtype-specific subtypes as if they are normal Tensors. dtype constraints are not enforced in compilation either. - warnings.warn("TorchScript will treat type annotations of Tensor " + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/annotations.py:386: UserWarning: TorchScript will treat type annotations of Tensor dtype-specific subtypes as if they are normal Tensors. dtype constraints are not enforced in compilation either. + warnings.warn( -Prepare the Dataset -############################################################################################################################### +Prepare the Dataset +------------------------------------------------------------- We download the `General Language Understanding Evaluation (GLUE) `__ dataset for the MRPC task from @@ -192,8 +219,15 @@ tokenizer from HuggingFace. data_source = create_data_source() -Optimize model using NNCF Post-training Quantization API -############################################################################################################################### + + +.. parsed-literal:: + + Map: 0%| | 0/408 [00:00`__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -229,179 +263,19 @@ The optimization process contains the following steps: .. 
parsed-literal:: - INFO:nncf:202 ignored nodes was found by types in the NNCFGraph - INFO:nncf:24 ignored nodes was found by name in the NNCFGraph - INFO:nncf:Not adding activation input quantizer for operation: 19 __module.bert/aten::rsub/Multiply - INFO:nncf:Not adding activation input quantizer for operation: 22 __module.bert/aten::rsub/Subtract - INFO:nncf:Not adding activation input quantizer for operation: 25 __module.bert/aten::mul/Multiply - INFO:nncf:Not adding activation input quantizer for operation: 11 __module.bert.embeddings/aten::add/Add_15 - INFO:nncf:Not adding activation input quantizer for operation: 14 __module.bert.embeddings/aten::add_/Add - INFO:nncf:Not adding activation input quantizer for operation: 17 __module.bert.embeddings.LayerNorm/aten::layer_norm/MVN - 20 __module.bert.embeddings.LayerNorm/aten::layer_norm/Multiply - 23 __module.bert.embeddings.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 30 __module.bert.encoder.layer.0.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 46 __module.bert.encoder.layer.0.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 65 __module.bert.encoder.layer.0.attention.self/aten::matmul/MatMul_54 - INFO:nncf:Not adding activation input quantizer for operation: 26 __module.bert.encoder.layer.0.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 42 __module.bert.encoder.layer.0.attention.output.LayerNorm/aten::layer_norm/MVN - 58 __module.bert.encoder.layer.0.attention.output.LayerNorm/aten::layer_norm/Multiply - 77 __module.bert.encoder.layer.0.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 97 __module.bert.encoder.layer.0.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 127 
__module.bert.encoder.layer.0.output.LayerNorm/aten::layer_norm/MVN - 154 __module.bert.encoder.layer.0.output.LayerNorm/aten::layer_norm/Multiply - 180 __module.bert.encoder.layer.0.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 31 __module.bert.encoder.layer.1.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 47 __module.bert.encoder.layer.1.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 66 __module.bert.encoder.layer.1.attention.self/aten::matmul/MatMul_107 - INFO:nncf:Not adding activation input quantizer for operation: 181 __module.bert.encoder.layer.1.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 196 __module.bert.encoder.layer.1.attention.output.LayerNorm/aten::layer_norm/MVN - 210 __module.bert.encoder.layer.1.attention.output.LayerNorm/aten::layer_norm/Multiply - 227 __module.bert.encoder.layer.1.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 245 __module.bert.encoder.layer.1.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 271 __module.bert.encoder.layer.1.output.LayerNorm/aten::layer_norm/MVN - 294 __module.bert.encoder.layer.1.output.LayerNorm/aten::layer_norm/Multiply - 316 __module.bert.encoder.layer.1.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 34 __module.bert.encoder.layer.2.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 50 __module.bert.encoder.layer.2.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 69 __module.bert.encoder.layer.2.attention.self/aten::matmul/MatMul_160 - INFO:nncf:Not adding activation input quantizer for operation: 184 
__module.bert.encoder.layer.2.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 199 __module.bert.encoder.layer.2.attention.output.LayerNorm/aten::layer_norm/MVN - 213 __module.bert.encoder.layer.2.attention.output.LayerNorm/aten::layer_norm/Multiply - 230 __module.bert.encoder.layer.2.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 251 __module.bert.encoder.layer.2.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 277 __module.bert.encoder.layer.2.output.LayerNorm/aten::layer_norm/MVN - 300 __module.bert.encoder.layer.2.output.LayerNorm/aten::layer_norm/Multiply - 322 __module.bert.encoder.layer.2.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 35 __module.bert.encoder.layer.3.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 51 __module.bert.encoder.layer.3.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 70 __module.bert.encoder.layer.3.attention.self/aten::matmul/MatMul_213 - INFO:nncf:Not adding activation input quantizer for operation: 185 __module.bert.encoder.layer.3.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 200 __module.bert.encoder.layer.3.attention.output.LayerNorm/aten::layer_norm/MVN - 214 __module.bert.encoder.layer.3.attention.output.LayerNorm/aten::layer_norm/Multiply - 231 __module.bert.encoder.layer.3.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 253 __module.bert.encoder.layer.3.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 279 __module.bert.encoder.layer.3.output.LayerNorm/aten::layer_norm/MVN - 302 __module.bert.encoder.layer.3.output.LayerNorm/aten::layer_norm/Multiply - 324 
__module.bert.encoder.layer.3.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 36 __module.bert.encoder.layer.4.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 52 __module.bert.encoder.layer.4.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 71 __module.bert.encoder.layer.4.attention.self/aten::matmul/MatMul_266 - INFO:nncf:Not adding activation input quantizer for operation: 186 __module.bert.encoder.layer.4.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 201 __module.bert.encoder.layer.4.attention.output.LayerNorm/aten::layer_norm/MVN - 215 __module.bert.encoder.layer.4.attention.output.LayerNorm/aten::layer_norm/Multiply - 232 __module.bert.encoder.layer.4.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 255 __module.bert.encoder.layer.4.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 281 __module.bert.encoder.layer.4.output.LayerNorm/aten::layer_norm/MVN - 304 __module.bert.encoder.layer.4.output.LayerNorm/aten::layer_norm/Multiply - 326 __module.bert.encoder.layer.4.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 37 __module.bert.encoder.layer.5.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 53 __module.bert.encoder.layer.5.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 72 __module.bert.encoder.layer.5.attention.self/aten::matmul/MatMul_319 - INFO:nncf:Not adding activation input quantizer for operation: 187 __module.bert.encoder.layer.5.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 202 
__module.bert.encoder.layer.5.attention.output.LayerNorm/aten::layer_norm/MVN - 216 __module.bert.encoder.layer.5.attention.output.LayerNorm/aten::layer_norm/Multiply - 233 __module.bert.encoder.layer.5.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 257 __module.bert.encoder.layer.5.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 283 __module.bert.encoder.layer.5.output.LayerNorm/aten::layer_norm/MVN - 306 __module.bert.encoder.layer.5.output.LayerNorm/aten::layer_norm/Multiply - 328 __module.bert.encoder.layer.5.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 38 __module.bert.encoder.layer.6.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 54 __module.bert.encoder.layer.6.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 73 __module.bert.encoder.layer.6.attention.self/aten::matmul/MatMul_372 - INFO:nncf:Not adding activation input quantizer for operation: 188 __module.bert.encoder.layer.6.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 203 __module.bert.encoder.layer.6.attention.output.LayerNorm/aten::layer_norm/MVN - 217 __module.bert.encoder.layer.6.attention.output.LayerNorm/aten::layer_norm/Multiply - 234 __module.bert.encoder.layer.6.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 259 __module.bert.encoder.layer.6.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 285 __module.bert.encoder.layer.6.output.LayerNorm/aten::layer_norm/MVN - 308 __module.bert.encoder.layer.6.output.LayerNorm/aten::layer_norm/Multiply - 330 __module.bert.encoder.layer.6.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 39 
__module.bert.encoder.layer.7.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 55 __module.bert.encoder.layer.7.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 74 __module.bert.encoder.layer.7.attention.self/aten::matmul/MatMul_425 - INFO:nncf:Not adding activation input quantizer for operation: 189 __module.bert.encoder.layer.7.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 204 __module.bert.encoder.layer.7.attention.output.LayerNorm/aten::layer_norm/MVN - 218 __module.bert.encoder.layer.7.attention.output.LayerNorm/aten::layer_norm/Multiply - 235 __module.bert.encoder.layer.7.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 261 __module.bert.encoder.layer.7.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 287 __module.bert.encoder.layer.7.output.LayerNorm/aten::layer_norm/MVN - 310 __module.bert.encoder.layer.7.output.LayerNorm/aten::layer_norm/Multiply - 332 __module.bert.encoder.layer.7.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 40 __module.bert.encoder.layer.8.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 56 __module.bert.encoder.layer.8.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 75 __module.bert.encoder.layer.8.attention.self/aten::matmul/MatMul_478 - INFO:nncf:Not adding activation input quantizer for operation: 190 __module.bert.encoder.layer.8.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 205 __module.bert.encoder.layer.8.attention.output.LayerNorm/aten::layer_norm/MVN - 219 __module.bert.encoder.layer.8.attention.output.LayerNorm/aten::layer_norm/Multiply - 236 
__module.bert.encoder.layer.8.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 263 __module.bert.encoder.layer.8.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 289 __module.bert.encoder.layer.8.output.LayerNorm/aten::layer_norm/MVN - 312 __module.bert.encoder.layer.8.output.LayerNorm/aten::layer_norm/Multiply - 334 __module.bert.encoder.layer.8.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 41 __module.bert.encoder.layer.9.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 57 __module.bert.encoder.layer.9.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 76 __module.bert.encoder.layer.9.attention.self/aten::matmul/MatMul_531 - INFO:nncf:Not adding activation input quantizer for operation: 191 __module.bert.encoder.layer.9.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 206 __module.bert.encoder.layer.9.attention.output.LayerNorm/aten::layer_norm/MVN - 220 __module.bert.encoder.layer.9.attention.output.LayerNorm/aten::layer_norm/Multiply - 237 __module.bert.encoder.layer.9.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 265 __module.bert.encoder.layer.9.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 291 __module.bert.encoder.layer.9.output.LayerNorm/aten::layer_norm/MVN - 314 __module.bert.encoder.layer.9.output.LayerNorm/aten::layer_norm/Multiply - 336 __module.bert.encoder.layer.9.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 32 __module.bert.encoder.layer.10.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 48 
__module.bert.encoder.layer.10.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 67 __module.bert.encoder.layer.10.attention.self/aten::matmul/MatMul_584 - INFO:nncf:Not adding activation input quantizer for operation: 182 __module.bert.encoder.layer.10.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 197 __module.bert.encoder.layer.10.attention.output.LayerNorm/aten::layer_norm/MVN - 211 __module.bert.encoder.layer.10.attention.output.LayerNorm/aten::layer_norm/Multiply - 228 __module.bert.encoder.layer.10.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 247 __module.bert.encoder.layer.10.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 273 __module.bert.encoder.layer.10.output.LayerNorm/aten::layer_norm/MVN - 296 __module.bert.encoder.layer.10.output.LayerNorm/aten::layer_norm/Multiply - 318 __module.bert.encoder.layer.10.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 33 __module.bert.encoder.layer.11.attention.self/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 49 __module.bert.encoder.layer.11.attention.self/aten::softmax/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 68 __module.bert.encoder.layer.11.attention.self/aten::matmul/MatMul_637 - INFO:nncf:Not adding activation input quantizer for operation: 183 __module.bert.encoder.layer.11.attention.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 198 __module.bert.encoder.layer.11.attention.output.LayerNorm/aten::layer_norm/MVN - 212 __module.bert.encoder.layer.11.attention.output.LayerNorm/aten::layer_norm/Multiply - 229 __module.bert.encoder.layer.11.attention.output.LayerNorm/aten::layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for 
operation: 249 __module.bert.encoder.layer.11.output/aten::add/Add - INFO:nncf:Not adding activation input quantizer for operation: 275 __module.bert.encoder.layer.11.output.LayerNorm/aten::layer_norm/MVN - 298 __module.bert.encoder.layer.11.output.LayerNorm/aten::layer_norm/Multiply - 320 __module.bert.encoder.layer.11.output.LayerNorm/aten::layer_norm/Add - + Statistics collection: 100%|██████████| 300/300 [00:07<00:00, 39.50it/s] + Applying Smooth Quant: 100%|██████████| 50/50 [00:00<00:00, 51.91it/s] + + +.. parsed-literal:: + + INFO:nncf:36 ignored nodes was found by name in the NNCFGraph .. parsed-literal:: - Statistics collection: 100%|██████████| 300/300 [00:25<00:00, 11.87it/s] - Biases correction: 100%|██████████| 74/74 [00:25<00:00, 2.92it/s] + Statistics collection: 100%|██████████| 300/300 [00:25<00:00, 11.96it/s] + Applying Fast Bias correction: 100%|██████████| 74/74 [00:25<00:00, 2.93it/s] .. code:: ipython3 @@ -409,8 +283,8 @@ The optimization process contains the following steps: compressed_model_xml = Path(MODEL_DIR) / "quantized_bert_mrpc.xml" ov.save_model(quantized_model, compressed_model_xml) -Load and Test OpenVINO Model -############################################################################################################################### +Load and Test OpenVINO Model +---------------------------------------------------------------------- To load and test converted model, perform the following: @@ -419,10 +293,10 @@ To load and test converted model, perform the following: - Run the inference. - Get the answer from the model output. -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -478,8 +352,8 @@ changing ``sample_idx`` to another value (from 0 to 407). The same meaning: yes -Compare F1-score of FP32 and INT8 models -############################################################################################################################### +Compare F1-score of FP32 and INT8 models +---------------------------------------------------------------------------------- .. code:: ipython3 @@ -519,11 +393,11 @@ Compare F1-score of FP32 and INT8 models Checking the accuracy of the original model: F1 score: 0.9019 Checking the accuracy of the quantized model: - F1 score: 0.8983 + F1 score: 0.8985 -Compare Performance of the Original, Converted and Quantized Models -############################################################################################################################### +Compare Performance of the Original, Converted and Quantized Models +------------------------------------------------------------------------------------------------------------- Compare the original PyTorch model with OpenVINO converted and quantized models (``FP32``, ``INT8``) to see the difference in performance. It is @@ -580,9 +454,9 @@ Frames Per Second (FPS) for images. .. parsed-literal:: - PyTorch model on CPU: 0.073 seconds per sentence, SPS: 13.77 - IR FP32 model in OpenVINO Runtime/AUTO: 0.021 seconds per sentence, SPS: 46.77 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.010 seconds per sentence, SPS: 98.85 + PyTorch model on CPU: 0.073 seconds per sentence, SPS: 13.72 + IR FP32 model in OpenVINO Runtime/AUTO: 0.022 seconds per sentence, SPS: 46.40 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.010 seconds per sentence, SPS: 98.65 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -590,9 +464,7 @@ Finally, measure the inference performance of OpenVINO ``FP32`` and Tool `__ in OpenVINO. -.. 
note:: - - The ``benchmark_app`` tool is able to measure the + **Note**: The ``benchmark_app`` tool is able to measure the performance of the OpenVINO Intermediate Representation (OpenVINO IR) models only. For more accurate performance, run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run @@ -614,7 +486,7 @@ in OpenVINO. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device device.value [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ ERROR ] Exception from src/inference/src/core.cpp:84: @@ -622,9 +494,9 @@ in OpenVINO. Device with "device" name is not registered in the OpenVINO Runtime Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 102, in main + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 102, in main benchmark.print_version_info() - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 48, in print_version_info + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 48, in print_version_info for device, version in self.core.get_versions(self.device).items(): RuntimeError: Exception from src/inference/src/core.cpp:84: Exception from src/inference/src/dev/core_impl.cpp:565: @@ -646,7 +518,7 @@ in OpenVINO. 
[Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device device.value [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ ERROR ] Exception from src/inference/src/core.cpp:84: @@ -654,9 +526,9 @@ in OpenVINO. Device with "device" name is not registered in the OpenVINO Runtime Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 102, in main + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 102, in main benchmark.print_version_info() - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 48, in print_version_info + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/benchmark.py", line 48, in print_version_info for device, version in self.core.get_versions(self.device).items(): RuntimeError: Exception from src/inference/src/core.cpp:84: Exception from src/inference/src/dev/core_impl.cpp:565: diff --git a/docs/notebooks/106-auto-device-with-output.rst b/docs/notebooks/106-auto-device-with-output.rst index bcb30c004d8771..639842d29e0304 100644 --- a/docs/notebooks/106-auto-device-with-output.rst +++ b/docs/notebooks/106-auto-device-with-output.rst @@ -32,33 +32,53 @@ first inference. 
**Table of contents:** -- `Import modules and create Core <#import-modules-and-create-core>`__ -- `Convert the model to OpenVINO IR format <#convert-the-model-to-openvino-ir-format>`__ -- `(1) Simplify selection logic <#simplify-selection-logic>`__ - - `Default behavior of Core::compile_model API without device_name <#default-behavior-of-core::compile_model-api-without-device_name>`__ - - `Explicitly pass AUTO as device_name to Core::compile_model API <#explicitly-pass-auto-as-device_name-to-core::compile_model-api>`__ +- `Import modules and create + Core <#import-modules-and-create-core>`__ +- `Convert the model to OpenVINO IR + format <#convert-the-model-to-openvino-ir-format>`__ +- `(1) Simplify selection + logic <#-simplify-selection-logic>`__ + + - `Default behavior of Core::compile_model API without + device_name <#default-behavior-of-corecompile_model-api-without-device_name>`__ + - `Explicitly pass AUTO as device_name to Core::compile_model + API <#explicitly-pass-auto-as-device_name-to-corecompile_model-api>`__ + +- `(2) Improve the first inference + latency <#-improve-the-first-inference-latency>`__ + + - `Load an Image <#load-an-image>`__ + - `Load the model to GPU device and perform + inference <#load-the-model-to-gpu-device-and-perform-inference>`__ + - `Load the model using AUTO device and do + inference <#load-the-model-using-auto-device-and-do-inference>`__ + +- `(3) Achieve different performance for different + targets <#-achieve-different-performance-for-different-targets>`__ + + - `Class and callback + definition <#class-and-callback-definition>`__ + - `Inference with THROUGHPUT + hint <#inference-with-throughput-hint>`__ + - `Inference with LATENCY + hint <#inference-with-latency-hint>`__ + - `Difference in FPS and + latency <#difference-in-fps-and-latency>`__ + +Import modules and create Core +------------------------------------------------------------------------ -- `(2) Improve the first inference latency 
<#improve-the-first-inference-latency>`__ - - - `Load an Image <#load-an-image>`__ - - `Load the model to GPU device and perform inference <#load-the-model-to-gpu-device-and-perform-inference>`__ - - `Load the model using AUTO device and do inference <#load-the-model-using-auto-device-and-do-inference>`__ +.. code:: ipython3 -- `(3) Achieve different performance for different targets <#achieve-different-performance-for-different-targets>`__ + # Install openvino package + %pip install -q "openvino>=2023.1.0" - - `Class and callback definition <#class-and-callback-definition>`__ - - `Inference with THROUGHPUT hint <#inference-with-throughput-hint>`__ - - `Inference with LATENCY hint <#inference-with-latency-hint>`__ - - `Difference in FPS and latency <#difference-in-fps-and-latency>`__ -Import modules and create Core -############################################################################################################################### +.. parsed-literal:: -.. code:: ipython3 + Note: you may need to restart the kernel to use updated packages. - # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" .. code:: ipython3 @@ -82,8 +102,8 @@ Import modules and create Core device to have meaningful results. -Convert the model to OpenVINO IR format -############################################################################################################################### +Convert the model to OpenVINO IR format +--------------------------------------------------------------------------------- This tutorial uses `resnet50 `__ @@ -123,34 +143,16 @@ For more information about model conversion API, see this ov_model = core.read_model(model_path) -.. parsed-literal:: - - 2023-09-08 22:36:23.476933: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:36:23.509668: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:36:24.096790: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - - .. parsed-literal:: IR model saved to model/resnet50.xml -(1) Simplify selection logic -############################################################################################################################### +(1) Simplify selection logic +---------------------------------------------------------------------- -Default behavior of Core::compile_model API without device_name -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Default behavior of Core::compile_model API without device_name +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default, ``compile_model`` API will select **AUTO** as ``device_name`` if no device is specified. @@ -169,14 +171,14 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - [22:36:26.6713]I[plugin.cpp:537][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [22:36:26.6714]I[plugin.cpp:537][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [22:36:26.6714]I[plugin.cpp:537][AUTO] device:CPU, config:PERF_COUNT=NO - [22:36:26.6714]I[plugin.cpp:542][AUTO] device:CPU, priority:0 - [22:36:26.6716]I[schedule.cpp:17][AUTO] scheduler starting - [22:36:26.6717]I[auto_schedule.cpp:131][AUTO] select device:CPU - [22:36:26.8157]I[auto_schedule.cpp:109][AUTO] device:CPU compiling model finished - [22:36:26.8158]I[plugin.cpp:572][AUTO] underlying hardware does not support hardware context + [22:37:18.2538]I[plugin.cpp:537][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [22:37:18.2539]I[plugin.cpp:537][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [22:37:18.2539]I[plugin.cpp:537][AUTO] device:CPU, config:PERF_COUNT=NO + [22:37:18.2539]I[plugin.cpp:542][AUTO] device:CPU, priority:0 + [22:37:18.2540]I[schedule.cpp:17][AUTO] scheduler starting + [22:37:18.2540]I[auto_schedule.cpp:131][AUTO] select device:CPU + [22:37:18.3716]I[auto_schedule.cpp:109][AUTO] device:CPU compiling model finished + [22:37:18.3717]I[plugin.cpp:572][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -189,12 +191,12 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: + [22:37:18.3836]I[schedule.cpp:303][AUTO] scheduler ending Deleted compiled_model - [22:36:26.8279]I[schedule.cpp:303][AUTO] scheduler ending -Explicitly pass AUTO as device_name to Core::compile_model API -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Explicitly pass AUTO as device_name to Core::compile_model API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It is optional, but passing AUTO explicitly as ``device_name`` may improve readability of your code. @@ -227,8 +229,8 @@ improve readability of your code. Deleted compiled_model -(2) Improve the first inference latency -############################################################################################################################### +(2) Improve the first inference latency +--------------------------------------------------------------------------------- One of the benefits of using AUTO device selection is reducing FIL (first inference latency). FIL is the model compilation time combined @@ -241,17 +243,33 @@ This initialization time may be intolerable for some applications. To avoid this delay, the AUTO uses CPU transparently as the first inference device until GPU is ready. -Load an Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load an Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ torchvision library provides model specific input transformation function, we will reuse it for preparing input data. +.. code:: ipython3 + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file + .. 
code:: ipython3 from PIL import Image - image = Image.open("../data/image/coco.jpg") + # Download the image from the openvino_notebooks storage + image_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + directory="data" + ) + + image = Image.open(str(image_filename)) input_transform = torchvision.models.ResNet50_Weights.DEFAULT.transforms() input_tensor = input_transform(image) @@ -260,13 +278,19 @@ function, we will reuse it for preparing input data. +.. parsed-literal:: + + data/coco.jpg: 0%| | 0.00/202k [00:00`__ article. -Class and callback definition -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Class and callback definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -441,8 +465,8 @@ Class and callback definition metrics_update_interval = 10 metrics_update_num = 6 -Inference with THROUGHPUT hint -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Inference with THROUGHPUT hint +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Loop for inference and update the FPS/Latency every @metrics_update_interval seconds. 
@@ -480,17 +504,17 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 181.92fps, latency: 31.32ms, time interval: 10.02s - throughput: 181.58fps, latency: 32.24ms, time interval: 10.00s - throughput: 182.07fps, latency: 32.16ms, time interval: 10.00s - throughput: 181.02fps, latency: 32.35ms, time interval: 10.00s - throughput: 180.73fps, latency: 32.40ms, time interval: 10.01s - throughput: 180.81fps, latency: 32.37ms, time interval: 10.00s + throughput: 182.05fps, latency: 31.31ms, time interval: 10.01s + throughput: 182.68fps, latency: 32.06ms, time interval: 10.01s + throughput: 183.56fps, latency: 31.93ms, time interval: 10.02s + throughput: 182.77fps, latency: 32.06ms, time interval: 10.01s + throughput: 182.17fps, latency: 32.13ms, time interval: 10.00s + throughput: 182.39fps, latency: 32.15ms, time interval: 10.00s Done -Inference with LATENCY hint -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Inference with LATENCY hint +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Loop for inference and update the FPS/Latency for each @metrics_update_interval seconds @@ -529,17 +553,17 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 139.38fps, latency: 6.69ms, time interval: 10.00s - throughput: 141.83fps, latency: 6.68ms, time interval: 10.00s - throughput: 141.97fps, latency: 6.67ms, time interval: 10.00s - throughput: 141.95fps, latency: 6.67ms, time interval: 10.00s - throughput: 141.90fps, latency: 6.67ms, time interval: 10.01s - throughput: 141.96fps, latency: 6.67ms, time interval: 10.00s + throughput: 140.29fps, latency: 6.65ms, time interval: 10.00s + throughput: 
142.87fps, latency: 6.63ms, time interval: 10.00s + throughput: 142.41fps, latency: 6.64ms, time interval: 10.01s + throughput: 142.91fps, latency: 6.63ms, time interval: 10.01s + throughput: 142.78fps, latency: 6.64ms, time interval: 10.00s + throughput: 142.80fps, latency: 6.64ms, time interval: 10.01s Done -Difference in FPS and latency -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Difference in FPS and latency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -572,7 +596,7 @@ Difference in FPS and latency -.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_26_0.png +.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_27_0.png .. code:: ipython3 @@ -606,5 +630,5 @@ Difference in FPS and latency -.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_27_0.png +.. image:: 106-auto-device-with-output_files/106-auto-device-with-output_28_0.png diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.jpg b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.jpg deleted file mode 100644 index abe8cb45ca2fd7..00000000000000 --- a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a58493845ebd3e98186df7a1ea042b20545bb3c2a5b4a326163da6c9eb5e7d9 -size 121563 diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.png b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.png deleted file mode 100644 index 30f6673bf72850..00000000000000 --- a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_13_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:84112f4f04be0a3445174f4dcf7e300fc12dbd50fd8a5e2d98b4082402dda6de -size 869661 diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_12_0.jpg b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_14_1.jpg similarity index 100% rename from docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_12_0.jpg rename to docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_14_1.jpg diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_12_0.png b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_14_1.png similarity index 100% rename from docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_12_0.png rename to docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_14_1.png diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_25_0.png b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_25_0.png deleted file mode 100644 index ec960eef2246ee..00000000000000 --- a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_25_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7858463aadc74dfe6af6906da15e208305234a13a8463c4f4e4632630fffde70 -size 27107 diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_26_0.png b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_26_0.png deleted file mode 100644 index 37c8b0a925566c..00000000000000 --- a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_26_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5af33aef5b450754224f0fade5aa6a3dba2ce509739a1c551335135e7490291e -size 26881 diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_27_0.png 
b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_27_0.png index 2904815be00f1e..da915be3e3048d 100644 --- a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_27_0.png +++ b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a74427d2101d8f93e2888e623dcbbfc9167a32a35ca33b891ec0e3f980414d0f -size 40038 +oid sha256:1f838f2aa8318b73e4dadf5d48bbfb1332a4bebc2ca4d854dd81e77c93b5b244 +size 26784 diff --git a/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_28_0.png b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_28_0.png new file mode 100644 index 00000000000000..42aa7ec559fd7e --- /dev/null +++ b/docs/notebooks/106-auto-device-with-output_files/106-auto-device-with-output_28_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8187fa0479ea8f4d92a4b1cb42aa15f5377ce2630fa924bee73168ba2c5d10b4 +size 39984 diff --git a/docs/notebooks/106-auto-device-with-output_files/index.html b/docs/notebooks/106-auto-device-with-output_files/index.html index a32b6ac79600cc..ec9933241327ee 100644 --- a/docs/notebooks/106-auto-device-with-output_files/index.html +++ b/docs/notebooks/106-auto-device-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/106-auto-device-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/106-auto-device-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/106-auto-device-with-output_files/


../
-106-auto-device-with-output_12_0.jpg               16-Aug-2023 01:31              121563
-106-auto-device-with-output_12_0.png               16-Aug-2023 01:31              869661
-106-auto-device-with-output_25_0.png               16-Aug-2023 01:31               27107
-106-auto-device-with-output_26_0.png               16-Aug-2023 01:31               39987
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/106-auto-device-with-output_files/


../
+106-auto-device-with-output_14_1.jpg               31-Oct-2023 00:35              121563
+106-auto-device-with-output_14_1.png               31-Oct-2023 00:35              869661
+106-auto-device-with-output_27_0.png               31-Oct-2023 00:35               26784
+106-auto-device-with-output_28_0.png               31-Oct-2023 00:35               39984
 

diff --git a/docs/notebooks/107-speech-recognition-quantization-data2vec-with-output.rst b/docs/notebooks/107-speech-recognition-quantization-data2vec-with-output.rst index c5da0daf8369d1..1173f4b74c3a64 100644 --- a/docs/notebooks/107-speech-recognition-quantization-data2vec-with-output.rst +++ b/docs/notebooks/107-speech-recognition-quantization-data2vec-with-output.rst @@ -21,21 +21,29 @@ steps: **Table of contents:** -- `Download and prepare model <#download-and-prepare-model>`__ - - `Obtain Pytorch model representation <#obtain-pytorch-model-representation>`__ - - `Convert model to OpenVINO Intermediate Representation <#convert-model-to-openvino-intermediate-representation>`__ - - `Prepare inference data <#prepare-inference-data>`__ - -- `Check model inference result <#check-model-inference-result>`__ -- `Validate model accuracy on dataset <#validate-model-accuracy-on-dataset>`__ -- `Quantization <#quantization>`__ -- `Check INT8 model inference result <#check-int8-model-inference-result>`__ -- `Compare Performance of the Original and Quantized Models <#compare-performance-of-the-original-and-quantized-models>`__ -- `Compare Accuracy of the Original and Quantized Models <#compare-accuracy-of-the-original-and-quantized-models>`__ - -Download and prepare model -############################################################################################################################### +- `Download and prepare model <#download-and-prepare-model>`__ + + - `Obtain Pytorch model + representation <#obtain-pytorch-model-representation>`__ + - `Convert model to OpenVINO Intermediate + Representation <#convert-model-to-openvino-intermediate-representation>`__ + - `Prepare inference data <#prepare-inference-data>`__ + +- `Check model inference + result <#check-model-inference-result>`__ +- `Validate model accuracy on + dataset <#validate-model-accuracy-on-dataset>`__ +- `Quantization <#quantization>`__ +- `Check INT8 model inference + result 
<#check-int8-model-inference-result>`__ +- `Compare Performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ +- `Compare Accuracy of the Original and Quantized + Models <#compare-accuracy-of-the-original-and-quantized-models>`__ + +Download and prepare model +-------------------------------------------------------------------- data2vec is a framework for self-supervised representation learning for images, speech, and text as described in `data2vec: A General Framework @@ -53,8 +61,8 @@ In our case, we will use ``data2vec-audio-base-960h`` model, which was finetuned on 960 hours of audio from LibriSpeech Automatic Speech Recognition corpus and distributed as part of HuggingFace transformers. -Obtain Pytorch model representation -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Obtain Pytorch model representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For instantiating PyTorch model class, we should use ``Data2VecAudioForCTC.from_pretrained`` method with providing model ID @@ -68,9 +76,9 @@ model specific pre- and post-processing steps. .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" "nncf>=2.5.0" - !pip install -q datasets "torchmetrics>=0.11.0" - !pip install -q soundfile librosa transformers onnx + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" + %pip install -q datasets "torchmetrics>=0.11.0" + %pip install -q soundfile librosa transformers .. code:: ipython3 @@ -79,8 +87,17 @@ model specific pre- and post-processing steps. 
processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h") model = Data2VecAudioForCTC.from_pretrained("facebook/data2vec-audio-base-960h") -Convert model to OpenVINO Intermediate Representation -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. parsed-literal:: + + 2023-09-12 19:27:57.776647: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-12 19:27:57.812053: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-12 19:27:58.411557: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +Convert model to OpenVINO Intermediate Representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -99,43 +116,10 @@ Convert model to OpenVINO Intermediate Representation BATCH_SIZE = 1 MAX_SEQ_LENGTH = 30480 - - def export_model_to_onnx(model, path): - # switch model to evaluation mode - model.eval() - # disallow gradient propagation for reducing memory during export - with torch.no_grad(): - # define dummy input with specific shape - default_input = torch.zeros([1, MAX_SEQ_LENGTH], dtype=torch.float) - inputs = { - "inputs": default_input - } - - # define names for dynamic dimentions - symbolic_names = {0: "batch_size", 1: "sequence_len"} - # export model - torch.onnx.export( - model, - (inputs["inputs"]), - path, - opset_version=11, - input_names=["inputs"], - output_names=["logits"], - dynamic_axes={ - "inputs": symbolic_names, - "logits": symbolic_names, - }, - ) - print("ONNX model saved to {}".format(path)) - - - onnx_model_path = MODEL_DIR / "data2vec-audo-base.onnx" - ir_model_path = onnx_model_path.with_suffix('.xml') + ir_model_path = MODEL_DIR / "data2vec-audo-base.xml" if not ir_model_path.exists(): - if not onnx_model_path.exists(): - export_model_to_onnx(model, onnx_model_path) - ov_model = ov.convert_model(onnx_model_path) + ov_model = ov.convert_model(model, example_input=torch.zeros([1, MAX_SEQ_LENGTH], dtype=torch.float)) ov.save_model(ov_model, str(ir_model_path)) print("IR model saved to {}".format(ir_model_path)) else: @@ -149,8 +133,8 @@ Convert model to OpenVINO Intermediate Representation Read IR model from model/data2vec-audo-base.xml -Prepare inference data -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prepare inference data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For demonstration purposes, we will use short dummy version of LibriSpeech dataset - ``patrickvonplaten/librispeech_asr_dummy`` to @@ -178,8 +162,15 @@ dataset. 
test_sample = ds[0]["audio"] -Check model inference result -############################################################################################################################### + +.. parsed-literal:: + + Found cached dataset librispeech_asr_dummy (/home/ea/.cache/huggingface/datasets/patrickvonplaten___librispeech_asr_dummy/clean/2.1.0/f2c70a4d03ab4410954901bde48c54b85ca1b7f9bf7d616e7e2a72b5ee6ddbfc) + Loading cached processed dataset at /home/ea/.cache/huggingface/datasets/patrickvonplaten___librispeech_asr_dummy/clean/2.1.0/f2c70a4d03ab4410954901bde48c54b85ca1b7f9bf7d616e7e2a72b5ee6ddbfc/cache-5282243604a7a526.arrow + + +Check model inference result +---------------------------------------------------------------------- The code below is used for running model inference on a single sample from the dataset. It contains the following steps: @@ -233,9 +224,9 @@ For reference, see the same function provided for OpenVINO model. .. parsed-literal:: - [Reference]: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL - [PyTorch]: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL - [OpenVINO FP16]: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL + [Reference]: BECAUSE YOU WERE SLEEPING INSTEAD OF CONQUERING THE LOVELY ROSE PRINCESS HAS BECOME A FIDDLE WITHOUT A BOW WHILE POOR SHAGGY SITS THERE A COOING DOVE + [PyTorch]: BECAUSE YOU WERE SLEEPING INSTEAD OF CONQUERING THE LOVELY RUSE PRINCESS HAS BECOME A FIDDLE WITHOUT A BOW A POOR SHAGGY SITS THERE ACCOOING DOVE + [OpenVINO FP16]: BECAUSE YOU WERE SLEEPING INSTEAD OF CONQUERING THE LOVELY RUSE PRINCESS HAS BECOME A FIDDLE WITHOUT A BOW A POOR SHAGGY SITS THERE ACCOOING DOVE @@ -244,15 +235,15 @@ For reference, see the same function provided for OpenVINO model. 
-Validate model accuracy on dataset -############################################################################################################################### +Validate model accuracy on dataset +---------------------------------------------------------------------------- For model accuracy evaluation, `Word Error Rate `__ metric can be @@ -261,7 +252,7 @@ the total words spoken. A lower WER in speech-to-text means better accuracy in recognizing speech. For WER calculation, we will use -```torchmetrics`` `__ +`torchmetrics `__ library. .. code:: ipython3 @@ -311,8 +302,8 @@ library. [OpenVino] Word Error Rate: 0.0383 -Quantization -############################################################################################################################### +Quantization +------------------------------------------------------ `NNCF `__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -322,20 +313,22 @@ Create a quantized model from the pre-trained ``FP16`` model and the calibration dataset. The optimization process contains the following steps: -:: - - 1. Create a Dataset for quantization. - 2. Run `nncf.quantize` for getting an optimized model. The `nncf.quantize` function provides an interface for model quantization. It requires an instance of the OpenVINO Model and quantization dataset. Optionally, some additional parameters for the configuration quantization process (number of samples for quantization, preset, ignored scope, etc.) can be provided. For more accurate results, we should keep the operation in the postprocessing subgraph in floating point precision, using the `ignored_scope` parameter. `advanced_parameters` can be used to specify advanced quantization parameters for fine-tuning the quantization algorithm. In this tutorial we pass range estimator parameters for activations. 
For more information see [Tune quantization parameters](https://docs.openvino.ai/2023.0/basic_quantization_flow.html#tune-quantization-parameters). - 3. Serialize OpenVINO IR model using `ov.save_model` function. +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. The + ``nncf.quantize`` function provides an interface for model + quantization. It requires an instance of the OpenVINO Model and + quantization dataset. Optionally, some additional parameters for the + configuration quantization process (number of samples for + quantization, preset, ignored scope, etc.) can be provided. For more + accurate results, we should keep the operation in the postprocessing + subgraph in floating point precision, using the ``ignored_scope`` + parameter. For more information see `Tune quantization + parameters <https://docs.openvino.ai/2023.0/basic_quantization_flow.html#tune-quantization-parameters>`__. +3. Serialize OpenVINO IR model using ``ov.save_model`` function. .. code:: ipython3 import nncf - from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters - from nncf.quantization.range_estimator import AggregatorType - from nncf.quantization.range_estimator import RangeEstimatorParameters - from nncf.quantization.range_estimator import StatisticsCollectorParameters - from nncf.quantization.range_estimator import StatisticsType from nncf.parameters import ModelType @@ -358,245 +351,222 @@ steps: subset_size=len(dataset), ignored_scope=nncf.IgnoredScope( names=[ - "/data2vec_audio/encoder/layers.3/feed_forward/intermediate_dense/MatMul", - "/data2vec_audio/feature_extractor/conv_layers.2/conv/Conv", - "/data2vec_audio/encoder/layers.3/Add_1", - "/data2vec_audio/encoder/layers.2/feed_forward/intermediate_dense/MatMul", - "/data2vec_audio/feature_extractor/conv_layers.0/conv/Conv", - "/data2vec_audio/encoder/layers.4/Add_1", - "/data2vec_audio/encoder/layers.4/feed_forward/intermediate_dense/MatMul", - "/data2vec_audio/encoder/layers.4/final_layer_norm/Div", - 
"/data2vec_audio/encoder/layers.4/feed_forward/output_dense/MatMul", - "/data2vec_audio/encoder/layers.8/attention/MatMul_1", - "/data2vec_audio/feature_extractor/conv_layers.1/conv/Conv", - "/data2vec_audio/encoder/layers.2/Add_1", - "/data2vec_audio/feature_extractor/conv_layers.0/layer_norm/Div", - "/data2vec_audio/encoder/layers.1/feed_forward/intermediate_dense/MatMul", - "/data2vec_audio/encoder/layers.1/Add_1", - "/data2vec_audio/feature_extractor/conv_layers.3/layer_norm/Div" + "__module.data2vec_audio.feature_extractor.conv_layers.1.conv/aten::_convolution/Convolution_33", + "__module.data2vec_audio.feature_extractor.conv_layers.0.conv/aten::_convolution/Convolution_3", + "__module.data2vec_audio.encoder.layers.6.feed_forward.output_dense/aten::linear/MatMul_1312", + "__module.data2vec_audio.encoder.layers.7.feed_forward.output_dense/aten::linear/MatMul_1434", + "__module.data2vec_audio.encoder.layers.5.feed_forward.output_dense/aten::linear/MatMul_1190", + "__module.data2vec_audio.encoder.layers.4.feed_forward.output_dense/aten::linear/MatMul_1068", + "__module.data2vec_audio.encoder.layers.8.feed_forward.output_dense/aten::linear/MatMul_1556" ], ), - advanced_parameters=AdvancedQuantizationParameters( - activations_range_estimator_params=RangeEstimatorParameters( - min=StatisticsCollectorParameters( - statistics_type=StatisticsType.MIN, - aggregator_type=AggregatorType.MIN - ), - max=StatisticsCollectorParameters( - statistics_type=StatisticsType.QUANTILE, - aggregator_type=AggregatorType.MEAN, - quantile_outlier_prob=0.0001 - ), - ) - ) ) .. parsed-literal:: INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - INFO:nncf:16 ignored nodes was found by name in the NNCFGraph + INFO:nncf:7 ignored nodes was found by name in the NNCFGraph INFO:nncf:220 ignored nodes was found by types in the NNCFGraph INFO:nncf:24 ignored nodes was found by name in the NNCFGraph - INFO:nncf:Not adding activation input quantizer for operation: 2 /data2vec_audio/feature_extractor/conv_layers.0/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 4 /data2vec_audio/feature_extractor/conv_layers.0/layer_norm/Div - 5 /data2vec_audio/feature_extractor/conv_layers.0/layer_norm/Mul - 6 /data2vec_audio/feature_extractor/conv_layers.0/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 9 /data2vec_audio/feature_extractor/conv_layers.1/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 11 /data2vec_audio/feature_extractor/conv_layers.1/layer_norm/Div - 12 /data2vec_audio/feature_extractor/conv_layers.1/layer_norm/Mul - 13 /data2vec_audio/feature_extractor/conv_layers.1/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 16 /data2vec_audio/feature_extractor/conv_layers.2/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 18 /data2vec_audio/feature_extractor/conv_layers.2/layer_norm/Div - 19 /data2vec_audio/feature_extractor/conv_layers.2/layer_norm/Mul - 20 /data2vec_audio/feature_extractor/conv_layers.2/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 25 /data2vec_audio/feature_extractor/conv_layers.3/layer_norm/Div - 26 /data2vec_audio/feature_extractor/conv_layers.3/layer_norm/Mul - 27 /data2vec_audio/feature_extractor/conv_layers.3/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 32 /data2vec_audio/feature_extractor/conv_layers.4/layer_norm/Div - 33 /data2vec_audio/feature_extractor/conv_layers.4/layer_norm/Mul - 34 
/data2vec_audio/feature_extractor/conv_layers.4/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 39 /data2vec_audio/feature_extractor/conv_layers.5/layer_norm/Div - 40 /data2vec_audio/feature_extractor/conv_layers.5/layer_norm/Mul - 41 /data2vec_audio/feature_extractor/conv_layers.5/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 46 /data2vec_audio/feature_extractor/conv_layers.6/layer_norm/Div - 47 /data2vec_audio/feature_extractor/conv_layers.6/layer_norm/Mul - 48 /data2vec_audio/feature_extractor/conv_layers.6/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 52 /data2vec_audio/feature_projection/layer_norm/Div - 53 /data2vec_audio/feature_projection/layer_norm/Mul - 54 /data2vec_audio/feature_projection/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 70 /data2vec_audio/encoder/pos_conv_embed/layers.0/layer_norm/Div - INFO:nncf:Not adding activation input quantizer for operation: 124 /data2vec_audio/encoder/pos_conv_embed/layers.1/layer_norm/Div - INFO:nncf:Not adding activation input quantizer for operation: 178 /data2vec_audio/encoder/pos_conv_embed/layers.2/layer_norm/Div - INFO:nncf:Not adding activation input quantizer for operation: 221 /data2vec_audio/encoder/pos_conv_embed/layers.3/layer_norm/Div - INFO:nncf:Not adding activation input quantizer for operation: 262 /data2vec_audio/encoder/pos_conv_embed/layers.4/layer_norm/Div - INFO:nncf:Not adding activation input quantizer for operation: 57 /data2vec_audio/encoder/Add - INFO:nncf:Not adding activation input quantizer for operation: 59 /data2vec_audio/encoder/layer_norm/Div - 61 /data2vec_audio/encoder/layer_norm/Mul - 63 /data2vec_audio/encoder/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 65 /data2vec_audio/encoder/layers.0/Add - INFO:nncf:Not adding activation input quantizer for operation: 71 
/data2vec_audio/encoder/layers.0/layer_norm/Div - 83 /data2vec_audio/encoder/layers.0/layer_norm/Mul - 94 /data2vec_audio/encoder/layers.0/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 104 /data2vec_audio/encoder/layers.0/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 115 /data2vec_audio/encoder/layers.0/final_layer_norm/Div - 121 /data2vec_audio/encoder/layers.0/final_layer_norm/Mul - 125 /data2vec_audio/encoder/layers.0/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 128 /data2vec_audio/encoder/layers.1/Add - INFO:nncf:Not adding activation input quantizer for operation: 135 /data2vec_audio/encoder/layers.1/layer_norm/Div - 147 /data2vec_audio/encoder/layers.1/layer_norm/Mul - 158 /data2vec_audio/encoder/layers.1/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 169 /data2vec_audio/encoder/layers.1/feed_forward/intermediate_dense/MatMul - 180 /data2vec_audio/encoder/layers.1/feed_forward/intermediate_dense/Add - - INFO:nncf:Not adding activation input quantizer for operation: 168 /data2vec_audio/encoder/layers.1/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 179 /data2vec_audio/encoder/layers.1/final_layer_norm/Div - 185 /data2vec_audio/encoder/layers.1/final_layer_norm/Mul - 189 /data2vec_audio/encoder/layers.1/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 192 /data2vec_audio/encoder/layers.2/Add - INFO:nncf:Not adding activation input quantizer for operation: 199 /data2vec_audio/encoder/layers.2/layer_norm/Div - 211 /data2vec_audio/encoder/layers.2/layer_norm/Mul - 222 /data2vec_audio/encoder/layers.2/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 233 /data2vec_audio/encoder/layers.2/feed_forward/intermediate_dense/MatMul - 244 /data2vec_audio/encoder/layers.2/feed_forward/intermediate_dense/Add - - INFO:nncf:Not adding 
activation input quantizer for operation: 232 /data2vec_audio/encoder/layers.2/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 243 /data2vec_audio/encoder/layers.2/final_layer_norm/Div - 249 /data2vec_audio/encoder/layers.2/final_layer_norm/Mul - 253 /data2vec_audio/encoder/layers.2/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 256 /data2vec_audio/encoder/layers.3/Add - INFO:nncf:Not adding activation input quantizer for operation: 263 /data2vec_audio/encoder/layers.3/layer_norm/Div - 275 /data2vec_audio/encoder/layers.3/layer_norm/Mul - 286 /data2vec_audio/encoder/layers.3/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 297 /data2vec_audio/encoder/layers.3/feed_forward/intermediate_dense/MatMul - 307 /data2vec_audio/encoder/layers.3/feed_forward/intermediate_dense/Add - - INFO:nncf:Not adding activation input quantizer for operation: 296 /data2vec_audio/encoder/layers.3/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 306 /data2vec_audio/encoder/layers.3/final_layer_norm/Div - 311 /data2vec_audio/encoder/layers.3/final_layer_norm/Mul - 314 /data2vec_audio/encoder/layers.3/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 316 /data2vec_audio/encoder/layers.4/Add - INFO:nncf:Not adding activation input quantizer for operation: 322 /data2vec_audio/encoder/layers.4/layer_norm/Div - 333 /data2vec_audio/encoder/layers.4/layer_norm/Mul - 343 /data2vec_audio/encoder/layers.4/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 353 /data2vec_audio/encoder/layers.4/feed_forward/intermediate_dense/MatMul - 363 /data2vec_audio/encoder/layers.4/feed_forward/intermediate_dense/Add - - INFO:nncf:Not adding activation input quantizer for operation: 371 /data2vec_audio/encoder/layers.4/feed_forward/output_dense/MatMul - 377 /data2vec_audio/encoder/layers.4/feed_forward/output_dense/Add - 
- INFO:nncf:Not adding activation input quantizer for operation: 352 /data2vec_audio/encoder/layers.4/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 362 /data2vec_audio/encoder/layers.4/final_layer_norm/Div - 367 /data2vec_audio/encoder/layers.4/final_layer_norm/Mul - 370 /data2vec_audio/encoder/layers.4/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 372 /data2vec_audio/encoder/layers.5/Add - INFO:nncf:Not adding activation input quantizer for operation: 378 /data2vec_audio/encoder/layers.5/layer_norm/Div - 389 /data2vec_audio/encoder/layers.5/layer_norm/Mul - 399 /data2vec_audio/encoder/layers.5/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 408 /data2vec_audio/encoder/layers.5/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 418 /data2vec_audio/encoder/layers.5/final_layer_norm/Div - 423 /data2vec_audio/encoder/layers.5/final_layer_norm/Mul - 426 /data2vec_audio/encoder/layers.5/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 428 /data2vec_audio/encoder/layers.6/Add - INFO:nncf:Not adding activation input quantizer for operation: 434 /data2vec_audio/encoder/layers.6/layer_norm/Div - 445 /data2vec_audio/encoder/layers.6/layer_norm/Mul - 455 /data2vec_audio/encoder/layers.6/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 464 /data2vec_audio/encoder/layers.6/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 474 /data2vec_audio/encoder/layers.6/final_layer_norm/Div - 479 /data2vec_audio/encoder/layers.6/final_layer_norm/Mul - 482 /data2vec_audio/encoder/layers.6/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 484 /data2vec_audio/encoder/layers.7/Add - INFO:nncf:Not adding activation input quantizer for operation: 490 /data2vec_audio/encoder/layers.7/layer_norm/Div - 501 
/data2vec_audio/encoder/layers.7/layer_norm/Mul - 511 /data2vec_audio/encoder/layers.7/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 520 /data2vec_audio/encoder/layers.7/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 530 /data2vec_audio/encoder/layers.7/final_layer_norm/Div - 535 /data2vec_audio/encoder/layers.7/final_layer_norm/Mul - 538 /data2vec_audio/encoder/layers.7/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 540 /data2vec_audio/encoder/layers.8/Add - INFO:nncf:Not adding activation input quantizer for operation: 546 /data2vec_audio/encoder/layers.8/layer_norm/Div - 557 /data2vec_audio/encoder/layers.8/layer_norm/Mul - 567 /data2vec_audio/encoder/layers.8/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 576 /data2vec_audio/encoder/layers.8/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 586 /data2vec_audio/encoder/layers.8/final_layer_norm/Div - 591 /data2vec_audio/encoder/layers.8/final_layer_norm/Mul - 594 /data2vec_audio/encoder/layers.8/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 596 /data2vec_audio/encoder/layers.9/Add - INFO:nncf:Not adding activation input quantizer for operation: 602 /data2vec_audio/encoder/layers.9/layer_norm/Div - 613 /data2vec_audio/encoder/layers.9/layer_norm/Mul - 623 /data2vec_audio/encoder/layers.9/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 632 /data2vec_audio/encoder/layers.9/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 642 /data2vec_audio/encoder/layers.9/final_layer_norm/Div - 647 /data2vec_audio/encoder/layers.9/final_layer_norm/Mul - 650 /data2vec_audio/encoder/layers.9/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 652 /data2vec_audio/encoder/layers.10/Add - INFO:nncf:Not adding activation input quantizer 
for operation: 658 /data2vec_audio/encoder/layers.10/layer_norm/Div - 669 /data2vec_audio/encoder/layers.10/layer_norm/Mul - 679 /data2vec_audio/encoder/layers.10/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 688 /data2vec_audio/encoder/layers.10/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 698 /data2vec_audio/encoder/layers.10/final_layer_norm/Div - 703 /data2vec_audio/encoder/layers.10/final_layer_norm/Mul - 706 /data2vec_audio/encoder/layers.10/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 708 /data2vec_audio/encoder/layers.11/Add - INFO:nncf:Not adding activation input quantizer for operation: 714 /data2vec_audio/encoder/layers.11/layer_norm/Div - 725 /data2vec_audio/encoder/layers.11/layer_norm/Mul - 735 /data2vec_audio/encoder/layers.11/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 744 /data2vec_audio/encoder/layers.11/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 754 /data2vec_audio/encoder/layers.11/final_layer_norm/Div - 759 /data2vec_audio/encoder/layers.11/final_layer_norm/Mul - 762 /data2vec_audio/encoder/layers.11/final_layer_norm/Add_1 + INFO:nncf:Not adding activation input quantizer for operation: 3 __module.data2vec_audio.feature_extractor.conv_layers.0.conv/aten::_convolution/Convolution_3 + INFO:nncf:Not adding activation input quantizer for operation: 5 __module.data2vec_audio.feature_extractor.conv_layers.0.layer_norm/aten::layer_norm/MVN + 6 __module.data2vec_audio.feature_extractor.conv_layers.0.layer_norm/aten::layer_norm/Multiply + 7 __module.data2vec_audio.feature_extractor.conv_layers.0.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 10 __module.data2vec_audio.feature_extractor.conv_layers.1.conv/aten::_convolution/Convolution_33 + INFO:nncf:Not adding activation input quantizer for operation: 12 
__module.data2vec_audio.feature_extractor.conv_layers.1.layer_norm/aten::layer_norm/MVN + 13 __module.data2vec_audio.feature_extractor.conv_layers.1.layer_norm/aten::layer_norm/Multiply + 14 __module.data2vec_audio.feature_extractor.conv_layers.1.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 19 __module.data2vec_audio.feature_extractor.conv_layers.2.layer_norm/aten::layer_norm/MVN + 20 __module.data2vec_audio.feature_extractor.conv_layers.2.layer_norm/aten::layer_norm/Multiply + 21 __module.data2vec_audio.feature_extractor.conv_layers.2.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 26 __module.data2vec_audio.feature_extractor.conv_layers.3.layer_norm/aten::layer_norm/MVN + 27 __module.data2vec_audio.feature_extractor.conv_layers.3.layer_norm/aten::layer_norm/Multiply + 28 __module.data2vec_audio.feature_extractor.conv_layers.3.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 33 __module.data2vec_audio.feature_extractor.conv_layers.4.layer_norm/aten::layer_norm/MVN + 34 __module.data2vec_audio.feature_extractor.conv_layers.4.layer_norm/aten::layer_norm/Multiply + 35 __module.data2vec_audio.feature_extractor.conv_layers.4.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 40 __module.data2vec_audio.feature_extractor.conv_layers.5.layer_norm/aten::layer_norm/MVN + 41 __module.data2vec_audio.feature_extractor.conv_layers.5.layer_norm/aten::layer_norm/Multiply + 42 __module.data2vec_audio.feature_extractor.conv_layers.5.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 47 __module.data2vec_audio.feature_extractor.conv_layers.6.layer_norm/aten::layer_norm/MVN + 48 __module.data2vec_audio.feature_extractor.conv_layers.6.layer_norm/aten::layer_norm/Multiply + 49 
__module.data2vec_audio.feature_extractor.conv_layers.6.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 53 __module.data2vec_audio.feature_projection.layer_norm/aten::layer_norm/MVN + 54 __module.data2vec_audio.feature_projection.layer_norm/aten::layer_norm/Multiply + 55 __module.data2vec_audio.feature_projection.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 66 __module.data2vec_audio.encoder.pos_conv_embed.layers.0.layer_norm/aten::layer_norm/MVN + INFO:nncf:Not adding activation input quantizer for operation: 114 __module.data2vec_audio.encoder.pos_conv_embed.layers.1.layer_norm/aten::layer_norm/MVN + INFO:nncf:Not adding activation input quantizer for operation: 154 __module.data2vec_audio.encoder.pos_conv_embed.layers.2.layer_norm/aten::layer_norm/MVN + INFO:nncf:Not adding activation input quantizer for operation: 191 __module.data2vec_audio.encoder.pos_conv_embed.layers.3.layer_norm/aten::layer_norm/MVN + INFO:nncf:Not adding activation input quantizer for operation: 233 __module.data2vec_audio.encoder.pos_conv_embed.layers.4.layer_norm/aten::layer_norm/MVN + INFO:nncf:Not adding activation input quantizer for operation: 59 __module.data2vec_audio.encoder/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 61 __module.data2vec_audio.encoder.layer_norm/aten::layer_norm/MVN + 63 __module.data2vec_audio.encoder.layer_norm/aten::layer_norm/Multiply + 65 __module.data2vec_audio.encoder.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 71 __module.data2vec_audio.encoder.layers.0/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 78 __module.data2vec_audio.encoder.layers.0.layer_norm/aten::layer_norm/MVN + 88 __module.data2vec_audio.encoder.layers.0.layer_norm/aten::layer_norm/Multiply + 97 __module.data2vec_audio.encoder.layers.0.layer_norm/aten::layer_norm/Add 
+ + INFO:nncf:Not adding activation input quantizer for operation: 106 __module.data2vec_audio.encoder.layers.0/aten::add/Add_583 + INFO:nncf:Not adding activation input quantizer for operation: 113 __module.data2vec_audio.encoder.layers.0.final_layer_norm/aten::layer_norm/MVN + 117 __module.data2vec_audio.encoder.layers.0.final_layer_norm/aten::layer_norm/Multiply + 120 __module.data2vec_audio.encoder.layers.0.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 127 __module.data2vec_audio.encoder.layers.1/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 134 __module.data2vec_audio.encoder.layers.1.layer_norm/aten::layer_norm/MVN + 144 __module.data2vec_audio.encoder.layers.1.layer_norm/aten::layer_norm/Multiply + 153 __module.data2vec_audio.encoder.layers.1.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 162 __module.data2vec_audio.encoder.layers.1/aten::add/Add_705 + INFO:nncf:Not adding activation input quantizer for operation: 169 __module.data2vec_audio.encoder.layers.1.final_layer_norm/aten::layer_norm/MVN + 173 __module.data2vec_audio.encoder.layers.1.final_layer_norm/aten::layer_norm/Multiply + 176 __module.data2vec_audio.encoder.layers.1.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 183 __module.data2vec_audio.encoder.layers.2/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 190 __module.data2vec_audio.encoder.layers.2.layer_norm/aten::layer_norm/MVN + 200 __module.data2vec_audio.encoder.layers.2.layer_norm/aten::layer_norm/Multiply + 209 __module.data2vec_audio.encoder.layers.2.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 218 __module.data2vec_audio.encoder.layers.2/aten::add/Add_827 + INFO:nncf:Not adding activation input quantizer for operation: 225 
__module.data2vec_audio.encoder.layers.2.final_layer_norm/aten::layer_norm/MVN + 229 __module.data2vec_audio.encoder.layers.2.final_layer_norm/aten::layer_norm/Multiply + 232 __module.data2vec_audio.encoder.layers.2.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 239 __module.data2vec_audio.encoder.layers.3/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 246 __module.data2vec_audio.encoder.layers.3.layer_norm/aten::layer_norm/MVN + 256 __module.data2vec_audio.encoder.layers.3.layer_norm/aten::layer_norm/Multiply + 265 __module.data2vec_audio.encoder.layers.3.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 273 __module.data2vec_audio.encoder.layers.3/aten::add/Add_949 + INFO:nncf:Not adding activation input quantizer for operation: 279 __module.data2vec_audio.encoder.layers.3.final_layer_norm/aten::layer_norm/MVN + 282 __module.data2vec_audio.encoder.layers.3.final_layer_norm/aten::layer_norm/Multiply + 284 __module.data2vec_audio.encoder.layers.3.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 290 __module.data2vec_audio.encoder.layers.4/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 296 __module.data2vec_audio.encoder.layers.4.layer_norm/aten::layer_norm/MVN + 305 __module.data2vec_audio.encoder.layers.4.layer_norm/aten::layer_norm/Multiply + 313 __module.data2vec_audio.encoder.layers.4.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 331 __module.data2vec_audio.encoder.layers.4.feed_forward.output_dense/aten::linear/MatMul_1068 + 333 __module.data2vec_audio.encoder.layers.4.feed_forward.output_dense/aten::linear/Add + + INFO:nncf:Not adding activation input quantizer for operation: 321 __module.data2vec_audio.encoder.layers.4/aten::add/Add_1071 + INFO:nncf:Not adding activation input 
quantizer for operation: 327 __module.data2vec_audio.encoder.layers.4.final_layer_norm/aten::layer_norm/MVN + 330 __module.data2vec_audio.encoder.layers.4.final_layer_norm/aten::layer_norm/Multiply + 332 __module.data2vec_audio.encoder.layers.4.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 338 __module.data2vec_audio.encoder.layers.5/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 344 __module.data2vec_audio.encoder.layers.5.layer_norm/aten::layer_norm/MVN + 353 __module.data2vec_audio.encoder.layers.5.layer_norm/aten::layer_norm/Multiply + 361 __module.data2vec_audio.encoder.layers.5.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 379 __module.data2vec_audio.encoder.layers.5.feed_forward.output_dense/aten::linear/MatMul_1190 + 381 __module.data2vec_audio.encoder.layers.5.feed_forward.output_dense/aten::linear/Add + + INFO:nncf:Not adding activation input quantizer for operation: 369 __module.data2vec_audio.encoder.layers.5/aten::add/Add_1193 + INFO:nncf:Not adding activation input quantizer for operation: 375 __module.data2vec_audio.encoder.layers.5.final_layer_norm/aten::layer_norm/MVN + 378 __module.data2vec_audio.encoder.layers.5.final_layer_norm/aten::layer_norm/Multiply + 380 __module.data2vec_audio.encoder.layers.5.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 386 __module.data2vec_audio.encoder.layers.6/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 392 __module.data2vec_audio.encoder.layers.6.layer_norm/aten::layer_norm/MVN + 401 __module.data2vec_audio.encoder.layers.6.layer_norm/aten::layer_norm/Multiply + 409 __module.data2vec_audio.encoder.layers.6.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 427 
__module.data2vec_audio.encoder.layers.6.feed_forward.output_dense/aten::linear/MatMul_1312 + 429 __module.data2vec_audio.encoder.layers.6.feed_forward.output_dense/aten::linear/Add + + INFO:nncf:Not adding activation input quantizer for operation: 417 __module.data2vec_audio.encoder.layers.6/aten::add/Add_1315 + INFO:nncf:Not adding activation input quantizer for operation: 423 __module.data2vec_audio.encoder.layers.6.final_layer_norm/aten::layer_norm/MVN + 426 __module.data2vec_audio.encoder.layers.6.final_layer_norm/aten::layer_norm/Multiply + 428 __module.data2vec_audio.encoder.layers.6.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 434 __module.data2vec_audio.encoder.layers.7/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 440 __module.data2vec_audio.encoder.layers.7.layer_norm/aten::layer_norm/MVN + 449 __module.data2vec_audio.encoder.layers.7.layer_norm/aten::layer_norm/Multiply + 457 __module.data2vec_audio.encoder.layers.7.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 475 __module.data2vec_audio.encoder.layers.7.feed_forward.output_dense/aten::linear/MatMul_1434 + 477 __module.data2vec_audio.encoder.layers.7.feed_forward.output_dense/aten::linear/Add + + INFO:nncf:Not adding activation input quantizer for operation: 465 __module.data2vec_audio.encoder.layers.7/aten::add/Add_1437 + INFO:nncf:Not adding activation input quantizer for operation: 471 __module.data2vec_audio.encoder.layers.7.final_layer_norm/aten::layer_norm/MVN + 474 __module.data2vec_audio.encoder.layers.7.final_layer_norm/aten::layer_norm/Multiply + 476 __module.data2vec_audio.encoder.layers.7.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 482 __module.data2vec_audio.encoder.layers.8/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 488 
__module.data2vec_audio.encoder.layers.8.layer_norm/aten::layer_norm/MVN + 497 __module.data2vec_audio.encoder.layers.8.layer_norm/aten::layer_norm/Multiply + 505 __module.data2vec_audio.encoder.layers.8.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 523 __module.data2vec_audio.encoder.layers.8.feed_forward.output_dense/aten::linear/MatMul_1556 + 525 __module.data2vec_audio.encoder.layers.8.feed_forward.output_dense/aten::linear/Add + + INFO:nncf:Not adding activation input quantizer for operation: 513 __module.data2vec_audio.encoder.layers.8/aten::add/Add_1559 + INFO:nncf:Not adding activation input quantizer for operation: 519 __module.data2vec_audio.encoder.layers.8.final_layer_norm/aten::layer_norm/MVN + 522 __module.data2vec_audio.encoder.layers.8.final_layer_norm/aten::layer_norm/Multiply + 524 __module.data2vec_audio.encoder.layers.8.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 530 __module.data2vec_audio.encoder.layers.9/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 536 __module.data2vec_audio.encoder.layers.9.layer_norm/aten::layer_norm/MVN + 545 __module.data2vec_audio.encoder.layers.9.layer_norm/aten::layer_norm/Multiply + 553 __module.data2vec_audio.encoder.layers.9.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 561 __module.data2vec_audio.encoder.layers.9/aten::add/Add_1681 + INFO:nncf:Not adding activation input quantizer for operation: 567 __module.data2vec_audio.encoder.layers.9.final_layer_norm/aten::layer_norm/MVN + 570 __module.data2vec_audio.encoder.layers.9.final_layer_norm/aten::layer_norm/Multiply + 572 __module.data2vec_audio.encoder.layers.9.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 578 __module.data2vec_audio.encoder.layers.10/aten::add/Add + INFO:nncf:Not adding activation input 
quantizer for operation: 584 __module.data2vec_audio.encoder.layers.10.layer_norm/aten::layer_norm/MVN + 593 __module.data2vec_audio.encoder.layers.10.layer_norm/aten::layer_norm/Multiply + 601 __module.data2vec_audio.encoder.layers.10.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 609 __module.data2vec_audio.encoder.layers.10/aten::add/Add_1803 + INFO:nncf:Not adding activation input quantizer for operation: 615 __module.data2vec_audio.encoder.layers.10.final_layer_norm/aten::layer_norm/MVN + 618 __module.data2vec_audio.encoder.layers.10.final_layer_norm/aten::layer_norm/Multiply + 620 __module.data2vec_audio.encoder.layers.10.final_layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 626 __module.data2vec_audio.encoder.layers.11/aten::add/Add + INFO:nncf:Not adding activation input quantizer for operation: 632 __module.data2vec_audio.encoder.layers.11.layer_norm/aten::layer_norm/MVN + 641 __module.data2vec_audio.encoder.layers.11.layer_norm/aten::layer_norm/Multiply + 649 __module.data2vec_audio.encoder.layers.11.layer_norm/aten::layer_norm/Add + + INFO:nncf:Not adding activation input quantizer for operation: 657 __module.data2vec_audio.encoder.layers.11/aten::add/Add_1925 + INFO:nncf:Not adding activation input quantizer for operation: 663 __module.data2vec_audio.encoder.layers.11.final_layer_norm/aten::layer_norm/MVN + 666 __module.data2vec_audio.encoder.layers.11.final_layer_norm/aten::layer_norm/Multiply + 668 __module.data2vec_audio.encoder.layers.11.final_layer_norm/aten::layer_norm/Add .. 
parsed-literal:: - Statistics collection: 100%|██████████| 73/73 [00:37<00:00, 1.93it/s] - Biases correction: 100%|██████████| 74/74 [00:16<00:00, 4.60it/s] + Statistics collection: 100%|████████████████████| 73/73 [00:19<00:00, 3.70it/s] + Biases correction: 100%|████████████████████████| 74/74 [00:22<00:00, 3.34it/s] After quantization is finished, compressed model representation can be -saved using ``serialize`` function. +saved using ``ov.save_model`` function. .. code:: ipython3 MODEL_NAME = 'quantized_data2vec_base' quantized_model_path = Path(f"{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.save_model(quantized_model, str(quantized_model_path)) + ov.save_model(quantized_model, quantized_model_path) -Check INT8 model inference result -############################################################################################################################### +Check INT8 model inference result +--------------------------------------------------------------------------- ``INT8`` model is the same in usage like the original one. We need to read it, using the ``core.read_model`` method and load on the device, @@ -617,8 +587,8 @@ using ``core.compile_model``. After that, we can reuse the same .. parsed-literal:: - [Reference]: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL - [OpenVINO INT8]: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL + [Reference]: BECAUSE YOU WERE SLEEPING INSTEAD OF CONQUERING THE LOVELY ROSE PRINCESS HAS BECOME A FIDDLE WITHOUT A BOW WHILE POOR SHAGGY SITS THERE A COOING DOVE + [OpenVINO INT8]: BECAUSE YOU WERE SLEEPING INSTEAD OF CONQUERING THE LOVELY RUSE PRINCESS HAS BECOME A FIDDLE WITHOUT A BOW A POORA SHAGGY SITS THERE A COOING DOVE @@ -627,24 +597,22 @@ using ``core.compile_model``. 
After that, we can reuse the same -Compare Performance of the Original and Quantized Models -############################################################################################################################### +Compare Performance of the Original and Quantized Models +-------------------------------------------------------------------------------------------------- `Benchmark Tool `__ is used to measure the inference performance of the ``FP16`` and ``INT8`` models. -.. note:: - - For more accurate performance, it is recommended to run + **NOTE**: For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on CPU for one minute. Change ``CPU`` to ``GPU`` to @@ -663,41 +631,41 @@ is used to measure the inference performance of the ``FP16`` and [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 34.19 ms + [ INFO ] Read model took 41.32 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [?,?] + [ INFO ] input_values (node: input_values) : f32 / [...] / [?,?] [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] 
/ [?,?,32] + [ INFO ] logits , 819 (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [?,?,32] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'inputs': [1,30480] - [ INFO ] Reshape model took 14.25 ms + [ INFO ] Reshaping model: 'input_values': [1,30480] + [ INFO ] Reshape model took 45.40 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [1,30480] + [ INFO ] input_values (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [1,95,32] + [ INFO ] logits , 819 (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 510.06 ms + [ INFO ] Compile model took 719.60 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] NUM_STREAMS: 6 - [ INFO ] AFFINITY: Affinity.HYBRID_AWARE - [ INFO ] INFERENCE_NUM_THREADS: 18 + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 36 [ INFO ] PERF_COUNT: False [ INFO ] INFERENCE_PRECISION_HINT: [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT @@ -707,22 +675,24 @@ is used to measure the inference performance of the ``FP16`` and [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE [ INFO ] ENABLE_HYPER_THREADING: True [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'inputs'!. This input will be filled with random values! 
- [ INFO ] Fill input 'inputs' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ WARNING ] No input files were given for input 'input_values'!. This input will be filled with random values! + [ INFO ] Fill input 'input_values' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 110.84 ms + [ INFO ] First inference took 81.52 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 360 iterations - [ INFO ] Duration: 15162.56 ms + [ INFO ] Count: 732 iterations + [ INFO ] Duration: 15397.52 ms [ INFO ] Latency: - [ INFO ] Median: 219.94 ms - [ INFO ] Average: 251.64 ms - [ INFO ] Min: 154.25 ms - [ INFO ] Max: 378.39 ms - [ INFO ] Throughput: 23.74 FPS + [ INFO ] Median: 250.65 ms + [ INFO ] Average: 251.11 ms + [ INFO ] Min: 131.68 ms + [ INFO ] Max: 317.63 ms + [ INFO ] Throughput: 47.54 FPS .. code:: ipython3 @@ -737,41 +707,41 @@ is used to measure the inference performance of the ``FP16`` and [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 67.14 ms + [ INFO ] Read model took 58.07 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [?,?] + [ INFO ] input_values (node: input_values) : f32 / [...] / [?,?] [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [?,?,32] + [ INFO ] 819 , logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [?,?,32] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'inputs': [1,30480] - [ INFO ] Reshape model took 32.77 ms + [ INFO ] Reshaping model: 'input_values': [1,30480] + [ INFO ] Reshape model took 58.17 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [1,30480] + [ INFO ] input_values (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [1,95,32] + [ INFO ] 819 , logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] 
/ [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 731.37 ms + [ INFO ] Compile model took 1027.08 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] NUM_STREAMS: 6 - [ INFO ] AFFINITY: Affinity.HYBRID_AWARE - [ INFO ] INFERENCE_NUM_THREADS: 18 + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 36 [ INFO ] PERF_COUNT: False [ INFO ] INFERENCE_PRECISION_HINT: [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT @@ -781,26 +751,28 @@ is used to measure the inference performance of the ``FP16`` and [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE [ INFO ] ENABLE_HYPER_THREADING: True [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'inputs'!. This input will be filled with random values! - [ INFO ] Fill input 'inputs' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ WARNING ] No input files were given for input 'input_values'!. This input will be filled with random values! + [ INFO ] Fill input 'input_values' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 72.68 ms + [ INFO ] First inference took 58.39 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 582 iterations - [ INFO ] Duration: 15220.80 ms + [ INFO ] Count: 1008 iterations + [ INFO ] Duration: 15179.89 ms [ INFO ] Latency: - [ INFO ] Median: 135.06 ms - [ INFO ] Average: 156.14 ms - [ INFO ] Min: 124.62 ms - [ INFO ] Max: 232.68 ms - [ INFO ] Throughput: 38.24 FPS + [ INFO ] Median: 179.72 ms + [ INFO ] Average: 179.93 ms + [ INFO ] Min: 134.90 ms + [ INFO ] Max: 241.15 ms + [ INFO ] Throughput: 66.40 FPS -Compare Accuracy of the Original and Quantized Models -############################################################################################################################### +Compare Accuracy of the Original and Quantized Models +----------------------------------------------------------------------------------------------- Finally, calculate WER metric for the ``INT8`` model representation and compare it with the ``FP16`` result. @@ -821,5 +793,5 @@ compare it with the ``FP16`` result. .. parsed-literal:: [OpenVino FP16] Word Error Rate: 0.03826 - [OpenVino INT8] Word Error Rate: 0.0487 + [OpenVino INT8] Word Error Rate: 0.0383 diff --git a/docs/notebooks/107-speech-recognition-quantization-wav2vec2-with-output.rst b/docs/notebooks/107-speech-recognition-quantization-wav2vec2-with-output.rst deleted file mode 100644 index d911dac20bf770..00000000000000 --- a/docs/notebooks/107-speech-recognition-quantization-wav2vec2-with-output.rst +++ /dev/null @@ -1,925 +0,0 @@ -Quantize Speech Recognition Models using NNCF PTQ API -===================================================== - -This tutorial demonstrates how to apply ``INT8`` quantization to the -speech recognition model, known as -`Wav2Vec2 `__, -using the NNCF (Neural Network Compression Framework) 8-bit quantization -in post-training mode (without the fine-tuning pipeline). 
This notebook -uses a fine-tuned -`Wav2Vec2-Base-960h `__ -`PyTorch `__ model trained on the `LibriSpeech ASR -corpus `__. The tutorial is designed to be -extendable to custom models and datasets. It consists of the following -steps: - -- Download and prepare the Wav2Vec2 model and LibriSpeech dataset. -- Define data loading and accuracy validation functionality. -- Model quantization. -- Compare Accuracy of original PyTorch model, OpenVINO FP16 and INT8 - models. -- Compare performance of the original and quantized models. - -**Table of contents:** - -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Prepare the Model <#prepare-the-model>`__ -- `Prepare LibriSpeech Dataset <#prepare-librispeech-dataset>`__ -- `Define DataLoader <#define-dataloader>`__ -- `Run Quantization <#run-quantization>`__ -- `Model Usage Example with Inference Pipeline <#model-usage-example-with-inference-pipeline>`__ -- `Validate model accuracy on dataset <#validate-model-accuracy-on-dataset>`__ -- `Compare Performance of the Original and Quantized Models <#compare-performance-of-the-original-and-quantized-models>`__ - -.. code:: ipython3 - - !pip install -q "openvino==2023.1.0.dev20230811" "nncf>=2.5.0" - !pip install -q soundfile librosa transformers onnx - -Imports -############################################################################################################################### - -.. code:: ipython3 - - import os - import sys - import re - import numpy as np - import openvino as ov - import tarfile - import torch - from itertools import groupby - import soundfile as sf - import IPython.display as ipd - - from transformers import Wav2Vec2ForCTC - - sys.path.append("../utils") - from notebook_utils import download_file - - -.. parsed-literal:: - - 2023-09-08 22:38:42.752981: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:38:42.787924: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:38:43.332490: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Settings -############################################################################################################################### - -.. code:: ipython3 - - from pathlib import Path - - # Set the data and model directories, model source URL and model filename. - MODEL_DIR = Path("model") - DATA_DIR = Path("../data/datasets/librispeech") - MODEL_DIR.mkdir(exist_ok=True) - DATA_DIR.mkdir(exist_ok=True) - -Prepare the Model -############################################################################################################################### - -Perform the following: - Download and unpack a pre-trained Wav2Vec2 -model. - Convert the model to ONNX. - Run model conversion API to -convert the model from the ONNX representation to the OpenVINO -Intermediate Representation (OpenVINO IR). - -.. code:: ipython3 - - download_file("https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin", directory=Path(MODEL_DIR) / 'pytorch', show_progress=True) - download_file("https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json", directory=Path(MODEL_DIR) / 'pytorch', show_progress=False) - - - -.. 
parsed-literal:: - - model/pytorch/pytorch_model.bin: 0%| | 0.00/360M [00:00= self.samples_limit: - # Limit exceeded - return - -Run Quantization -############################################################################################################################### - -`NNCF `__ provides a suite of -advanced algorithms for Neural Networks inference optimization in -OpenVINO with minimal accuracy drop. - -Create a quantized model from the pre-trained ``FP16`` model and the -calibration dataset. The optimization process contains the following -steps: 1. Create a Dataset for quantization. 2. Run ``nncf.quantize`` -for getting an optimized model. The ``nncf.quantize`` function provides -an interface for model quantization. It requires an instance of the -OpenVINO Model and quantization dataset. Optionally, some additional -parameters for the configuration quantization process (number of samples -for quantization, preset, ignored scope, etc.) can be provided. For more -accurate results, we should keep the operation in the postprocessing -subgraph in floating point precision, using the ``ignored_scope`` -parameter. ``advanced_parameters`` can be used to specify advanced -quantization parameters for fine-tuning the quantization algorithm. In -this tutorial we pass range estimator parameters for activations. For -more information see `Tune quantization -parameters `__. -3. Serialize OpenVINO IR model using ``openvino.runtime.serialize`` -function. - -.. code:: ipython3 - - import nncf - from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters, RangeEstimatorParameters - from nncf.quantization.range_estimator import StatisticsCollectorParameters, StatisticsType, AggregatorType - from nncf.parameters import ModelType - - - def transform_fn(data_item): - """ - Extract the model's input from the data item. - The data item here is the data item that is returned from the data source per iteration. 
- This function should be passed when the data item cannot be used as model's input. - """ - _, inputs = data_item - - return inputs["inputs"] - - - dataset_config = {"data_source": os.path.join(DATA_DIR, "LibriSpeech/dev-clean")} - data_loader = LibriSpeechDataLoader(dataset_config, samples_limit=300) - calibration_dataset = nncf.Dataset(data_loader, transform_fn) - - - quantized_model = nncf.quantize( - ov_model, - calibration_dataset, - model_type=ModelType.TRANSFORMER, # specify additional transformer patterns in the model - ignored_scope=nncf.IgnoredScope( - names=[ - '/wav2vec2/feature_extractor/conv_layers.1/conv/Conv', - '/wav2vec2/feature_extractor/conv_layers.2/conv/Conv', - '/wav2vec2/encoder/layers.7/feed_forward/output_dense/MatMul' - ], - ), - advanced_parameters=AdvancedQuantizationParameters( - activations_range_estimator_params=RangeEstimatorParameters( - min=StatisticsCollectorParameters( - statistics_type=StatisticsType.MIN, - aggregator_type=AggregatorType.MIN - ), - max=StatisticsCollectorParameters( - statistics_type=StatisticsType.QUANTILE, - aggregator_type=AggregatorType.MEAN, - quantile_outlier_prob=0.0001 - ), - ) - ) - ) - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino - INFO:nncf:3 ignored nodes was found by name in the NNCFGraph - INFO:nncf:193 ignored nodes was found by types in the NNCFGraph - INFO:nncf:24 ignored nodes was found by name in the NNCFGraph - INFO:nncf:Not adding activation input quantizer for operation: 5 MVN_224 - INFO:nncf:Not adding activation input quantizer for operation: 7 /wav2vec2/feature_extractor/conv_layers.0/layer_norm/Mul - 8 /wav2vec2/feature_extractor/conv_layers.0/layer_norm/Add - - INFO:nncf:Not adding activation input quantizer for operation: 10 /wav2vec2/feature_extractor/conv_layers.1/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 12 /wav2vec2/feature_extractor/conv_layers.2/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 23 /wav2vec2/feature_projection/layer_norm/Div - 24 /wav2vec2/feature_projection/layer_norm/Mul - 25 /wav2vec2/feature_projection/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 28 /wav2vec2/encoder/Add - INFO:nncf:Not adding activation input quantizer for operation: 30 /wav2vec2/encoder/layer_norm/Div - 32 /wav2vec2/encoder/layer_norm/Mul - 34 /wav2vec2/encoder/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 36 /wav2vec2/encoder/layers.0/Add - INFO:nncf:Not adding activation input quantizer for operation: 42 /wav2vec2/encoder/layers.0/layer_norm/Div - 49 /wav2vec2/encoder/layers.0/layer_norm/Mul - 58 /wav2vec2/encoder/layers.0/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 66 /wav2vec2/encoder/layers.0/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 74 /wav2vec2/encoder/layers.0/final_layer_norm/Div - 79 /wav2vec2/encoder/layers.0/final_layer_norm/Mul - 82 /wav2vec2/encoder/layers.0/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 84 /wav2vec2/encoder/layers.1/Add - INFO:nncf:Not 
adding activation input quantizer for operation: 90 /wav2vec2/encoder/layers.1/layer_norm/Div - 96 /wav2vec2/encoder/layers.1/layer_norm/Mul - 105 /wav2vec2/encoder/layers.1/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 113 /wav2vec2/encoder/layers.1/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 121 /wav2vec2/encoder/layers.1/final_layer_norm/Div - 126 /wav2vec2/encoder/layers.1/final_layer_norm/Mul - 129 /wav2vec2/encoder/layers.1/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 131 /wav2vec2/encoder/layers.2/Add - INFO:nncf:Not adding activation input quantizer for operation: 137 /wav2vec2/encoder/layers.2/layer_norm/Div - 143 /wav2vec2/encoder/layers.2/layer_norm/Mul - 152 /wav2vec2/encoder/layers.2/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 160 /wav2vec2/encoder/layers.2/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 168 /wav2vec2/encoder/layers.2/final_layer_norm/Div - 173 /wav2vec2/encoder/layers.2/final_layer_norm/Mul - 176 /wav2vec2/encoder/layers.2/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 178 /wav2vec2/encoder/layers.3/Add - INFO:nncf:Not adding activation input quantizer for operation: 184 /wav2vec2/encoder/layers.3/layer_norm/Div - 190 /wav2vec2/encoder/layers.3/layer_norm/Mul - 199 /wav2vec2/encoder/layers.3/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 207 /wav2vec2/encoder/layers.3/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 215 /wav2vec2/encoder/layers.3/final_layer_norm/Div - 220 /wav2vec2/encoder/layers.3/final_layer_norm/Mul - 223 /wav2vec2/encoder/layers.3/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 225 /wav2vec2/encoder/layers.4/Add - INFO:nncf:Not adding activation input quantizer for operation: 231 
/wav2vec2/encoder/layers.4/layer_norm/Div - 237 /wav2vec2/encoder/layers.4/layer_norm/Mul - 246 /wav2vec2/encoder/layers.4/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 254 /wav2vec2/encoder/layers.4/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 262 /wav2vec2/encoder/layers.4/final_layer_norm/Div - 267 /wav2vec2/encoder/layers.4/final_layer_norm/Mul - 270 /wav2vec2/encoder/layers.4/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 272 /wav2vec2/encoder/layers.5/Add - INFO:nncf:Not adding activation input quantizer for operation: 278 /wav2vec2/encoder/layers.5/layer_norm/Div - 284 /wav2vec2/encoder/layers.5/layer_norm/Mul - 293 /wav2vec2/encoder/layers.5/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 301 /wav2vec2/encoder/layers.5/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 309 /wav2vec2/encoder/layers.5/final_layer_norm/Div - 314 /wav2vec2/encoder/layers.5/final_layer_norm/Mul - 317 /wav2vec2/encoder/layers.5/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 319 /wav2vec2/encoder/layers.6/Add - INFO:nncf:Not adding activation input quantizer for operation: 325 /wav2vec2/encoder/layers.6/layer_norm/Div - 331 /wav2vec2/encoder/layers.6/layer_norm/Mul - 340 /wav2vec2/encoder/layers.6/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 348 /wav2vec2/encoder/layers.6/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 356 /wav2vec2/encoder/layers.6/final_layer_norm/Div - 361 /wav2vec2/encoder/layers.6/final_layer_norm/Mul - 364 /wav2vec2/encoder/layers.6/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 366 /wav2vec2/encoder/layers.7/Add - INFO:nncf:Not adding activation input quantizer for operation: 372 /wav2vec2/encoder/layers.7/layer_norm/Div - 378 
/wav2vec2/encoder/layers.7/layer_norm/Mul - 387 /wav2vec2/encoder/layers.7/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 412 /wav2vec2/encoder/layers.7/feed_forward/output_dense/MatMul - 418 /wav2vec2/encoder/layers.7/feed_forward/output_dense/Add - - INFO:nncf:Not adding activation input quantizer for operation: 395 /wav2vec2/encoder/layers.7/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 403 /wav2vec2/encoder/layers.7/final_layer_norm/Div - 408 /wav2vec2/encoder/layers.7/final_layer_norm/Mul - 411 /wav2vec2/encoder/layers.7/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 413 /wav2vec2/encoder/layers.8/Add - INFO:nncf:Not adding activation input quantizer for operation: 419 /wav2vec2/encoder/layers.8/layer_norm/Div - 425 /wav2vec2/encoder/layers.8/layer_norm/Mul - 434 /wav2vec2/encoder/layers.8/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 442 /wav2vec2/encoder/layers.8/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 450 /wav2vec2/encoder/layers.8/final_layer_norm/Div - 455 /wav2vec2/encoder/layers.8/final_layer_norm/Mul - 458 /wav2vec2/encoder/layers.8/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 460 /wav2vec2/encoder/layers.9/Add - INFO:nncf:Not adding activation input quantizer for operation: 466 /wav2vec2/encoder/layers.9/layer_norm/Div - 472 /wav2vec2/encoder/layers.9/layer_norm/Mul - 481 /wav2vec2/encoder/layers.9/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 489 /wav2vec2/encoder/layers.9/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 497 /wav2vec2/encoder/layers.9/final_layer_norm/Div - 502 /wav2vec2/encoder/layers.9/final_layer_norm/Mul - 505 /wav2vec2/encoder/layers.9/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 507 
/wav2vec2/encoder/layers.10/Add - INFO:nncf:Not adding activation input quantizer for operation: 513 /wav2vec2/encoder/layers.10/layer_norm/Div - 519 /wav2vec2/encoder/layers.10/layer_norm/Mul - 528 /wav2vec2/encoder/layers.10/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 536 /wav2vec2/encoder/layers.10/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 544 /wav2vec2/encoder/layers.10/final_layer_norm/Div - 549 /wav2vec2/encoder/layers.10/final_layer_norm/Mul - 552 /wav2vec2/encoder/layers.10/final_layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 554 /wav2vec2/encoder/layers.11/Add - INFO:nncf:Not adding activation input quantizer for operation: 560 /wav2vec2/encoder/layers.11/layer_norm/Div - 566 /wav2vec2/encoder/layers.11/layer_norm/Mul - 575 /wav2vec2/encoder/layers.11/layer_norm/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 583 /wav2vec2/encoder/layers.11/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 591 /wav2vec2/encoder/layers.11/final_layer_norm/Div - 596 /wav2vec2/encoder/layers.11/final_layer_norm/Mul - 599 /wav2vec2/encoder/layers.11/final_layer_norm/Add_1 - - - -.. parsed-literal:: - - Statistics collection: 100%|██████████| 300/300 [02:51<00:00, 1.75it/s] - Biases correction: 100%|██████████| 74/74 [00:25<00:00, 2.96it/s] - - -.. code:: ipython3 - - MODEL_NAME = 'quantized_wav2vec2_base' - quantized_model_path = Path(f"{MODEL_NAME}_openvino_model/{MODEL_NAME}_quantized.xml") - ov.save_model(quantized_model, str(quantized_model_path)) - -Model Usage Example with Inference Pipeline -############################################################################################################################### - -Both initial (``FP16``) and quantized (``INT8``) models are exactly the -same in use. - -Start with taking one example from the dataset to show inference steps -for it. 
- -Next, load the quantized model to the inference pipeline. - -.. code:: ipython3 - - audio = LibriSpeechDataLoader.read_flac(f'{DATA_DIR}/LibriSpeech/test-clean/121/127105/121-127105-0017.flac') - - ipd.Audio(audio, rate=16000) - - - - -.. raw:: html - - - - - - - -.. code:: ipython3 - - core = ov.Core() - - compiled_model = core.compile_model(model=quantized_model, device_name='CPU') - - input_data = np.expand_dims(audio, axis=0) - output_layer = compiled_model.outputs[0] - -Next, make a prediction. - -.. code:: ipython3 - - predictions = compiled_model([input_data])[output_layer] - -Validate model accuracy on dataset -############################################################################################################################### - -The code below is used for running model inference on a single sample -from the dataset. It contains the following steps: - -- Define ``MetricWER`` class to calculate Word Error Rate. -- Define dataloader for test dataset. -- Define functions to get inference for PyTorch and OpenVINO models. -- Define functions to compute Word Error Rate. - -.. code:: ipython3 - - class MetricWER: - alphabet = [ - "<pad>", "<s>", "</s>", "<unk>", "|", - "e", "t", "a", "o", "n", "i", "h", "s", "r", "d", "l", "u", - "m", "w", "c", "f", "g", "y", "p", "b", "v", "k", "'", "x", "j", "q", "z"] - words_delimiter = '|' - pad_token = '<pad>' - - # Required methods - def __init__(self): - self._name = "WER" - self._sum_score = 0 - self._sum_words = 0 - self._cur_score = 0 - self._decoding_vocab = dict(enumerate(self.alphabet)) - - @property - def value(self): - """Returns accuracy metric value for the last model output.""" - return {self._name: self._cur_score} - - @property - def avg_value(self): - """Returns accuracy metric value for all model outputs.""" - return {self._name: self._sum_score / self._sum_words if self._sum_words != 0 else 0} - - def update(self, output, target): - """ - Updates prediction matches.
- - :param output: model output - :param target: annotations - """ - decoded = [decode_logits(i) for i in output] - target = [i.lower() for i in target] - assert len(output) == len(target), "sizes of output and target mismatch!" - for i in range(len(output)): - self._get_metric_per_sample(decoded[i], target[i]) - - def reset(self): - """ - Resets collected matches - """ - self._sum_score = 0 - self._sum_words = 0 - - def get_attributes(self): - """ - Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}. - Required attributes: 'direction': 'higher-better' or 'higher-worse' - 'type': metric type - """ - return {self._name: {"direction": "higher-worse", "type": "WER"}} - - # Methods specific to the current implementation - def _get_metric_per_sample(self, annotation, prediction): - cur_score = self._editdistance_eval(annotation.split(), prediction.split()) - cur_words = len(annotation.split()) - - self._sum_score += cur_score - self._sum_words += cur_words - self._cur_score = cur_score / cur_words - - result = cur_score / cur_words if cur_words != 0 else 0 - return result - - def _editdistance_eval(self, source, target): - n, m = len(source), len(target) - - distance = np.zeros((n + 1, m + 1), dtype=int) - distance[:, 0] = np.arange(0, n + 1) - distance[0, :] = np.arange(0, m + 1) - - for i in range(1, n + 1): - for j in range(1, m + 1): - cost = 0 if source[i - 1] == target[j - 1] else 1 - - distance[i][j] = min(distance[i - 1][j] + 1, - distance[i][j - 1] + 1, - distance[i - 1][j - 1] + cost) - return distance[n][m] - -Now, you just need to decode predicted probabilities to text, using -tokenizer ``decode_logits``. - -Alternatively, use a built-in ``Wav2Vec2Processor`` tokenizer from the -``transformers`` package. - -.. 
code:: ipython3 - - def decode_logits(logits): - decoding_vocab = dict(enumerate(MetricWER.alphabet)) - token_ids = np.squeeze(np.argmax(logits, -1)) - tokens = [decoding_vocab[idx] for idx in token_ids] - tokens = [token_group[0] for token_group in groupby(tokens)] - tokens = [t for t in tokens if t != MetricWER.pad_token] - res_string = ''.join([t if t != MetricWER.words_delimiter else ' ' for t in tokens]).strip() - res_string = ' '.join(res_string.split(' ')) - res_string = res_string.lower() - return res_string - - - predicted_text = decode_logits(predictions) - predicted_text - - - - -.. parsed-literal:: - - 'it was almost the tone of hope everybody will stay' - - - -.. code:: ipython3 - - from tqdm.notebook import tqdm - - import numpy as np - - - dataset_config = {"data_source": os.path.join(DATA_DIR, "LibriSpeech/test-clean")} - test_data_loader = LibriSpeechDataLoader(dataset_config, samples_limit=300) - - - # inference function for pytorch - def torch_infer(model, sample): - output = model(torch.Tensor(sample[1]['inputs'])).logits - output = output.detach().cpu().numpy() - - return output - - - # inference function for openvino - def ov_infer(model, sample): - output = model.output(0) - output = model(np.array(sample[1]['inputs']))[output] - - return output - - - def compute_wer(dataset, model, infer_fn): - wer = MetricWER() - for sample in tqdm(dataset): - # run infer function on sample - output = infer_fn(model, sample) - # update metric on sample result - wer.update(output, [sample[0][1]]) - - return wer.avg_value - -Now, compute WER for the original PyTorch model, OpenVINO IR model and -quantized model. - -.. 
code:: ipython3 - - compiled_fp32_ov_model = core.compile_model(ov_model) - - pt_result = compute_wer(test_data_loader, torch_model, torch_infer) - ov_fp32_result = compute_wer(test_data_loader, compiled_fp32_ov_model, ov_infer) - quantized_result = compute_wer(test_data_loader, compiled_model, ov_infer) - - print(f'[PyTorch] Word Error Rate: {pt_result["WER"]:.4f}') - print(f'[OpenVino] Word Error Rate: {ov_fp32_result["WER"]:.4f}') - print(f'[Quantized OpenVino] Word Error Rate: {quantized_result["WER"]:.4f}') - - - -.. parsed-literal:: - - 0%| | 0/300 [00:00`__ -to measure the inference performance of the ``FP16`` and ``INT8`` -models. - -.. note:: - - For more accurate performance, it is recommended to run - ``benchmark_app`` in a terminal/command prompt after closing other - applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark - async inference on CPU for one minute. Change ``CPU`` to ``GPU`` to - benchmark on GPU. Run ``benchmark_app --help`` to see an overview of - all command-line options. - -.. code:: ipython3 - - # Inference FP16 model (OpenVINO IR) - ! benchmark_app -m $ir_model_path -shape [1,30480] -d CPU -api async - - -.. parsed-literal:: - - [Step 1/11] Parsing and validating input arguments - [ INFO ] Parsing input parameters - [Step 2/11] Loading OpenVINO Runtime - [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 - [ INFO ] - [ INFO ] Device info: - [ INFO ] CPU - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 - [ INFO ] - [ INFO ] - [Step 3/11] Setting device configuration - [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. - [Step 4/11] Reading model files - [ INFO ] Loading model files - [ INFO ] Read model took 61.48 ms - [ INFO ] Original model I/O parameters: - [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [?,?] 
- [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [?,?,32] - [Step 5/11] Resizing model to match image sizes and given batch - [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'inputs': [1,30480] - [ INFO ] Reshape model took 28.87 ms - [Step 6/11] Configuring input of the model - [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [1,30480] - [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [1,95,32] - [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 644.15 ms - [Step 8/11] Querying optimal runtime parameters - [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] NUM_STREAMS: 6 - [ INFO ] AFFINITY: Affinity.CORE - [ INFO ] INFERENCE_NUM_THREADS: 24 - [ INFO ] PERF_COUNT: False - [ INFO ] INFERENCE_PRECISION_HINT: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE - [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] ENABLE_CPU_PINNING: True - [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE - [ INFO ] ENABLE_HYPER_THREADING: True - [ INFO ] EXECUTION_DEVICES: ['CPU'] - [ INFO ] CPU_DENORMALS_OPTIMIZATION: False - [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 - [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'inputs'!. This input will be filled with random values! - [ INFO ] Fill input 'inputs' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 60000 ms duration) - [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 69.35 ms - [Step 11/11] Dumping statistics report - [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 2748 iterations - [ INFO ] Duration: 60151.82 ms - [ INFO ] Latency: - [ INFO ] Median: 131.23 ms - [ INFO ] Average: 131.13 ms - [ INFO ] Min: 67.66 ms - [ INFO ] Max: 145.43 ms - [ INFO ] Throughput: 45.68 FPS - - -.. code:: ipython3 - - # Inference INT8 model (OpenVINO IR) - ! benchmark_app -m $quantized_model_path -shape [1,30480] -d CPU -api async - - -.. parsed-literal:: - - [Step 1/11] Parsing and validating input arguments - [ INFO ] Parsing input parameters - [Step 2/11] Loading OpenVINO Runtime - [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 - [ INFO ] - [ INFO ] Device info: - [ INFO ] CPU - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 - [ INFO ] - [ INFO ] - [Step 3/11] Setting device configuration - [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. - [Step 4/11] Reading model files - [ INFO ] Loading model files - [ INFO ] Read model took 81.97 ms - [ INFO ] Original model I/O parameters: - [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [?,?] - [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] / [?,?,32] - [Step 5/11] Resizing model to match image sizes and given batch - [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'inputs': [1,30480] - [ INFO ] Reshape model took 35.47 ms - [Step 6/11] Configuring input of the model - [ INFO ] Model inputs: - [ INFO ] inputs (node: inputs) : f32 / [...] / [1,30480] - [ INFO ] Model outputs: - [ INFO ] logits (node: logits) : f32 / [...] 
/ [1,95,32] - [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 920.18 ms - [Step 8/11] Querying optimal runtime parameters - [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] NUM_STREAMS: 6 - [ INFO ] AFFINITY: Affinity.CORE - [ INFO ] INFERENCE_NUM_THREADS: 24 - [ INFO ] PERF_COUNT: False - [ INFO ] INFERENCE_PRECISION_HINT: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE - [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] ENABLE_CPU_PINNING: True - [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE - [ INFO ] ENABLE_HYPER_THREADING: True - [ INFO ] EXECUTION_DEVICES: ['CPU'] - [ INFO ] CPU_DENORMALS_OPTIMIZATION: False - [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 - [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'inputs'!. This input will be filled with random values! - [ INFO ] Fill input 'inputs' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 60000 ms duration) - [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 52.31 ms - [Step 11/11] Dumping statistics report - [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 4500 iterations - [ INFO ] Duration: 60105.34 ms - [ INFO ] Latency: - [ INFO ] Median: 79.88 ms - [ INFO ] Average: 79.99 ms - [ INFO ] Min: 47.16 ms - [ INFO ] Max: 106.32 ms - [ INFO ] Throughput: 74.87 FPS - diff --git a/docs/notebooks/108-gpu-device-with-output.rst b/docs/notebooks/108-gpu-device-with-output.rst index b376e60d8c9b70..d5c7910c38dd9a 100644 --- a/docs/notebooks/108-gpu-device-with-output.rst +++ b/docs/notebooks/108-gpu-device-with-output.rst @@ -3,49 +3,66 @@ Working with GPUs in OpenVINO™ **Table of contents:** -- `Introduction <#introduction>`__ - - `Install required packages <#install-required-packages>`__ - -- `Checking GPUs with Query Device <#checking-gpus-with-query-device>`__ - - - `List GPUs with core.available_devices <#list-gpus-with-core.available_devices>`__ - - `Check Properties with core.get_property <#check-properties-with-core.get_property>`__ - - `Brief Descriptions of Key Properties <#brief-descriptions-of-key-properties>`__ - -- `Compiling a Model on GPU <#compiling-a-model-on-gpu>`__ - - - `Download and Convert a Model <#download-and-convert-a-model>`__ - - - `Download and unpack the Model <#download-and-unpack-the-model>`__ - - `Convert the Model to OpenVINO IR format <#convert-the-model-to-openvino-ir-format>`__ - - - `Compile with Default Configuration <#compile-with-default-configuration>`__ - - `Reduce Compile Time through Model Caching <#reduce-compile-time-through-model-caching>`__ - - `Throughput and Latency Performance Hints <#throughput-and-latency-performance-hints>`__ - - `Using Multiple GPUs with Multi-Device and Cumulative Throughput <#using-multiple-gpus-with-multi-device-and-cumulative-throughput>`__ - -- `Performance Comparison with benchmark_app <#performance-comparison-with-benchmark_app>`__ - - - `CPU vs GPU with Latency Hint <#cpu-vs-gpu-with-latency-hint>`__ - - `CPU 
vs GPU with Throughput Hint <#cpu-vs-gpu-with-throughput-hint>`__ - - `Single GPU vs Multiple GPUs <#single-gpu-vs-multiple-gpus>`__ - -- `Basic Application Using GPUs <#basic-application-using-gpus>`__ - - - `Import Necessary Packages <#import-necessary-packages>`__ - - `Compile the Model <#compile-the-model>`__ - - `Load and Preprocess Video Frames <#load-and-preprocess-video-frames>`__ - - `Define Model Output Classes <#define-model-output-classes>`__ - - `Set up Asynchronous Pipeline <#set-up-asynchronous-pipeline>`__ - - - `Callback Definition <#callback-definition>`__ - - `Create Async Pipeline <#create-async-pipeline>`__ - - - `Perform Inference <#perform-inference>`__ - - `Process Results <#process-results>`__ - -- `Conclusion <#conclusion>`__ +- `Introduction <#introduction>`__ + + - `Install required packages <#install-required-packages>`__ + +- `Checking GPUs with Query + Device <#checking-gpus-with-query-device>`__ + + - `List GPUs with + core.available_devices <#list-gpus-with-coreavailable_devices>`__ + - `Check Properties with + core.get_property <#check-properties-with-coreget_property>`__ + - `Brief Descriptions of Key + Properties <#brief-descriptions-of-key-properties>`__ + +- `Compiling a Model on GPU <#compiling-a-model-on-gpu>`__ + + - `Download and Convert a + Model <#download-and-convert-a-model>`__ + + - `Download and unpack the + Model <#download-and-unpack-the-model>`__ + - `Convert the Model to OpenVINO IR + format <#convert-the-model-to-openvino-ir-format>`__ + + - `Compile with Default + Configuration <#compile-with-default-configuration>`__ + - `Reduce Compile Time through Model + Caching <#reduce-compile-time-through-model-caching>`__ + - `Throughput and Latency Performance + Hints <#throughput-and-latency-performance-hints>`__ + - `Using Multiple GPUs with Multi-Device and Cumulative + Throughput <#using-multiple-gpus-with-multi-device-and-cumulative-throughput>`__ + +- `Performance Comparison with + benchmark_app 
<#performance-comparison-with-benchmark_app>`__ + - `CPU vs GPU with Latency + Hint <#cpu-vs-gpu-with-latency-hint>`__ - `CPU vs GPU with + Throughput Hint <#cpu-vs-gpu-with-throughput-hint>`__ - + `Single GPU vs Multiple + GPUs <#single-gpu-vs-multiple-gpus>`__ +- `Basic Application Using + GPUs <#basic-application-using-gpus>`__ + + - `Import Necessary Packages <#import-necessary-packages>`__ + - `Compile the Model <#compile-the-model>`__ + - `Load and Preprocess Video + Frames <#load-and-preprocess-video-frames>`__ + - `Define Model Output + Classes <#define-model-output-classes>`__ + - `Set up Asynchronous + Pipeline <#set-up-asynchronous-pipeline>`__ + + - `Callback Definition <#callback-definition>`__ + - `Create Async Pipeline <#create-async-pipeline>`__ + + - `Perform Inference <#perform-inference>`__ + - `Process Results <#process-results>`__ + +- `Conclusion <#conclusion>`__ This tutorial provides a high-level overview of working with Intel GPUs in OpenVINO. It shows how to use Query Device to list system GPUs and @@ -58,8 +75,8 @@ run to compare GPU performance in different configurations. It also provides the code for a basic end-to-end application that compiles a model on GPU and uses it to run inference. -Introduction -############################################################################################################################### +Introduction +------------------------------------------------------ Originally, graphic processing units (GPUs) began as specialized chips, developed to accelerate the rendering of computer graphics. In contrast @@ -70,24 +87,28 @@ learning, where GPUs can easily accelerate inference of neural networks by splitting operations across multiple cores. 
OpenVINO supports inference on Intel integrated GPUs (which are included -with most `Intel® Core™ desktop and mobile processors `__) +with most `Intel® Core™ desktop and mobile +processors `__) or on Intel discrete GPU products like the `Intel® Arc™ A-Series -Graphics cards `__ +Graphics +cards `__ and `Intel® Data Center GPU Flex Series `__. To get started, first `install OpenVINO `__ -on a system equipped with one or more Intel GPUs. Follow the `GPU configuration instructions `__ +on a system equipped with one or more Intel GPUs. Follow the `GPU +configuration +instructions `__ to configure OpenVINO to work with your GPU. Then, read on to learn how to accelerate inference with GPUs in OpenVINO! -Install required packages -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Install required packages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q tensorflow + %pip install -q "openvino-dev>=2023.1.0" + %pip install -q tensorflow # Fetch `notebook_utils` module import urllib.request @@ -105,14 +126,14 @@ Install required packages -Checking GPUs with Query Device -############################################################################################################################### +Checking GPUs with Query Device +------------------------------------------------------------------------- In this section, we will see how to list the available GPUs and check their properties. Some of the key properties will also be defined. 
-List GPUs with core.available_devices -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +List GPUs with core.available_devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO Runtime provides the ``available_devices`` method for checking which devices are available for inference. The following code will @@ -121,9 +142,9 @@ appear. .. code:: ipython3 - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() core.available_devices @@ -140,7 +161,8 @@ GPU always takes the id ``0`` if the system has one. For instance, if the system has a CPU, an integrated and discrete GPU, we should expect to see a list like this: ``['CPU', 'GPU.0', 'GPU.1']``. To simplify its use, the “GPU.0” can also be addressed with just “GPU”. For more -details, see the `Device Naming Convention `__ +details, see the `Device Naming +Convention `__ section. If the GPUs are installed correctly on the system and still do not @@ -149,8 +171,8 @@ appear in the list, follow the steps described to configure your GPU drivers to work with OpenVINO. Once we have the GPUs working with OpenVINO, we can proceed with the next sections. -Check Properties with core.get_property -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Check Properties with core.get_property +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To get information about the GPUs, we can use device properties. In OpenVINO, devices have properties that describe their characteristics @@ -232,8 +254,8 @@ for that property. 
DEVICE_ID : 0 -Brief Descriptions of Key Properties -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Brief Descriptions of Key Properties +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each device has several properties as seen in the last command. Some of the key properties are: @@ -256,27 +278,30 @@ the key properties are: - ``CACHE_DIR`` - The directory where the model cache data is stored to speed up compilation time. -To learn more about devices and properties, see the `Query Device Properties `__ +To learn more about devices and properties, see the `Query Device +Properties `__ page. -Compiling a Model on GPU -############################################################################################################################### +Compiling a Model on GPU +------------------------------------------------------------------ Now, we know how to list the GPUs in the system and check their properties. We can easily use one for compiling and running models with -OpenVINO `GPU plugin `__. +OpenVINO `GPU +plugin `__. -Download and Convert a Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download and Convert a Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tutorial uses the ``ssdlite_mobilenet_v2`` model. The ``ssdlite_mobilenet_v2`` model is used for object detection. The model -was trained on `Common Objects in Context (COCO) `__ dataset version with 91 +was trained on `Common Objects in Context +(COCO) `__ dataset version with 91 categories of object. For details, see the `paper `__. 
-Download and unpack the Model -------------------------------------------------------------------------------------------------------------------------------- +Download and unpack the Model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use the ``download_file`` function from the ``notebook_utils`` to download an archive with the model. It automatically creates a directory @@ -332,8 +357,8 @@ package is already downloaded. -Convert the Model to OpenVINO IR format -------------------------------------------------------------------------------------------------------------------------------- +Convert the Model to OpenVINO IR format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To convert the model to OpenVINO IR with ``FP16`` precision, use model conversion API. The models are saved to the ``model/ir_model/`` @@ -342,8 +367,6 @@ directory. For more details about model conversion, see this .. code:: ipython3 - from openvino.tools import mo - from openvino.runtime import serialize from openvino.tools.mo.front import tf as ov_tf_front precision = 'FP16' @@ -356,14 +379,13 @@ directory. 
For more details about model conversion, see this model = None if not model_path.exists(): - model = mo.convert_model(input_model=tf_model_path, - input_shape=[1, 300, 300, 3], - layout='NHWC', - compress_to_fp16=True if precision == 'FP16' else False, - transformations_config=trans_config_path, - tensorflow_object_detection_api_pipeline_config=pipeline_config, - reverse_input_channels=True) - serialize(model, str(model_path)) + model = ov.tools.mo.convert_model(input_model=tf_model_path, + input_shape=[1, 300, 300, 3], + layout='NHWC', + transformations_config=trans_config_path, + tensorflow_object_detection_api_pipeline_config=pipeline_config, + reverse_input_channels=True) + ov.save_model(model, model_path, compress_to_fp16=(precision == "FP16")) print("IR model saved to {}".format(model_path)) else: print("Read IR model from {}".format(model_path)) @@ -380,8 +402,8 @@ directory. For more details about model conversion, see this IR model saved to model/ir_model/ssdlite_mobilenet_v2_fp16.xml -Compile with Default Configuration -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compile with Default Configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the model is ready, first we need to read it, using the ``read_model`` method. Then, we can use the ``compile_model`` method and @@ -397,11 +419,13 @@ use by using “GPU.0”, “GPU.1”, etc. Any of the device names returned by the ``available_devices`` method are valid device specifiers. You may also use “AUTO”, which will automatically select the best device for inference (which is often the GPU). To learn more about AUTO plugin, -visit the `Automatic Device Selection `__ -page as well as the `AUTO device tutorial `__. +visit the `Automatic Device +Selection `__ +page as well as the `AUTO device +tutorial `__. 
-Reduce Compile Time through Model Caching -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Reduce Compile Time through Model Caching +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Depending on the model used, device-specific optimizations and network compilations can cause the compile step to be time-consuming, especially @@ -465,11 +489,12 @@ compile times with caching enabled and disabled as follows: The actual time improvements will depend on the environment as well as the model being used but it is definitely something to consider when -optimizing an application. To read more about this, see the `Model Caching `__ +optimizing an application. To read more about this, see the `Model +Caching `__ docs. -Throughput and Latency Performance Hints -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Throughput and Latency Performance Hints +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To simplify device and pipeline configuration, OpenVINO provides high-level performance hints that automatically set the batch size and @@ -498,12 +523,13 @@ available memory. 
compiled_model = core.compile_model(model, device, {"PERFORMANCE_HINT": "THROUGHPUT"}) -Using Multiple GPUs with Multi-Device and Cumulative Throughput -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Using Multiple GPUs with Multi-Device and Cumulative Throughput +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The latency and throughput hints mentioned above are great and can make a difference when used adequately but they usually use just one device, -either due to the `AUTO plugin `__ +either due to the `AUTO +plugin `__ or by manual specification of the device name as above. When we have multiple devices, such as an integrated and discrete GPU, we may use both at the same time to improve the utilization of the resources. In @@ -529,18 +555,18 @@ manually specify devices to use. Below is an example showing how to use ``compiled_model = core.compile_model(model=model, device_name="AUTO", config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"})`` -.. important:: - - **The “THROUGHPUT”, “MULTI”, and + **Important**: **The “THROUGHPUT”, “MULTI”, and “CUMULATIVE_THROUGHPUT” modes are only applicable to asynchronous inferencing pipelines. The example at the end of this article shows how to set up an asynchronous pipeline that takes advantage of parallelism to increase throughput.** To learn more, see - `Asynchronous Inferencing `__ - in OpenVINO as well as the `Asynchronous Inference notebook `__. + `Asynchronous + Inferencing `__ + in OpenVINO as well as the `Asynchronous Inference + notebook `__. 
-Performance Comparison with benchmark_app -############################################################################################################################### +Performance Comparison with benchmark_app +----------------------------------------------------------------------------------- Given all the different options available when compiling a model, it may be difficult to know which settings work best for a certain application. @@ -639,8 +665,8 @@ performance may depend on the hardware used. Generally, we should expect GPU to be better than CPU, whereas multiple GPUs should be better than a single GPU as long as there is enough work for each of them. -CPU vs GPU with Latency Hint -------------------------------------------------------------------------------------------------------------------------------- +CPU vs GPU with Latency Hint +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -776,8 +802,8 @@ CPU vs GPU with Latency Hint [ INFO ] Throughput: 189.21 FPS -CPU vs GPU with Throughput Hint -------------------------------------------------------------------------------------------------------------------------------- +CPU vs GPU with Throughput Hint +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -913,8 +939,8 @@ CPU vs GPU with Throughput Hint [ INFO ] Throughput: 326.34 FPS -Single GPU vs Multiple GPUs -------------------------------------------------------------------------------------------------------------------------------- +Single GPU vs Multiple GPUs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
code:: ipython3 @@ -1039,8 +1065,8 @@ Single GPU vs Multiple GPUs RuntimeError: Config for device with 1 ID is not registered in GPU plugin -Basic Application Using GPUs -############################################################################################################################### +Basic Application Using GPUs +---------------------------------------------------------------------- We will now show an end-to-end object detection example using GPUs in OpenVINO. The application compiles a model on GPU with the “THROUGHPUT” @@ -1051,8 +1077,8 @@ found in each frame. The detections are then drawn on their corresponding frame and saved as a video, which is displayed at the end of the application. -Import Necessary Packages -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Import Necessary Packages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1077,8 +1103,8 @@ Import Necessary Packages -Compile the Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compile the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1101,8 +1127,8 @@ Compile the Model Model input shape: 1 300 300 3 -Load and Preprocess Video Frames -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load and Preprocess Video Frames +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1144,18 +1170,8 @@ Load and Preprocess Video Frames Number of frames: 288 - - -.. .. raw:: html - -.. 
- - - -Define Model Output Classes -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Define Model Output Classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1175,11 +1191,11 @@ Define Model Output Classes "teddy bear", "hair drier", "toothbrush", "hair brush" ] -Set up Asynchronous Pipeline -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Set up Asynchronous Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Callback Definition -------------------------------------------------------------------------------------------------------------------------------- +Callback Definition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -1195,8 +1211,8 @@ Callback Definition total_time = stop_time - start_time frame_fps[frame_id] = frame_number / total_time -Create Async Pipeline -------------------------------------------------------------------------------------------------------------------------------- +Create Async Pipeline +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -1204,8 +1220,8 @@ Create Async Pipeline infer_queue = AsyncInferQueue(compiled_model) infer_queue.set_callback(completion_callback) -Perform Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Perform Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1234,8 +1250,8 @@ Perform Inference Time per frame: 0.004744s (210.774 FPS) -Process Results -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Process Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -1297,17 +1313,17 @@ Process Results -.. .. raw:: html +.. raw:: html -.. + -Conclusion -############################################################################################################################### +Conclusion +---------------------------------------------------- This tutorial demonstrates how easy it is to use one or more GPUs in OpenVINO, check their properties, and even tailor the model performance @@ -1318,11 +1334,19 @@ detected bounding boxes. To read more about any of these topics, feel free to visit their corresponding documentation: -- `GPU Plugin `__ -- `AUTO Plugin `__ -- `Model Caching `__ -- `MULTI Device Mode `__ -- `Query Device Properties `__ -- `Configurations for GPUs with OpenVINO `__ -- `Benchmark Python Tool `__ -- `Asynchronous Inferencing `__ +- `GPU + Plugin `__ +- `AUTO + Plugin `__ +- `Model + Caching `__ +- `MULTI Device + Mode `__ +- `Query Device + Properties `__ +- `Configurations for GPUs with + OpenVINO `__ +- `Benchmark Python + Tool `__ +- `Asynchronous + Inferencing `__ diff --git a/docs/notebooks/109-latency-tricks-with-output.rst b/docs/notebooks/109-latency-tricks-with-output.rst index c8913666ba01a8..97938ff9ebd8c6 100644 --- a/docs/notebooks/109-latency-tricks-with-output.rst +++ b/docs/notebooks/109-latency-tricks-with-output.rst @@ -19,9 +19,7 @@ many hints simultaneously, like more inference threads + shared memory. It should give even better performance, but we recommend testing it anyway. -.. note:: - - We especially recommend trying + **NOTE**: We especially recommend trying ``OpenVINO IR model + CPU + shared memory in latency mode`` or ``OpenVINO IR model + CPU + shared memory + more inference threads``. @@ -34,9 +32,7 @@ optimize performance on OpenVINO IR files in |image0| -.. note:: - - Many of the steps presented below will give you better + **NOTE**: Many of the steps presented below will give you better performance. 
However, some of them may **not change anything** or even **worsen the performance** if they are strongly dependent on either the hardware or the model. Please run this notebook on your @@ -52,46 +48,60 @@ A similar notebook focused on the throughput mode is available **Table of contents:** -- `Data <#data>`__ -- `Model <#model>`__ -- `Hardware <#hardware>`__ -- `Helper functions <#helper-functions>`__ -- `Optimizations <#optimizations>`__ - - `PyTorch model <#pytorch-model>`__ - - `ONNX model <#onnx-model>`__ - - `OpenVINO IR model <#openvino-ir-model>`__ - - `OpenVINO IR model on GPU <#openvino-ir-model-on-gpu>`__ - - `OpenVINO IR model + more inference threads <#openvino-ir-model-+-more-inference-threads>`__ - - `OpenVINO IR model in latency mode <#openvino-ir-model-in-latency-mode>`__ - - `OpenVINO IR model in latency mode + shared memory <#openvino-ir-model-in-latency-mode-+-shared-memory>`__ - - `Other tricks <#other-tricks>`__ +- `Data <#data>`__ +- `Model <#model>`__ +- `Hardware <#hardware>`__ +- `Helper functions <#helper-functions>`__ +- `Optimizations <#optimizations>`__ + + - `PyTorch model <#pytorch-model>`__ + - `ONNX model <#onnx-model>`__ + - `OpenVINO IR model <#openvino-ir-model>`__ + - `OpenVINO IR model on GPU <#openvino-ir-model-on-gpu>`__ + - `OpenVINO IR model + more inference + threads <#openvino-ir-model--more-inference-threads>`__ + - `OpenVINO IR model in latency + mode <#openvino-ir-model-in-latency-mode>`__ + - `OpenVINO IR model in latency mode + shared + memory <#openvino-ir-model-in-latency-mode--shared-memory>`__ + - `Other tricks <#other-tricks>`__ -- `Performance comparison <#performance-comparison>`__ -- `Conclusions <#conclusions>`__ +- `Performance comparison <#performance-comparison>`__ +- `Conclusions <#conclusions>`__ Prerequisites -############################################################################################################################### +------------- .. 
|image0| image:: https://user-images.githubusercontent.com/4547501/229120774-01f4f972-424d-4280-8395-220dd432985a.png .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" seaborn ultralytics + %pip install -q "openvino>=2023.1.0" seaborn "ultralytics<=8.0.178" onnx + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 import os - import sys import time from pathlib import Path from typing import Any, List, Tuple - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) import notebook_utils as utils -Data -############################################################################################################################### +Data +---------------------------------------------- We will use the same image of the dog sitting on a bicycle for all experiments below. The image is resized and preprocessed to fulfill the @@ -106,7 +116,7 @@ requirements of this particular object detection model. IMAGE_HEIGHT = 480 # load image - image = utils.load_image("../data/image/coco_bike.jpg") + image = utils.load_image("https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg") image = cv2.resize(image, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA) # preprocess it for YOLOv5 @@ -126,12 +136,12 @@ requirements of this particular object detection model. .. parsed-literal:: - + -Model -############################################################################################################################### +Model +----------------------------------------------- We decided to go with `YOLOv5n `__, one of the @@ -159,7 +169,7 @@ PyTorch Hub and small enough to see the difference in performance. .. 
parsed-literal:: Using cache found in /opt/home/k8sworker/.cache/torch/hub/ultralytics_yolov5_master - YOLOv5 🚀 2023-4-21 Python-3.8.10 torch-1.13.1+cpu CPU + YOLOv5 🚀 2023-4-21 Python-3.8.10 torch-2.1.0+cpu CPU @@ -171,31 +181,20 @@ PyTorch Hub and small enough to see the difference in performance. .. parsed-literal:: Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt to model/yolov5n.pt... - - - -.. parsed-literal:: - - 0%| | 0.00/3.87M [00:00`__ +on the GPU. Please note you need to have an Intel GPU and `install +drivers `__ to be able to run this step. In addition, offloading to the GPU helps reduce CPU load and memory consumption, allowing it to be left for routine processes. If you cannot observe a faster inference on GPU, it @@ -493,12 +492,13 @@ execution. del ov_gpu_model # release resources -OpenVINO IR model + more inference threads -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model + more inference threads +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There is a possibility to add a config for any device (CPU in this case). We will increase the number of threads to an equal number of our -cores. There are `more options `__ +cores. There are `more +options `__ to be changed, so it’s worth playing with them to see what works best in our case. In some cases, this optimization may worsen the performance. If it is the case, don’t use it. @@ -522,12 +522,12 @@ If it is the case, don’t use it. .. parsed-literal:: - OpenVINO model + more threads on CPU. First inference time: 0.0159 seconds - OpenVINO model + more threads on CPU: 0.0134 seconds per image (74.68 FPS) + OpenVINO model + more threads on CPU. 
First inference time: 0.0156 seconds + OpenVINO model + more threads on CPU: 0.0134 seconds per image (74.72 FPS) -OpenVINO IR model in latency mode -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in latency mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO offers a virtual device called `AUTO `__, @@ -552,12 +552,12 @@ devices as well. .. parsed-literal:: - OpenVINO model on AUTO. First inference time: 0.0160 seconds - OpenVINO model on AUTO: 0.0136 seconds per image (73.59 FPS) + OpenVINO model on AUTO. First inference time: 0.0162 seconds + OpenVINO model on AUTO: 0.0136 seconds per image (73.76 FPS) -OpenVINO IR model in latency mode + shared memory -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in latency mode + shared memory +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO is a C++ toolkit with Python wrappers (API). The default behavior in the Python API is copying the input to the additional buffer @@ -586,12 +586,12 @@ performance! .. parsed-literal:: - OpenVINO model + shared memory on AUTO. First inference time: 0.0144 seconds - OpenVINO model + shared memory on AUTO: 0.0054 seconds per image (185.64 FPS) + OpenVINO model + shared memory on AUTO. First inference time: 0.0143 seconds + OpenVINO model + shared memory on AUTO: 0.0054 seconds per image (186.06 FPS) -Other tricks -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Other tricks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are other tricks for performance improvement, such as quantization and pre-post-processing or dedicated to throughput mode. 
To get even @@ -600,8 +600,8 @@ more from your model, please visit `118-optimize-preprocessing <../118-optimize-preprocessing>`__, and `109-throughput-tricks <109-throughput-tricks.ipynb>`__. -Performance comparison -############################################################################################################################### +Performance comparison +---------------------------------------------------------------- The following graphical comparison is valid for the selected model and hardware simultaneously. If you cannot see any improvement between some @@ -637,13 +637,14 @@ steps, just skip them. .. image:: 109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png -Conclusions -############################################################################################################################### +Conclusions +----------------------------------------------------- We already showed the steps needed to improve the performance of an object detection model. Even if you experience much better performance after running this notebook, please note this may not be valid for every hardware or every model. For the most accurate results, please use -``benchmark_app`` `command-line tool `__. +``benchmark_app`` `command-line +tool `__. Note that ``benchmark_app`` cannot measure the impact of some tricks above, e.g., shared memory. 
diff --git a/docs/notebooks/109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png b/docs/notebooks/109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png index dd22aa1c0cdaa2..6fa3f77dbbf8df 100644 --- a/docs/notebooks/109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png +++ b/docs/notebooks/109-latency-tricks-with-output_files/109-latency-tricks-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df3da13ecdc00b84a05159a67c90e4124efafb0f6d93913eaa2dde020e854e8d -size 56962 +oid sha256:ff39f8edc5bdcd106ae6c1d49b52edb7342a5a29490b03f3de375b0315155159 +size 57006 diff --git a/docs/notebooks/109-latency-tricks-with-output_files/index.html b/docs/notebooks/109-latency-tricks-with-output_files/index.html index 3fab8a55c3ffaa..75d2c7dfc5ee89 100644 --- a/docs/notebooks/109-latency-tricks-with-output_files/index.html +++ b/docs/notebooks/109-latency-tricks-with-output_files/index.html @@ -1,14 +1,14 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/109-latency-tricks-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/109-latency-tricks-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/109-latency-tricks-with-output_files/


../
-109-latency-tricks-with-output_14_0.jpg            16-Aug-2023 01:31              162715
-109-latency-tricks-with-output_17_0.jpg            16-Aug-2023 01:31              162715
-109-latency-tricks-with-output_19_0.jpg            16-Aug-2023 01:31              162756
-109-latency-tricks-with-output_23_0.jpg            16-Aug-2023 01:31              162756
-109-latency-tricks-with-output_25_0.jpg            16-Aug-2023 01:31              162756
-109-latency-tricks-with-output_27_0.jpg            16-Aug-2023 01:31              162756
-109-latency-tricks-with-output_30_0.png            16-Aug-2023 01:31               56954
-109-latency-tricks-with-output_4_0.jpg             16-Aug-2023 01:31              155828
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/109-latency-tricks-with-output_files/


../
+109-latency-tricks-with-output_14_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_17_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_19_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_23_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_25_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_27_0.jpg            31-Oct-2023 00:35              162715
+109-latency-tricks-with-output_30_0.png            31-Oct-2023 00:35               57006
+109-latency-tricks-with-output_4_0.jpg             31-Oct-2023 00:35              155828
 

diff --git a/docs/notebooks/109-throughput-tricks-with-output.rst b/docs/notebooks/109-throughput-tricks-with-output.rst index 6782c568cdf29d..ff70c41d7c398a 100644 --- a/docs/notebooks/109-throughput-tricks-with-output.rst +++ b/docs/notebooks/109-throughput-tricks-with-output.rst @@ -29,9 +29,7 @@ optimize performance on OpenVINO IR files in |image0| -.. note:: - - Many of the steps presented below will give you better + **NOTE**: Many of the steps presented below will give you better performance. However, some of them may **not change anything** or even **worsen the performance** if they are strongly dependent on either the hardware or the model. Please run this notebook on your @@ -47,46 +45,62 @@ A similar notebook focused on the latency mode is available **Table of contents:** -- `Data <#data>`__ -- `Model <#model>`__ -- `Hardware <#hardware>`__ -- `Helper functions <#helper-functions>`__ -- `Optimizations <#optimizations>`__ - - - `PyTorch model <#pytorch-model>`__ - - `OpenVINO IR model <#openvino-ir-model>`__ - - `OpenVINO IR model + bigger batch <#openvino-ir-model-+-bigger-batch>`__ - - `Asynchronous processing <#asynchronous-processing>`__ - - `OpenVINO IR model in throughput mode <#openvino-ir-model-in-throughput-mode>`__ - - `OpenVINO IR model in throughput mode on GPU <#openvino-ir-model-in-throughput-mode-on-gpu>`__ - - `OpenVINO IR model in throughput mode on AUTO <#openvino-ir-model-in-throughput-mode-on-auto>`__ - - `OpenVINO IR model in cumulative throughput mode on AUTO <#openvino-ir-model-in-cumulative-throughput-mode-on-auto>`__ - - `Other tricks <#other-tricks>`__ - -- `Performance comparison <#performance-comparison>`__ -- `Conclusions <#conclusions>`__ + +- `Data <#data>`__ +- `Model <#model>`__ +- `Hardware <#hardware>`__ +- `Helper functions <#helper-functions>`__ +- `Optimizations <#optimizations>`__ + + - `PyTorch model <#pytorch-model>`__ + - `OpenVINO IR model <#openvino-ir-model>`__ + - `OpenVINO IR model + bigger + batch 
<#openvino-ir-model--bigger-batch>`__ + - `Asynchronous processing <#asynchronous-processing>`__ + - `OpenVINO IR model in throughput + mode <#openvino-ir-model-in-throughput-mode>`__ + - `OpenVINO IR model in throughput mode on + GPU <#openvino-ir-model-in-throughput-mode-on-gpu>`__ + - `OpenVINO IR model in throughput mode on + AUTO <#openvino-ir-model-in-throughput-mode-on-auto>`__ + - `OpenVINO IR model in cumulative throughput mode on + AUTO <#openvino-ir-model-in-cumulative-throughput-mode-on-auto>`__ + - `Other tricks <#other-tricks>`__ + +- `Performance comparison <#performance-comparison>`__ +- `Conclusions <#conclusions>`__ Prerequisites -############################################################################################################################### +------------- .. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/4547501/ac17148c-bee9-43aa-87fc-ead61ac75f1d .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" seaborn ultralytics + %pip install -q "openvino>=2023.1.0" "ultralytics<=8.0.178" seaborn onnx + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 - import sys import time from pathlib import Path from typing import Any, List, Tuple - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) import notebook_utils as utils -Data -############################################################################################################################### +Data +---------------------------------------------- We will use the same image of the dog sitting on a bicycle copied 1000 times to simulate the video with 1000 frames (about 33s). The image is @@ -104,7 +118,7 @@ object detection model.
IMAGE_HEIGHT = 480 # load image - image = utils.load_image("../data/image/coco_bike.jpg") + image = utils.load_image("https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg") image = cv2.resize(image, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA) # preprocess it for YOLOv5 @@ -127,12 +141,12 @@ object detection model. .. parsed-literal:: - + -Model -############################################################################################################################### +Model +----------------------------------------------- We decided to go with `YOLOv5n `__, one of the @@ -160,7 +174,7 @@ PyTorch Hub and small enough to see the difference in performance. .. parsed-literal:: Using cache found in /opt/home/k8sworker/.cache/torch/hub/ultralytics_yolov5_master - YOLOv5 🚀 2023-4-21 Python-3.8.10 torch-1.13.1+cpu CPU + YOLOv5 🚀 2023-4-21 Python-3.8.10 torch-2.1.0+cpu CPU Fusing layers... YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients @@ -172,15 +186,13 @@ PyTorch Hub and small enough to see the difference in performance. requirements: /opt/home/k8sworker/.cache/torch/hub/requirements.txt not found, check failed. -Hardware -############################################################################################################################### +Hardware +-------------------------------------------------- The code below lists the available hardware we will use in the benchmarking process. -.. note:: - - The hardware you have is probably completely different from + **NOTE**: The hardware you have is probably completely different from ours. It means you can see completely different results. .. code:: ipython3 @@ -201,8 +213,8 @@ benchmarking process. 
CPU: Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz -Helper functions -############################################################################################################################### +Helper functions +---------------------------------------------------------- We’re defining a benchmark model function to use for all optimizations below. It runs inference for 1000 frames and prints average frames per @@ -341,15 +353,15 @@ the image. utils.show_array(output_img) -Optimizations -############################################################################################################################### +Optimizations +------------------------------------------------------- Below, we present the performance tricks for faster inference in the throughput mode. We release resources after every benchmarking to be sure the same amount of resource is available for every experiment. -PyTorch model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PyTorch model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First, we’re benchmarking the original PyTorch model without any optimizations applied. We will treat it as our baseline. @@ -370,12 +382,12 @@ optimizations applied. We will treat it as our baseline. .. parsed-literal:: - PyTorch model on CPU. First inference time: 0.0192 seconds - PyTorch model on CPU: 0.0189 seconds per image (52.95 FPS) + PyTorch model on CPU. First inference time: 0.0292 seconds + PyTorch model on CPU: 0.0210 seconds per image (47.67 FPS) -OpenVINO IR model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The first optimization is exporting the PyTorch model to OpenVINO Intermediate Representation (IR) FP16 and running it. Reducing the @@ -415,12 +427,12 @@ step in this notebook. .. 
parsed-literal:: - OpenVINO model on CPU. First inference time: 0.0124 seconds - OpenVINO model on CPU: 0.0073 seconds per image (136.31 FPS) + OpenVINO model on CPU. First inference time: 0.0182 seconds + OpenVINO model on CPU: 0.0073 seconds per image (136.13 FPS) -OpenVINO IR model + bigger batch -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model + bigger batch +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Batch processing often gives higher throughput as more inputs are processed at once. To use bigger batches (than 1), we must convert the @@ -471,12 +483,12 @@ hardware and model. .. parsed-literal:: - OpenVINO model + bigger batch on CPU. First inference time: 0.0428 seconds - OpenVINO model + bigger batch on CPU: 0.0076 seconds per image (131.76 FPS) + OpenVINO model + bigger batch on CPU. First inference time: 0.0502 seconds + OpenVINO model + bigger batch on CPU: 0.0076 seconds per image (131.86 FPS) -Asynchronous processing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Asynchronous processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Asynchronous mode means that OpenVINO immediately returns from an inference call and doesn’t wait for the result. It requires more @@ -489,9 +501,7 @@ automatically spawns the pool of InferRequest objects (also called “jobs”) and provides synchronization mechanisms to control the flow of the pipeline. -.. note:: - - Asynchronous processing cannot guarantee outputs to be in + **NOTE**: Asynchronous processing cannot guarantee outputs to be in the same order as inputs, so be careful in case of applications when the order of frames matters, e.g., videos. @@ -516,15 +526,16 @@ the pipeline. 
del infer_queue # release resources return fps -OpenVINO IR model in throughput mode -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in throughput mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO allows specifying a performance hint changing the internal configuration of the device. There are three different hints: ``LATENCY``, ``THROUGHPUT``, and ``CUMULATIVE_THROUGHPUT``. As this notebook is focused on the throughput mode, we will use the latter two. The hints can be used with other devices as well. Throughput mode -implicitly triggers using the `Automatic Batching `__ +implicitly triggers using the `Automatic +Batching `__ feature, which sets the batch size to the optimal level. .. code:: ipython3 @@ -542,16 +553,17 @@ feature, which sets the batch size to the optimal level. .. parsed-literal:: - OpenVINO model on CPU (THROUGHPUT). First inference time: 0.0237 seconds - OpenVINO model on CPU (THROUGHPUT): 0.0040 seconds per image (249.96 FPS) + OpenVINO model on CPU (THROUGHPUT). First inference time: 0.0274 seconds + OpenVINO model on CPU (THROUGHPUT): 0.0040 seconds per image (249.34 FPS) -OpenVINO IR model in throughput mode on GPU -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in throughput mode on GPU +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Usually, a GPU device provides more frames per second than a CPU, so let’s run the above model on the GPU. Please note you need to have an -Intel GPU and `install drivers `__ +Intel GPU and `install +drivers `__ to be able to run this step. In addition, offloading to the GPU helps reduce CPU load and memory consumption, allowing it to be left for routine processes. 
If you cannot observe a higher throughput on GPU, it @@ -569,8 +581,8 @@ execution. del ov_gpu_model # release resources -OpenVINO IR model in throughput mode on AUTO -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in throughput mode on AUTO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenVINO offers a virtual device called `AUTO `__, @@ -592,12 +604,12 @@ performance hint. .. parsed-literal:: - OpenVINO model on AUTO (THROUGHPUT). First inference time: 0.0237 seconds - OpenVINO model on AUTO (THROUGHPUT): 0.0040 seconds per image (250.15 FPS) + OpenVINO model on AUTO (THROUGHPUT). First inference time: 0.0247 seconds + OpenVINO model on AUTO (THROUGHPUT): 0.0040 seconds per image (248.93 FPS) -OpenVINO IR model in cumulative throughput mode on AUTO -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenVINO IR model in cumulative throughput mode on AUTO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The AUTO device in throughput mode will select the best, but one physical device to bring the highest throughput. However, if we have @@ -618,12 +630,12 @@ activate all devices. .. parsed-literal:: - OpenVINO model on AUTO (CUMULATIVE THROUGHPUT). First inference time: 0.0254 seconds - OpenVINO model on AUTO (CUMULATIVE THROUGHPUT): 0.0040 seconds per image (249.15 FPS) + OpenVINO model on AUTO (CUMULATIVE THROUGHPUT). 
First inference time: 0.0258 seconds + OpenVINO model on AUTO (CUMULATIVE THROUGHPUT): 0.0040 seconds per image (250.04 FPS) -Other tricks -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Other tricks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are other tricks for performance improvement, such as advanced options, quantization and pre-post-processing or dedicated to latency @@ -634,8 +646,8 @@ options `__, and `118-optimize-preprocessing <../118-optimize-preprocessing>`__. -Performance comparison -############################################################################################################################### +Performance comparison +---------------------------------------------------------------- The following graphical comparison is valid for the selected model and hardware simultaneously. If you cannot see any improvement between some @@ -670,8 +682,8 @@ steps, just skip them. .. image:: 109-throughput-tricks-with-output_files/109-throughput-tricks-with-output_33_0.png -Conclusions -############################################################################################################################### +Conclusions +----------------------------------------------------- We already showed the steps needed to improve the throughput of an object detection model. 
Even if you experience much better performance diff --git a/docs/notebooks/109-throughput-tricks-with-output_files/109-throughput-tricks-with-output_33_0.png b/docs/notebooks/109-throughput-tricks-with-output_files/109-throughput-tricks-with-output_33_0.png index d6c90e28f4149d..631ed0cf8ecf48 100644 --- a/docs/notebooks/109-throughput-tricks-with-output_files/109-throughput-tricks-with-output_33_0.png +++ b/docs/notebooks/109-throughput-tricks-with-output_files/109-throughput-tricks-with-output_33_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1fc053f1a52fbefbbfbfaaa6e9d0d5c11ddfdc0481929bfd0dd074338a67509 -size 62467 +oid sha256:10e27ed40dfd078777a2cd9513a00136b6327571b82ef0af6485b8ea5234dcfa +size 62451 diff --git a/docs/notebooks/109-throughput-tricks-with-output_files/index.html b/docs/notebooks/109-throughput-tricks-with-output_files/index.html index e5167f73bef355..1ec6286d016602 100644 --- a/docs/notebooks/109-throughput-tricks-with-output_files/index.html +++ b/docs/notebooks/109-throughput-tricks-with-output_files/index.html @@ -1,15 +1,14 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/109-throughput-tricks-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/109-throughput-tricks-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/109-throughput-tricks-with-output_files/


../
-109-throughput-tricks-with-output_14_0.jpg         16-Aug-2023 01:31              162715
-109-throughput-tricks-with-output_17_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_20_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_22_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_26_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_28_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_30_0.jpg         16-Aug-2023 01:31              162756
-109-throughput-tricks-with-output_33_0.png         16-Aug-2023 01:31               77855
-109-throughput-tricks-with-output_4_0.jpg          16-Aug-2023 01:31              155828
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/109-throughput-tricks-with-output_files/


../
+109-throughput-tricks-with-output_14_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_17_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_20_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_24_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_28_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_30_0.jpg         31-Oct-2023 00:35              162715
+109-throughput-tricks-with-output_33_0.png         31-Oct-2023 00:35               62451
+109-throughput-tricks-with-output_4_0.jpg          31-Oct-2023 00:35              155828
 

diff --git a/docs/notebooks/110-ct-scan-live-inference-with-output.rst b/docs/notebooks/110-ct-scan-live-inference-with-output.rst index 713d691ac05e65..344cc3ec6509cb 100644 --- a/docs/notebooks/110-ct-scan-live-inference-with-output.rst +++ b/docs/notebooks/110-ct-scan-live-inference-with-output.rst @@ -2,7 +2,7 @@ Live Inference and Benchmark CT-scan Data with OpenVINO™ ======================================================== Kidney Segmentation with PyTorch Lightning and OpenVINO™ - Part 4 -############################################################################################################################### +----------------------------------------------------------------- This tutorial is a part of a series on how to train, optimize, quantize and show live inference on a medical segmentation model. The goal is to @@ -31,24 +31,34 @@ scan to use for inference. **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Benchmark Model Performance <#benchmark-model-performance>`__ -- `Download and Prepare Data <#download-and-prepare-data>`__ -- `Show Live Inference <#show-live-inference>`__ - - `Load Model and List of Image Files <#load-model-and-list-of-image-files>`__ - - `Prepare images <#prepare-images>`__ - - `Specify device <#specify-device>`__ - - `Setting callback function <#setting-callback-function>`__ - - `Create asynchronous inference queue and perform it <#create-asynchronous-inference-queue-and-perform-it>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Benchmark Model + Performance <#benchmark-model-performance>`__ +- `Download and Prepare Data <#download-and-prepare-data>`__ +- `Show Live Inference <#show-live-inference>`__ + + - `Load Model and List of Image + Files <#load-model-and-list-of-image-files>`__ + - `Prepare images <#prepare-images>`__ + - `Specify device <#specify-device>`__ + - `Setting callback function <#setting-callback-function>`__ + - `Create asynchronous inference queue and perform + 
it <#create-asynchronous-inference-queue-and-perform-it>`__ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" "monai>=0.9.1,<1.0.0" + %pip install -q "openvino>=2023.1.0" "monai>=0.9.1,<1.0.0" "nncf>=2.5.0" + + +.. parsed-literal:: -Imports -############################################################################################################################### + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 @@ -69,14 +79,14 @@ Imports .. parsed-literal:: - 2023-09-08 22:52:19.504111: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:52:19.539771: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 22:42:33.368243: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:42:33.402770: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-09-08 22:52:20.182360: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-30 22:42:34.097093: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Settings -############################################################################################################################### +Settings +-------------------------------------------------- To use the pre-trained models, set ``IR_PATH`` to ``"pretrained_model/unet44.xml"`` and ``COMPRESSED_MODEL_PATH`` to @@ -113,8 +123,8 @@ trained or optimized yourself, adjust the model paths. pretrained_model/quantized_unet_kits19.bin: 0%| | 0.00/1.90M [00:00`__ @@ -122,9 +132,7 @@ Tool `__. We will use -```AsyncInferQueue`` `__ +`AsyncInferQueue `__ to perform asynchronous inference. It can be instantiated with compiled model and a number of jobs - parallel execution threads. If you don’t pass a number of jobs or pass ``0``, then OpenVINO will pick the optimal @@ -319,8 +327,8 @@ inference queue, there are two jobs to do: Everything else will be handled by the ``AsyncInferQueue`` instance. -Load Model and List of Image Files -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load Model and List of Image Files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Load the segmentation model to OpenVINO Runtime with ``SegmentationModel``, based on the Model API from `Open Model @@ -346,8 +354,8 @@ to see the implementation. case_00117, 69 images -Prepare images -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prepare images +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the ``reader = LoadImage()`` function to read the images in the same way as in the @@ -367,8 +375,8 @@ tutorial. 
framebuf.append(image) next_frame_id += 1 -Specify device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Specify device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -383,8 +391,8 @@ Specify device -Setting callback function -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Setting callback function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When ``callback`` is set, any job that ends the inference, calls the Python function. The ``callback`` function must have two arguments: one @@ -417,8 +425,8 @@ The ``callback`` function will show the results of inference. display.clear_output(wait=True) display.display(i) -Create asynchronous inference queue and perform it -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create asynchronous inference queue and perform it +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -462,6 +470,6 @@ Create asynchronous inference queue and perform it .. parsed-literal:: Loaded model to Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') in 0.22 seconds. 
- Total time to infer all frames: 3.520s - Time per frame: 0.051761s (19.320 FPS) + Total time to infer all frames: 3.558s + Time per frame: 0.052326s (19.111 FPS) diff --git a/docs/notebooks/110-ct-scan-live-inference-with-output_files/index.html b/docs/notebooks/110-ct-scan-live-inference-with-output_files/index.html index bc2298c536dffb..c3b91a9439d7e4 100644 --- a/docs/notebooks/110-ct-scan-live-inference-with-output_files/index.html +++ b/docs/notebooks/110-ct-scan-live-inference-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/110-ct-scan-live-inference-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/110-ct-scan-live-inference-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/110-ct-scan-live-inference-with-output_files/


../
-110-ct-scan-live-inference-with-output_21_0.png    16-Aug-2023 01:31               48780
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/110-ct-scan-live-inference-with-output_files/


../
+110-ct-scan-live-inference-with-output_21_0.png    31-Oct-2023 00:35               48780
 

diff --git a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output.rst index da35d4ac5db319..f189bad8482e59 100644 --- a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output.rst @@ -2,7 +2,7 @@ Quantize a Segmentation Model and Show Live Inference ===================================================== Kidney Segmentation with PyTorch Lightning and OpenVINO™ - Part 3 -############################################################################################################################### +----------------------------------------------------------------- This tutorial is a part of a series on how to train, optimize, quantize and show live inference on a medical segmentation model. The goal is to @@ -13,21 +13,27 @@ scratch; the data is from This third tutorial in the series shows how to: -- Convert an Original model to OpenVINO IR with `model conversion API `__ +- Convert an Original model to OpenVINO IR with `model conversion + API `__ - Quantize a PyTorch model with NNCF -- Evaluate the F1 score metric of the original model and the quantized model +- Evaluate the F1 score metric of the original model and the quantized + model - Benchmark performance of the FP32 model and the INT8 quantized model - Show live inference with OpenVINO’s async API All notebooks in this series: -- `Data Preparation for 2D Segmentation of 3D Medical Data `__ -- `Train a 2D-UNet Medical Imaging Model with PyTorch Lightning `__ -- Convert and Quantize a Segmentation Model and Show Live Inference (**this notebook**) -- `Live Inference and Benchmark CT-scan data <110-ct-scan-live-inference-with-output.html>`__ +- `Data Preparation for 2D Segmentation of 3D Medical + Data `__ +- `Train a 2D-UNet Medical Imaging Model with PyTorch + Lightning `__ +- Convert and Quantize a Segmentation Model and Show Live Inference + (this notebook) +- 
`Live Inference and Benchmark CT-scan + data <110-ct-scan-live-inference.ipynb>`__ Instructions -############################################################################################################################### +------------ This notebook needs a trained UNet model. We provide a pre-trained model, trained for 20 epochs with the full @@ -49,36 +55,49 @@ purposes, use a representative dataset for quantizing the model. **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Load PyTorch Model <#load-pytorch-model>`__ -- `Download CT-scan Data <#download-ct-scan-data>`__ -- `Configuration <#configuration>`__ - - `Dataset <#dataset>`__ - - `Metric <#metric>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Load PyTorch Model <#load-pytorch-model>`__ +- `Download CT-scan Data <#download-ct-scan-data>`__ +- `Configuration <#configuration>`__ -- `Quantization <#quantization>`__ -- `Compare FP32 and INT8 Model <#compare-fp32-and-int8-model>`__ + - `Dataset <#dataset>`__ + - `Metric <#metric>`__ - - `Compare File Size <#compare-file-size>`__ - - `Compare Metrics for the original model and the quantized model to be sure that there no degradation. <#compare-metrics-for-the-original-model-and-the-quantized-model-to-be-sure-that-there-no-degradation>`__ - - `Compare Performance of the FP32 IR Model and Quantized Models <#compare-performance-of-the-fp32-ir-model-and-quantized-models>`__ - - `Visually Compare Inference Results <#visually-compare-inference-results>`__ +- `Quantization <#quantization>`__ +- `Compare FP32 and INT8 + Model <#compare-fp-and-int-model>`__ -- `Show Live Inference <#show-live-inference>`__ + - `Compare File Size <#compare-file-size>`__ + - `Compare Metrics for the original model and the quantized model to + be sure that there no + degradation. 
<#compare-metrics-for-the-original-model-and-the-quantized-model-to-be-sure-that-there-no-degradation>`__ + - `Compare Performance of the FP32 IR Model and Quantized + Models <#compare-performance-of-the-fp-ir-model-and-quantized-models>`__ + - `Visually Compare Inference + Results <#visually-compare-inference-results>`__ - - `Load Model and List of Image Files <#load-model-and-list-of-image-files>`__ - - `Show Inference <#show-inference>`__ +- `Show Live Inference <#show-live-inference>`__ -- `References <#references>`__ + - `Load Model and List of Image + Files <#load-model-and-list-of-image-files>`__ + - `Show Inference <#show-inference>`__ + +- `References <#references>`__ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" "monai>=0.9.1,<1.0.0" "torchmetrics>=0.11.0" + %pip install -q "openvino>=2023.1.0" "monai>=0.9.1,<1.0.0" "torchmetrics>=0.11.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + -Imports -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 @@ -162,10 +181,10 @@ Imports .. parsed-literal:: - 2023-09-08 22:52:53.736369: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:52:53.771077: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 22:43:08.129843: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:43:08.164608: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:52:54.411775: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-30 22:43:08.732898: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -173,8 +192,8 @@ Imports INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -Settings -############################################################################################################################### +Settings +-------------------------------------------------- By default, this notebook will download one CT scan from the KITS19 dataset that will be used for quantization. To use the full dataset, set @@ -189,8 +208,8 @@ dataset that will be used for quantization. To use the full dataset, set MODEL_DIR = Path("model") MODEL_DIR.mkdir(exist_ok=True) -Load PyTorch Model -############################################################################################################################### +Load PyTorch Model +------------------------------------------------------------ Download the pre-trained model weights, load the PyTorch model and the ``state_dict`` that was saved after training. The model used in this @@ -235,8 +254,8 @@ notebook `__. -Download CT-scan Data -############################################################################################################################### +Download CT-scan Data +--------------------------------------------------------------- .. 
code:: ipython3 @@ -261,11 +280,11 @@ Download CT-scan Data Data for case_00117 exists -Configuration -############################################################################################################################### +Configuration +------------------------------------------------------- -Dataset -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``KitsDataset`` class in the next cell expects images and masks in the *``basedir``* directory, in a folder per patient. It is a simplified @@ -273,7 +292,7 @@ version of the Dataset class in the `training notebook `__. Images are loaded with MONAI’s -```LoadImage`` `__, +`LoadImage `__, to align with the image loading method in the training notebook. This method rotates and flips the images. We define a ``rotate_and_flip`` method to display the images in the expected orientation: @@ -362,8 +381,8 @@ kidney pixels to verify that the annotations look correct: .. image:: 110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_15_1.png -Metric -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Metric +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Define a metric to determine the performance of the model. @@ -397,8 +416,8 @@ library. metric.update(label.flatten(), prediction.flatten()) return metric.compute() -Quantization -############################################################################################################################### +Quantization +------------------------------------------------------ Before quantizing the model, we compute the F1 score on the ``FP32`` model, for comparison: @@ -435,7 +454,7 @@ this notebook. [ WARNING ] Please fix your imports. Module %s has been moved to %s. 
The old module will be deleted in version %s. No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:179: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:179: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -493,14 +512,12 @@ model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:338: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:336: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:346: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:344: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:179: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:179: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/quantize_functions.py:140: FutureWarning: 'torch.onnx._patch_torch._graph_op' is deprecated in version 1.13 and will be removed in version 1.14. Please note 'g.op()' is to be removed from torch.Graph. Please open a GitHub issue if you need this functionality.. - output = g.op( This notebook demonstrates post-training quantization with NNCF. @@ -510,11 +527,11 @@ than quantization. See the `NNCF documentation `__ in the NNCF repository for more information. -Compare FP32 and INT8 Model -############################################################################################################################### +Compare FP32 and INT8 Model +--------------------------------------------------------------------- -Compare File Size -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare File Size +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -528,11 +545,11 @@ Compare File Size .. parsed-literal:: FP32 IR model size: 3864.14 KB - INT8 model size: 1940.41 KB + INT8 model size: 1940.55 KB -Compare Metrics for the original model and the quantized model to be sure that there no degradation. -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare Metrics for the original model and the quantized model to be sure that there no degradation. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -551,8 +568,8 @@ Compare Metrics for the original model and the quantized model to be sure that t INT8 F1: 0.999 -Compare Performance of the FP32 IR Model and Quantized Models -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare Performance of the FP32 IR Model and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To measure the inference performance of the ``FP32`` and ``INT8`` models, we use `Benchmark @@ -562,9 +579,7 @@ command line application, part of OpenVINO development tools, that can be run in the notebook with ``! benchmark_app`` or ``%sx benchmark_app``. -.. note:: - - For the most accurate performance estimation, it is + **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on @@ -591,18 +606,18 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 26.77 ms + [ INFO ] Read model took 34.85 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [?,?,?,?] @@ -616,7 +631,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add_425) : f32 / [...] / [?,1,16..,16..] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 79.78 ms + [ INFO ] Compile model took 76.76 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -638,9 +653,9 @@ be run in the notebook with ``! benchmark_app`` or [Step 9/11] Creating infer requests and preparing input tensors [ ERROR ] Input x is dynamic. Provide data shapes! Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 485, in main + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 485, in main data_queue = get_input_data(paths_to_input, app_inputs_info) - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/utils/inputs_filling.py", line 123, in get_input_data + File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/utils/inputs_filling.py", line 123, in get_input_data raise Exception(f"Input {info.name} is dynamic. Provide data shapes!") Exception: Input x is dynamic. Provide data shapes! @@ -657,18 +672,18 @@ be run in the notebook with ``! 
benchmark_app`` or [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 13.90 ms + [ INFO ] Read model took 30.95 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x.1 (node: x.1) : f32 / [...] / [1,1,512,512] @@ -682,10 +697,10 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] 578 (node: 578) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 178.40 ms + [ INFO ] Compile model took 199.95 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NETWORK_NAME: main_graph [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1 [ INFO ] NUM_STREAMS: 1 [ INFO ] AFFINITY: Affinity.CORE @@ -706,21 +721,21 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x.1' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 33.31 ms + [ INFO ] First inference took 33.43 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 961 iterations - [ INFO ] Duration: 15003.95 ms + [ INFO ] Count: 954 iterations + [ INFO ] Duration: 15006.26 ms [ INFO ] Latency: - [ INFO ] Median: 15.33 ms - [ INFO ] Average: 15.40 ms - [ INFO ] Min: 15.03 ms - [ INFO ] Max: 18.25 ms - [ INFO ] Throughput: 64.05 FPS + [ INFO ] Median: 15.47 ms + [ INFO ] Average: 15.52 ms + [ INFO ] Min: 15.18 ms + [ INFO ] Max: 19.34 ms + [ INFO ] Throughput: 63.57 FPS -Visually Compare Inference Results -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Visually Compare Inference Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Visualize the results of the model on four slices of the validation set. Compare the results of the ``FP32`` IR model with the results of the @@ -738,9 +753,7 @@ slices are annotated as kidney. Run this cell again to show results on a different subset. The random seed is displayed to enable reproducing specific runs of this cell. -.. note:: - - The images are shown after optional augmenting and + **NOTE**: the images are shown after optional augmenting and resizing. In the Kits19 dataset all but one of the cases has the ``(512, 512)`` input shape. @@ -801,15 +814,15 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1694206463 + Visualizing results with seed 1698702266 .. 
image:: 110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png -Show Live Inference -############################################################################################################################### +Show Live Inference +------------------------------------------------------------- To show live inference on the model in the notebook, we will use the asynchronous processing feature of OpenVINO. @@ -823,13 +836,11 @@ inference on the specified CT scan has completed, the total time and throughput (fps), including preprocessing and displaying, will be printed. -.. note:: - - If you experience flickering on Firefox, consider using + **NOTE**: If you experience flickering on Firefox, consider using Chrome or Edge to run this notebook. -Load Model and List of Image Files -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load Model and List of Image Files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We load the segmentation model to OpenVINO Runtime with ``SegmentationModel``, based on the `Open Model @@ -855,8 +866,8 @@ overlay of the segmentation mask on the original image/frame. case_00117, 69 images -Show Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the next cell, we run the ``show_live_inference`` function, which loads the ``segmentation_model`` to the specified ``device`` (using @@ -880,27 +891,24 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: - Loaded model to CPU in 0.19 seconds. 
- Total time for 68 frames: 3.46 seconds, fps:19.95 - - -References -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -**OpenVINO** - `NNCF -Repository `__ - `Neural -Network Compression Framework for fast model -inference `__ - `OpenVINO API -Tutorial <002-openvino-api-with-output.html>`__ - `OpenVINO -PyPI (pip install -openvino-dev) `__ - -**Kits19 Data** - `Kits19 Challenge -Homepage `__ - `Kits19 GitHub -Repository `__ - `The KiTS19 -Challenge Data: 300 Kidney Tumor Cases with Clinical Context, CT -Semantic Segmentations, and Surgical -Outcomes `__ - `The state of the art -in kidney and kidney tumor segmentation in contrast-enhanced CT imaging: -Results of the KiTS19 -challenge `__ + Loaded model to CPU in 0.17 seconds. + Total time for 68 frames: 3.43 seconds, fps:20.10 + + +References +---------------------------------------------------- + +**OpenVINO** + +- `NNCF Repository `__ +- `Neural Network Compression Framework for fast model +inference `__ +- `OpenVINO API Tutorial <002-openvino-api-with-output.html>`__ +- `OpenVINO PyPI (pip install openvino-dev) `__ + +**Kits19 Data** + +- `Kits19 Challenge Homepage `__ +- `Kits19 GitHub Repository `__ +- `The KiTS19 Challenge Data: 300 Kidney Tumor Cases with Clinical Context, CT Semantic Segmentations, and Surgical Outcomes `__ +- `The state of the art in kidney and kidney tumor segmentation in contrast-enhanced CT imaging: Results of the KiTS19 challenge `__ diff --git a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png index d2cfe72b3c1ca8..9b09719c5d40b7 100644 --- a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png +++ 
b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/110-ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73fedab3a1670adba1dd0624fc1ef3e604734eb937542bd55ef0b1ea9dc17f4e -size 379036 +oid sha256:6503457981c8d2d24c46d2879b1a499fb4143c566a5196b903b50b49c4094cc2 +size 378309 diff --git a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/index.html b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/index.html index ca5e1d853de391..e8d45cd1099e5d 100644 --- a/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/index.html +++ b/docs/notebooks/110-ct-segmentation-quantize-nncf-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/


../
-110-ct-segmentation-quantize-nncf-with-output_1..> 16-Aug-2023 01:31              158997
-110-ct-segmentation-quantize-nncf-with-output_3..> 16-Aug-2023 01:31              383352
-110-ct-segmentation-quantize-nncf-with-output_4..> 16-Aug-2023 01:31               73812
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/110-ct-segmentation-quantize-nncf-with-output_files/


../
+110-ct-segmentation-quantize-nncf-with-output_1..> 31-Oct-2023 00:35              158997
+110-ct-segmentation-quantize-nncf-with-output_3..> 31-Oct-2023 00:35              378309
+110-ct-segmentation-quantize-nncf-with-output_4..> 31-Oct-2023 00:35               73812
 

diff --git a/docs/notebooks/111-yolov5-quantization-migration-with-output.rst b/docs/notebooks/111-yolov5-quantization-migration-with-output.rst index e76d5404ea30ae..46a8162d8bcc2f 100644 --- a/docs/notebooks/111-yolov5-quantization-migration-with-output.rst +++ b/docs/notebooks/111-yolov5-quantization-migration-with-output.rst @@ -25,42 +25,60 @@ The tutorial consists from the following parts: **Table of contents:** -- `Preparation <#preparation>`__ - - `Download the YOLOv5 model <#download-the-yolov5-model>`__ - - `Conversion of the YOLOv5 model to OpenVINO <#conversion-of-the-yolov5-model-to-openvino>`__ - - `Imports <#imports>`__ +- `Preparation <#preparation>`__ -- `Prepare dataset for quantization <#prepare-dataset-for-quantization>`__ + - `Download the YOLOv5 model <#download-the-yolov-model>`__ + - `Conversion of the YOLOv5 model to + OpenVINO <#conversion-of-the-yolov-model-to-openvino>`__ + - `Imports <#imports>`__ - - `Create YOLOv5 DataLoader class for POT <#create-yolov5-dataloader-class-for-pot>`__ - - `Create NNCF Dataset <#create-nncf-dataset>`__ +- `Prepare dataset for + quantization <#prepare-dataset-for-quantization>`__ -- `Configure quantization pipeline <#configure-quantization-pipeline>`__ + - `Create YOLOv5 DataLoader class for + POT <#create-yolov-dataloader-class-for-pot>`__ + - `Create NNCF Dataset <#create-nncf-dataset>`__ - - `Prepare config and pipeline for POT <#prepare-config-and-pipeline-for-pot>`__ - - `Prepare configuration parameters for NNCF <#prepare-configuration-parameters-for-nncf>`__ +- `Configure quantization + pipeline <#configure-quantization-pipeline>`__ -- `Perform model optimization <#perform-model-optimization>`__ + - `Prepare config and pipeline for + POT <#prepare-config-and-pipeline-for-pot>`__ + - `Prepare configuration parameters for + NNCF <#prepare-configuration-parameters-for-nncf>`__ - - `Run quantization using POT <#run-quantization-using-pot>`__ - - `Run quantization using NNCF 
<#run-quantization-using-nncf>`__ +- `Perform model optimization <#perform-model-optimization>`__ -- `Compare accuracy FP32 and INT8 models <#compare-accuracy-fp32-and-int8-models>`__ -- `Inference Demo Performance Comparison <#inference-demo-performance-comparison>`__ -- `Benchmark <#benchmark>`__ -- `References <#references>`__ + - `Run quantization using + POT <#run-quantization-using-pot>`__ + - `Run quantization using + NNCF <#run-quantization-using-nncf>`__ -Preparation -############################################################################################################################### +- `Compare accuracy FP32 and INT8 + models <#compare-accuracy-fp-and-int-models>`__ +- `Inference Demo Performance + Comparison <#inference-demo-performance-comparison>`__ +- `Benchmark <#benchmark>`__ +- `References <#references>`__ -Download the YOLOv5 model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Download the YOLOv5 model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino-dev==2023.1.0.dev20230811" "nncf>=2.5.0" - !pip install -q psutil "seaborn>=0.11.0" matplotlib numpy onnx + %pip install -q "openvino-dev>=2023.1.0" "nncf>=2.5.0" + %pip install -q psutil "seaborn>=0.11.0" matplotlib numpy onnx + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + .. 
code:: ipython3 @@ -91,8 +109,8 @@ Download the YOLOv5 model ``git clone https://github.com/ultralytics/yolov5.git -b v7.0`` -Conversion of the YOLOv5 model to OpenVINO -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Conversion of the YOLOv5 model to OpenVINO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are three variables provided for easy run through all the notebook cells. @@ -139,21 +157,21 @@ following content: .. parsed-literal:: export: data=data/coco128.yaml, weights=['yolov5m/yolov5m.pt'], imgsz=[640], batch_size=1, device=cpu, half=False, inplace=False, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=12, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['ONNX'] - YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-1.13.1+cpu CPU + YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-2.1.0+cpu CPU Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt to yolov5m/yolov5m.pt... - 100%|██████████████████████████████████████| 40.8M/40.8M [00:10<00:00, 4.11MB/s] + 100%|██████████████████████████████████████| 40.8M/40.8M [00:10<00:00, 4.01MB/s] Fusing layers... YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients PyTorch: starting from yolov5m/yolov5m.pt with output shape (1, 25200, 85) (40.8 MB) - ONNX: starting export with onnx 1.14.1... - ONNX: export success ✅ 1.3s, saved as yolov5m/yolov5m.onnx (81.2 MB) + ONNX: starting export with onnx 1.15.0... 
+ ONNX: export success ✅ 1.4s, saved as yolov5m/yolov5m.onnx (81.2 MB) - Export complete (13.3s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/yolov5m + Export complete (13.8s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/yolov5m Detect: python detect.py --weights yolov5m/yolov5m.onnx Validate: python val.py --weights yolov5m/yolov5m.onnx PyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5m/yolov5m.onnx') @@ -203,8 +221,8 @@ saved with FP16 precision. Export ONNX to OpenVINO FP16 IR to: yolov5/yolov5m/FP16_openvino_model/yolov5m_fp16.xml -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -213,8 +231,8 @@ Imports from yolov5.utils.dataloaders import create_dataloader from yolov5.utils.general import check_dataset -Prepare dataset for quantization -############################################################################################################################### +Prepare dataset for quantization +-------------------------------------------------------------------------- Before starting quantization, we should prepare dataset, which will be used for quantization. Ultralytics YOLOv5 provides data loader for @@ -251,23 +269,14 @@ first. .. parsed-literal:: Downloading https://ultralytics.com/assets/coco128.zip to datasets/coco128.zip... - - - -.. parsed-literal:: - - 0%| | 0.00/6.66M [00:00`__ +`detect.py `__ to run synchronous inference, using the OpenVINO Python API on two images. @@ -780,12 +785,12 @@ images. .. 
parsed-literal:: ["\x1b[34m\x1b[1mdetect: \x1b[0mweights=['./yolov5m/FP32_openvino_model'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1", - 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-1.13.1+cpu CPU', + 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-2.1.0+cpu CPU', '', 'Loading yolov5m/FP32_openvino_model for OpenVINO inference...', - 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 56.6ms', - 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 2 ties, 45.9ms', - 'Speed: 1.5ms pre-process, 51.2ms inference, 1.3ms NMS per image at shape (1, 3, 640, 640)', + 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 57.4ms', + 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 2 ties, 42.3ms', + 'Speed: 1.4ms pre-process, 49.8ms inference, 1.3ms NMS per image at shape (1, 3, 640, 640)', 'Results saved to \x1b[1mruns/detect/exp\x1b[0m'] @@ -807,12 +812,12 @@ images. .. 
parsed-literal:: ["\x1b[34m\x1b[1mdetect: \x1b[0mweights=['./yolov5m/POT_INT8_openvino_model'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1", - 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-1.13.1+cpu CPU', + 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-2.1.0+cpu CPU', '', 'Loading yolov5m/POT_INT8_openvino_model for OpenVINO inference...', - 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 35.4ms', - 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 1 tie, 33.8ms', - 'Speed: 1.6ms pre-process, 34.6ms inference, 1.4ms NMS per image at shape (1, 3, 640, 640)', + 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 36.7ms', + 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 1 tie, 31.5ms', + 'Speed: 1.5ms pre-process, 34.1ms inference, 1.4ms NMS per image at shape (1, 3, 640, 640)', 'Results saved to \x1b[1mruns/detect/exp2\x1b[0m'] @@ -834,12 +839,12 @@ images. .. 
parsed-literal:: ["\x1b[34m\x1b[1mdetect: \x1b[0mweights=['./yolov5m/NNCF_INT8_openvino_model'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1", - 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-1.13.1+cpu CPU', + 'YOLOv5 🚀 v7.0-0-g915bbf2 Python-3.8.10 torch-2.1.0+cpu CPU', '', 'Loading yolov5m/NNCF_INT8_openvino_model for OpenVINO inference...', - 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 37.1ms', - 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 2 ties, 30.5ms', - 'Speed: 1.6ms pre-process, 33.8ms inference, 1.4ms NMS per image at shape (1, 3, 640, 640)', + 'image 1/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/bus.jpg: 640x640 4 persons, 1 bus, 37.3ms', + 'image 2/2 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/111-yolov5-quantization-migration/yolov5/data/images/zidane.jpg: 640x640 3 persons, 2 ties, 30.5ms', + 'Speed: 1.5ms pre-process, 33.9ms inference, 1.4ms NMS per image at shape (1, 3, 640, 640)', 'Results saved to \x1b[1mruns/detect/exp3\x1b[0m'] @@ -870,8 +875,8 @@ images. .. 
image:: 111-yolov5-quantization-migration-with-output_files/111-yolov5-quantization-migration-with-output_40_0.png -Benchmark -############################################################################################################################### +Benchmark +--------------------------------------------------- .. code:: ipython3 @@ -888,7 +893,70 @@ Benchmark .. parsed-literal:: Inference FP32 model (OpenVINO IR) on CPU - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 39.90 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,25200,85] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] 
/ [1,25200,85] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 322.73 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: main_graph + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 102.38 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 450 iterations + [ INFO ] Duration: 15134.50 ms + [ INFO ] Latency: + [ INFO ] Median: 201.54 ms + [ INFO ] Average: 200.97 ms + [ INFO ] Min: 133.47 ms + [ INFO ] Max: 216.06 ms + [ INFO ] Throughput: 29.73 FPS .. code:: ipython3 @@ -904,7 +972,70 @@ Benchmark .. parsed-literal:: Inference FP16 model (OpenVINO IR) on CPU - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 
2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 33.36 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,25200,85] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] 
/ [1,25200,85] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 346.06 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: main_graph + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 99.48 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 456 iterations + [ INFO ] Duration: 15202.34 ms + [ INFO ] Latency: + [ INFO ] Median: 200.63 ms + [ INFO ] Average: 199.53 ms + [ INFO ] Min: 115.31 ms + [ INFO ] Max: 219.45 ms + [ INFO ] Throughput: 30.00 FPS .. code:: ipython3 @@ -920,7 +1051,70 @@ Benchmark .. 
parsed-literal:: Inference POT INT8 model (OpenVINO IR) on CPU - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 48.57 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,25200,85] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] 
/ [1,25200,85] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 684.56 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: main_graph + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 52.63 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 1416 iterations + [ INFO ] Duration: 15069.24 ms + [ INFO ] Latency: + [ INFO ] Median: 63.76 ms + [ INFO ] Average: 63.63 ms + [ INFO ] Min: 49.18 ms + [ INFO ] Max: 83.95 ms + [ INFO ] Throughput: 93.97 FPS .. code:: ipython3 @@ -936,11 +1130,74 @@ Benchmark .. 
parsed-literal:: Inference NNCF INT8 model (OpenVINO IR) on CPU - /bin/bash: benchmark_app: command not found - - -References -############################################################################################################################### + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 52.87 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,25200,85] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] 
/ [1,25200,85] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 691.16 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: main_graph + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 50.65 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 1416 iterations + [ INFO ] Duration: 15060.29 ms + [ INFO ] Latency: + [ INFO ] Median: 63.70 ms + [ INFO ] Average: 63.61 ms + [ INFO ] Min: 47.79 ms + [ INFO ] Max: 83.82 ms + [ INFO ] Throughput: 94.02 FPS + + +References +---------------------------------------------------- - `Ultralytics YOLOv5 `__ - `OpenVINO Post-training Optimization diff --git a/docs/notebooks/111-yolov5-quantization-migration-with-output_files/index.html b/docs/notebooks/111-yolov5-quantization-migration-with-output_files/index.html index d42c2759d7f833..2ed92a387d86fe 100644 --- a/docs/notebooks/111-yolov5-quantization-migration-with-output_files/index.html +++ b/docs/notebooks/111-yolov5-quantization-migration-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/111-yolov5-quantization-migration-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/111-yolov5-quantization-migration-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/111-yolov5-quantization-migration-with-output_files/


../
-111-yolov5-quantization-migration-with-output_3..> 16-Aug-2023 01:31               33667
-111-yolov5-quantization-migration-with-output_4..> 16-Aug-2023 01:31              770524
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/111-yolov5-quantization-migration-with-output_files/


../
+111-yolov5-quantization-migration-with-output_3..> 31-Oct-2023 00:35               33667
+111-yolov5-quantization-migration-with-output_4..> 31-Oct-2023 00:35              770524
 

diff --git a/docs/notebooks/112-pytorch-post-training-quantization-nncf-with-output.rst b/docs/notebooks/112-pytorch-post-training-quantization-nncf-with-output.rst index 6beabff6bd0cc1..8fdf5bea838ea4 100644 --- a/docs/notebooks/112-pytorch-post-training-quantization-nncf-with-output.rst +++ b/docs/notebooks/112-pytorch-post-training-quantization-nncf-with-output.rst @@ -18,38 +18,55 @@ downsized to 64×64 colored images. The tutorial will demonstrate that only a tiny part of the dataset is needed for the post-training quantization, not demanding the fine-tuning of the model. -.. note:: - - This notebook requires that a C++ compiler is accessible on + **NOTE**: This notebook requires that a C++ compiler is accessible on the default binary search path of the OS you are running the notebook. **Table of contents:** -- `Preparations <#preparations>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ - - `Download and Prepare Tiny ImageNet dataset <#download-and-prepare-tiny-imagenet-dataset>`__ - - `Helpers classes and functions <#helpers-classes-and-functions>`__ - - `Validation function <#validation-function>`__ - - `Create and load original uncompressed model <#create-and-load-original-uncompressed-model>`__ - - `Create train and validation DataLoaders <#create-train-and-validation-dataloaders>`__ +- `Preparations <#preparations>`__ + + - `Imports <#imports>`__ + - `Settings <#settings>`__ + - `Download and Prepare Tiny ImageNet + dataset <#download-and-prepare-tiny-imagenet-dataset>`__ + - `Helpers classes and + functions <#helpers-classes-and-functions>`__ + - `Validation function <#validation-function>`__ + - `Create and load original uncompressed + model <#create-and-load-original-uncompressed-model>`__ + - `Create train and validation + DataLoaders <#create-train-and-validation-dataloaders>`__ + +- `Model quantization and + benchmarking <#model-quantization-and-benchmarking>`__ + + - `I. 
Evaluate the loaded + model <#i-evaluate-the-loaded-model>`__ + - `II. Create and initialize + quantization <#ii-create-and-initialize-quantization>`__ + - `III. Convert the models to OpenVINO Intermediate Representation + (OpenVINO + IR) <#iii-convert-the-models-to-openvino-intermediate-representation-openvino-ir>`__ + - `IV. Compare performance of INT8 model and FP32 model in + OpenVINO <#iv-compare-performance-of-int-model-and-fp-model-in-openvino>`__ + +Preparations +------------------------------------------------------ -- `Model quantization and benchmarking <#model-quantization-and-benchmarking>`__ +.. code:: ipython3 - - `I. Evaluate the loaded model <#i-evaluate-the-loaded-model>`__ - - `II. Create and initialize quantization <#ii-create-and-initialize-quantization>`__ - - `III. Convert the models to OpenVINO Intermediate Representation (OpenVINO IR) <#iii-convert-the-models-to-openvino-intermediate-representation-openvino-ir>`__ - - `IV. Compare performance of INT8 model and FP32 model in OpenVINO <#iv-compare-performance-of-int8-model-and-fp32-model-in-openvino>`__ + # Install openvino package + %pip install -q "openvino>=2023.1.0" torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.6.0" -Preparations -############################################################################################################################### -.. code:: ipython3 +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. - # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" .. 
code:: ipython3 @@ -88,8 +105,8 @@ Preparations os.environ["LIB"] = os.pathsep.join(b.library_dirs) print(f"Added {vs_dir} to PATH") -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -111,21 +128,13 @@ Imports from notebook_utils import download_file -.. parsed-literal:: - - 2023-09-08 22:58:07.638790: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 22:58:07.672794: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 22:58:08.221837: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -142,9 +151,7 @@ Settings # Paths where PyTorch and OpenVINO IR models will be stored. 
fp32_checkpoint_filename = Path(BASE_MODEL_NAME + "_fp32").with_suffix(".pth") - fp32_onnx_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_fp32").with_suffix(".onnx") fp32_ir_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_fp32").with_suffix(".xml") - int8_onnx_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_int8").with_suffix(".onnx") int8_ir_path = OUTPUT_DIR / Path(BASE_MODEL_NAME + "_int8").with_suffix(".xml") @@ -167,12 +174,12 @@ Settings .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/112-pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/112-pytorch-post-training-quantization-nncf/model/resnet50_fp32.pth') -Download and Prepare Tiny ImageNet dataset -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download and Prepare Tiny ImageNet dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 100k images of shape 3x64x64, - 200 different classes: snake, spider, cat, truck, grasshopper, gull, @@ -231,8 +238,8 @@ Download and Prepare Tiny ImageNet dataset Successfully downloaded and extracted dataset to: output -Helpers classes and functions -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Helpers classes and functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code below will help to count accuracy and visualize validation process. @@ -297,8 +304,8 @@ process. return res -Validation function -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Validation function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -350,10 +357,10 @@ Validation function ) return top1.avg -Create and load original uncompressed model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create and load original uncompressed model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -ResNet-50 from the ```torchivision`` +ResNet-50 from the `torchivision repository `__ is pre-trained on ImageNet with more prediction classes than Tiny ImageNet, so the model is adjusted by swapping the last FC layer to one with fewer output @@ -378,8 +385,8 @@ values. model = create_model(MODEL_DIR / fp32_checkpoint_filename) -Create train and validation DataLoaders -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create train and validation DataLoaders +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -428,15 +435,15 @@ Create train and validation DataLoaders train_loader, val_loader = create_dataloaders() -Model quantization and benchmarking -############################################################################################################################### +Model quantization and benchmarking +----------------------------------------------------------------------------- With the validation pipeline, model files, and data-loading procedures for model calibration now prepared, it’s time to proceed with the actual post-training quantization using NNCF. -I. Evaluate the loaded model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +I. Evaluate the loaded model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -446,20 +453,20 @@ I. Evaluate the loaded model .. 
parsed-literal:: - Test: [ 0/79] Time 0.289 (0.289) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) - Test: [10/79] Time 0.231 (0.240) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.234 (0.239) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.233 (0.239) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.242 (0.239) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.233 (0.242) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.241 (0.242) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.234 (0.241) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 18.830 + Test: [ 0/79] Time 0.260 (0.260) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [10/79] Time 0.232 (0.238) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) + Test: [20/79] Time 0.233 (0.237) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [30/79] Time 0.233 (0.236) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) + Test: [40/79] Time 0.236 (0.235) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.232 (0.235) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.230 (0.235) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.244 (0.235) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 18.416 Test accuracy of FP32 model: 60.740 -II. Create and initialize quantization -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +II. Create and initialize quantization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NNCF enables post-training quantization by adding the quantization layers into the model graph and then using a subset of the training @@ -493,6 +500,19 @@ Guide `__. -Before converting models, export them to ONNX. Executing the following -command may take a while. - .. 
code:: ipython3 dummy_input = torch.randn(128, 3, *IMAGE_SIZE) - torch.onnx.export(model, dummy_input, fp32_onnx_path) - model_ir = ov.convert_model(fp32_onnx_path, input=[-1, 3, *IMAGE_SIZE]) + model_ir = ov.convert_model(model, example_input=dummy_input, input=[-1, 3, *IMAGE_SIZE]) - ov.save_model(model_ir, str(fp32_ir_path)) + ov.save_model(model_ir, fp32_ir_path) + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + .. code:: ipython3 - torch.onnx.export(quantized_model, dummy_input, int8_onnx_path) - quantized_model_ir = ov.convert_model(int8_onnx_path, input=[-1, 3, *IMAGE_SIZE]) + quantized_model_ir = ov.convert_model(quantized_model, example_input=dummy_input, input=[-1, 3, *IMAGE_SIZE]) - ov.save_model(quantized_model_ir, str(int8_ir_path)) + ov.save_model(quantized_model_ir, int8_ir_path) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:338: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:336: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:346: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:344: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/quantize_functions.py:140: FutureWarning: 'torch.onnx._patch_torch._graph_op' is deprecated in version 1.13 and will be removed in version 1.14. Please note 'g.op()' is to be removed from torch.Graph. Please open a GitHub issue if you need this functionality.. - output = g.op( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_patch_torch.py:81: UserWarning: The shape inference of org.openvinotoolkit::FakeQuantize type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) 
- _C._jit_pass_onnx_node_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of org.openvinotoolkit::FakeQuantize type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of org.openvinotoolkit::FakeQuantize type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1093: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + Tensor-likes are not close! + + Mismatched elements: 24180 / 25600 (94.5%) + Greatest absolute difference: 0.3703504800796509 at index (32, 149) (up to 1e-05 allowed) + Greatest relative difference: 11.865051118017842 at index (64, 158) (up to 1e-05 allowed) + _check_trace( Select inference device for OpenVINO @@ -624,15 +649,15 @@ Evaluate the FP32 and INT8 models. .. 
parsed-literal:: - Test: [ 0/79] Time 0.199 (0.199) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) + Test: [ 0/79] Time 0.196 (0.196) Acc@1 81.25 (81.25) Acc@5 92.19 (92.19) Test: [10/79] Time 0.142 (0.146) Acc@1 56.25 (66.97) Acc@5 86.72 (87.50) - Test: [20/79] Time 0.139 (0.143) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) + Test: [20/79] Time 0.138 (0.143) Acc@1 67.97 (64.29) Acc@5 85.16 (87.35) Test: [30/79] Time 0.141 (0.142) Acc@1 53.12 (62.37) Acc@5 77.34 (85.33) - Test: [40/79] Time 0.140 (0.142) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) - Test: [50/79] Time 0.142 (0.142) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) - Test: [60/79] Time 0.145 (0.142) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) - Test: [70/79] Time 0.140 (0.142) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) - * Acc@1 60.740 Acc@5 83.960 Total time: 11.098 + Test: [40/79] Time 0.139 (0.142) Acc@1 67.19 (60.86) Acc@5 90.62 (84.51) + Test: [50/79] Time 0.141 (0.141) Acc@1 60.16 (60.80) Acc@5 88.28 (84.42) + Test: [60/79] Time 0.141 (0.141) Acc@1 66.41 (60.46) Acc@5 86.72 (83.79) + Test: [70/79] Time 0.140 (0.141) Acc@1 52.34 (60.21) Acc@5 80.47 (83.33) + * Acc@1 60.740 Acc@5 83.960 Total time: 11.027 Accuracy of FP32 IR model: 60.740 @@ -645,20 +670,20 @@ Evaluate the FP32 and INT8 models. .. 
parsed-literal:: - Test: [ 0/79] Time 0.191 (0.191) Acc@1 82.03 (82.03) Acc@5 91.41 (91.41) - Test: [10/79] Time 0.081 (0.092) Acc@1 60.16 (67.76) Acc@5 86.72 (87.29) - Test: [20/79] Time 0.079 (0.086) Acc@1 67.97 (64.96) Acc@5 85.16 (87.35) - Test: [30/79] Time 0.079 (0.084) Acc@1 53.12 (63.00) Acc@5 76.56 (85.26) - Test: [40/79] Time 0.079 (0.083) Acc@1 67.97 (61.34) Acc@5 89.84 (84.43) - Test: [50/79] Time 0.080 (0.082) Acc@1 60.94 (61.21) Acc@5 88.28 (84.38) - Test: [60/79] Time 0.080 (0.082) Acc@1 65.62 (60.75) Acc@5 85.94 (83.68) - Test: [70/79] Time 0.080 (0.082) Acc@1 53.12 (60.44) Acc@5 79.69 (83.25) - * Acc@1 61.050 Acc@5 83.880 Total time: 6.376 - Accuracy of INT8 IR model: 61.050 + Test: [ 0/79] Time 0.196 (0.196) Acc@1 82.03 (82.03) Acc@5 92.97 (92.97) + Test: [10/79] Time 0.082 (0.092) Acc@1 59.38 (67.76) Acc@5 85.16 (88.07) + Test: [20/79] Time 0.080 (0.086) Acc@1 67.97 (64.73) Acc@5 85.16 (87.65) + Test: [30/79] Time 0.078 (0.084) Acc@1 52.34 (62.83) Acc@5 76.56 (85.36) + Test: [40/79] Time 0.079 (0.083) Acc@1 69.53 (61.41) Acc@5 89.84 (84.20) + Test: [50/79] Time 0.077 (0.082) Acc@1 60.94 (61.29) Acc@5 87.50 (84.08) + Test: [60/79] Time 0.078 (0.082) Acc@1 67.19 (60.86) Acc@5 87.50 (83.52) + Test: [70/79] Time 0.079 (0.081) Acc@1 54.69 (60.59) Acc@5 79.69 (83.10) + * Acc@1 61.170 Acc@5 83.720 Total time: 6.361 + Accuracy of INT8 IR model: 61.170 -IV. Compare performance of INT8 model and FP32 model in OpenVINO -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +IV. Compare performance of INT8 model and FP32 model in OpenVINO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Finally, measure the inference performance of the ``FP32`` and ``INT8`` models, using `Benchmark @@ -668,9 +693,7 @@ Benchmark Tool runs inference for 60 seconds in asynchronous mode on CPU. 
It returns inference speed as latency (milliseconds per image) and throughput (frames per second) values. -.. note:: - - This notebook runs benchmark_app for 15 seconds to give a + **NOTE**: This notebook runs benchmark_app for 15 seconds to give a quick indication of performance. For more accurate performance, it is recommended to run benchmark_app in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` @@ -719,13 +742,13 @@ throughput (frames per second) values. .. parsed-literal:: Benchmark FP32 model (OpenVINO IR) - + [ INFO ] Throughput: 38.82 FPS Benchmark INT8 model (OpenVINO IR) - + [ INFO ] Throughput: 155.52 FPS Benchmark FP32 model (OpenVINO IR) synchronously - + [ INFO ] Throughput: 39.97 FPS Benchmark INT8 model (OpenVINO IR) synchronously - + [ INFO ] Throughput: 137.50 FPS Show device Information for reference: diff --git a/docs/notebooks/113-image-classification-quantization-with-output.rst b/docs/notebooks/113-image-classification-quantization-with-output.rst index f000f7569389e8..967bd14a529846 100644 --- a/docs/notebooks/113-image-classification-quantization-with-output.rst +++ b/docs/notebooks/113-image-classification-quantization-with-output.rst @@ -12,49 +12,62 @@ to apply quantization on PyTorch model, please check this This tutorial consists of the following steps: -- Prepare the model for quantization. -- Define a data loading functionality. -- Perform quantization. -- Compare accuracy of the original and quantized models. -- Compare performance of the original and quantized models. -- Compare results on one picture. +- Prepare the model for quantization. +- Define a data loading functionality. +- Perform quantization. +- Compare accuracy of the original and quantized models. +- Compare performance of the original and quantized models. +- Compare results on one picture. 
**Table of contents:** -- `Prepare the Model <#prepare-the-model>`__ -- `Prepare Dataset <#prepare-dataset>`__ -- `Perform Quantization <#perform-quantization>`__ - - `Create Dataset for Validation <#create-dataset-for-validation>`__ +- `Prepare the Model <#prepare-the-model>`__ +- `Prepare Dataset <#prepare-dataset>`__ +- `Perform Quantization <#perform-quantization>`__ -- `Run nncf.quantize for Getting an Optimized Model <#run-nncf.quantize-for-getting-an-optimized-model>`__ -- `Serialize an OpenVINO IR model <#serialize-an-openvino-ir-model>`__ -- `Compare Accuracy of the Original and Quantized Models <#compare-accuracy-of-the-original-and-quantized-models>`__ + - `Create Dataset for + Validation <#create-dataset-for-validation>`__ - - `Select inference device <#select-inference-device>`__ +- `Run nncf.quantize for Getting an Optimized + Model <#run-nncfquantize-for-getting-an-optimized-model>`__ +- `Serialize an OpenVINO IR + model <#serialize-an-openvino-ir-model>`__ +- `Compare Accuracy of the Original and Quantized + Models <#compare-accuracy-of-the-original-and-quantized-models>`__ -- `Compare Performance of the Original and Quantized Models <#compare-performance-of-the-original-and-quantized-models>`__ -- `Compare results on four pictures <#compare-results-on-four-pictures>`__ + - `Select inference device <#select-inference-device>`__ + +- `Compare Performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ +- `Compare results on four + pictures <#compare-results-on-four-pictures>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" "nncf>=2.6.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + .. 
code:: ipython3 from pathlib import Path # Set the data and model directories - DATA_DIR = Path('../data/datasets/cifar10') + DATA_DIR = Path("data") MODEL_DIR = Path('model') model_repo = 'pytorch-cifar-models' DATA_DIR.mkdir(exist_ok=True) MODEL_DIR.mkdir(exist_ok=True) -Prepare the Model -############################################################################################################################### +Prepare the Model +----------------------------------------------------------- Model preparation stage has the following steps: @@ -78,10 +91,10 @@ Model preparation stage has the following steps: Cloning into 'pytorch-cifar-models'... remote: Enumerating objects: 282, done. remote: Counting objects: 100% (281/281), done. - remote: Compressing objects: 100% (96/96), done. - remote: Total 282 (delta 135), reused 269 (delta 128), pack-reused 1 - Receiving objects: 100% (282/282), 9.22 MiB | 3.92 MiB/s, done. - Resolving deltas: 100% (135/135), done. + remote: Compressing objects: 100% (95/95), done. + remote: Total 282 (delta 136), reused 269 (delta 129), pack-reused 1 + Receiving objects: 100% (282/282), 9.22 MiB | 3.32 MiB/s, done. + Resolving deltas: 100% (136/136), done. .. code:: ipython3 @@ -112,27 +125,8 @@ can be found on this ov.save_model(ov_model, MODEL_DIR / "mobilenet_v2.xml") - -.. parsed-literal:: - - 2023-09-08 23:00:34.215999: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:00:34.251815: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-09-08 23:00:34.795978: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - - -Prepare Dataset -############################################################################################################################### +Prepare Dataset +--------------------------------------------------------- We will use `CIFAR10 `__ dataset from @@ -159,22 +153,21 @@ Preprocessing for model obtained from training .. parsed-literal:: - Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/datasets/cifar10/cifar-10-python.tar.gz - + Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz .. parsed-literal:: - 0%| | 0/170498071 [00:00`__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -187,8 +180,8 @@ MobileNetV2. The optimization process contains the following steps: 3. Serialize an OpenVINO IR model, using the ``openvino.save_model`` function. -Create Dataset for Validation -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create Dataset for Validation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NNCF is compatible with ``torch.utils.data.DataLoader`` interface. For performing quantization it should be passed into ``nncf.Dataset`` object @@ -206,8 +199,14 @@ model during quantization, in our case, to pick input tensor from pair quantization_dataset = nncf.Dataset(val_loader, transform_fn) -Run nncf.quantize for Getting an Optimized Model -############################################################################################################################### + +.. 
parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +Run nncf.quantize for Getting an Optimized Model +------------------------------------------------------------------------------------------ ``nncf.quantize`` function accepts model and prepared quantization dataset for performing basic quantization. Optionally, additional @@ -223,12 +222,16 @@ about supported parameters can be found on this .. parsed-literal:: - Statistics collection: 100%|██████████| 300/300 [00:08<00:00, 35.19it/s] - Biases correction: 100%|██████████| 36/36 [00:01<00:00, 21.91it/s] + 2023-10-30 22:54:06.313060: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:54:06.344685: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2023-10-30 22:54:06.959396: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Statistics collection: 100%|██████████| 300/300 [00:09<00:00, 31.98it/s] + Applying Fast Bias correction: 100%|██████████| 36/36 [00:01<00:00, 20.03it/s] -Serialize an OpenVINO IR model -############################################################################################################################### +Serialize an OpenVINO IR model +------------------------------------------------------------------------ Similar to ``ov.convert_model``, quantized model is ``ov.Model`` object which ready to be loaded into device and can be serialized on disk using @@ -238,8 +241,8 @@ which ready to be loaded into device and can be serialized on disk using ov.save_model(quant_ov_model, MODEL_DIR / "quantized_mobilenet_v2.xml") -Compare Accuracy of the Original and Quantized Models -############################################################################################################################### +Compare Accuracy of the Original and Quantized Models +----------------------------------------------------------------------------------------------- .. code:: ipython3 @@ -256,10 +259,10 @@ Compare Accuracy of the Original and Quantized Models total += 1 return correct / total -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -318,17 +321,15 @@ Select device from dropdown list for running inference using OpenVINO: Accuracy of the optimized model: 93.54% -Compare Performance of the Original and Quantized Models -############################################################################################################################### +Compare Performance of the Original and Quantized Models +-------------------------------------------------------------------------------------------------- Finally, measure the inference performance of the ``FP32`` and ``INT8`` models, using `Benchmark Tool `__ - an inference performance measurement tool in OpenVINO. -.. note:: - - For more accurate performance, it is recommended to run + **NOTE**: For more accurate performance, it is recommended to run benchmark_app in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on CPU for one minute. Change CPU to GPU to benchmark @@ -343,7 +344,78 @@ Tool + [ INFO ] NETWORK_NAME: Model2 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 3.36 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 90276 iterations + [ INFO ] Duration: 15002.97 ms + [ INFO ] Latency: + [ INFO ] Median: 1.76 ms + [ INFO ] Average: 1.79 ms + [ INFO ] Min: 1.06 ms + [ INFO ] Max: 8.55 ms + [ INFO ] Throughput: 6017.21 FPS .. code:: ipython3 @@ -354,11 +426,82 @@ Tool + [ INFO ] NETWORK_NAME: Model2 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 1.86 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 165852 iterations + [ INFO ] Duration: 15001.03 ms + [ INFO ] Latency: + [ INFO ] Median: 1.01 ms + [ INFO ] Average: 1.04 ms + [ INFO ] Min: 0.69 ms + [ INFO ] Max: 6.71 ms + [ INFO ] Throughput: 11056.04 FPS + + +Compare results on four pictures +-------------------------------------------------------------------------- .. 
code:: ipython3 diff --git a/docs/notebooks/113-image-classification-quantization-with-output_files/index.html b/docs/notebooks/113-image-classification-quantization-with-output_files/index.html index c510054b810478..6bd0bdcec3dc05 100644 --- a/docs/notebooks/113-image-classification-quantization-with-output_files/index.html +++ b/docs/notebooks/113-image-classification-quantization-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/113-image-classification-quantization-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/113-image-classification-quantization-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/113-image-classification-quantization-with-output_files/


../
-113-image-classification-quantization-with-outp..> 16-Aug-2023 01:31               14855
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/113-image-classification-quantization-with-output_files/


../
+113-image-classification-quantization-with-outp..> 31-Oct-2023 00:35               14855
 

diff --git a/docs/notebooks/115-async-api-with-output.rst b/docs/notebooks/115-async-api-with-output.rst index 0e36317dc3c612..10c37b8199e54c 100644 --- a/docs/notebooks/115-async-api-with-output.rst +++ b/docs/notebooks/115-async-api-with-output.rst @@ -13,34 +13,48 @@ requests) rather than wait for the current inference to complete first. **Table of contents:** -- `Imports <#imports>`__ -- `Prepare model and data processing <#prepare-model-and-data-processing>`__ - - `Download test model <#download-test-model>`__ - - `Load the model <#load-the-model>`__ - - `Create functions for data processing <#create-functions-for-data-processing>`__ - - `Get the test video <#get-the-test-video>`__ +- `Imports <#imports>`__ +- `Prepare model and data + processing <#prepare-model-and-data-processing>`__ -- `How to improve the throughput of video processing <#how-to-improve-the-throughput-of-video-processing>`__ + - `Download test model <#download-test-model>`__ + - `Load the model <#load-the-model>`__ + - `Create functions for data + processing <#create-functions-for-data-processing>`__ + - `Get the test video <#get-the-test-video>`__ - - `Sync Mode (default) <#sync-mode-default>`__ - - `Test performance in Sync Mode <#test-performance-in-sync-mode>`__ - - `Async Mode <#async-mode>`__ - - `Test the performance in Async Mode <#test-the-performance-in-async-mode>`__ - - `Compare the performance <#compare-the-performance>`__ +- `How to improve the throughput of video + processing <#how-to-improve-the-throughput-of-video-processing>`__ -- `AsyncInferQueue <#asyncinferqueue>`__ + - `Sync Mode (default) <#sync-mode-default>`__ + - `Test performance in Sync + Mode <#test-performance-in-sync-mode>`__ + - `Async Mode <#async-mode>`__ + - `Test the performance in Async + Mode <#test-the-performance-in-async-mode>`__ + - `Compare the performance <#compare-the-performance>`__ - - `Setting Callback <#setting-callback>`__ - - `Test the performance with AsyncInferQueue 
<#test-the-performance-with-asyncinferqueue>`__ +- `AsyncInferQueue `__ -Imports -############################################################################################################################### + - `Setting Callback <#setting-callback>`__ + - `Test the performance with + AsyncInferQueue <#test-the-performance-with-asyncinferqueue>`__ + +Imports +------------------------------------------------- .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q opencv-python matplotlib + %pip install -q "openvino>=2023.1.0" + %pip install -q opencv-python matplotlib + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -60,11 +74,11 @@ Imports import notebook_utils as utils -Prepare model and data processing -############################################################################################################################### +Prepare model and data processing +--------------------------------------------------------------------------- -Download test model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download test model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We use a pre-trained model from OpenVINO’s `Open Model Zoo `__ to start the @@ -102,8 +116,8 @@ each frame of the video. -Load the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -122,8 +136,8 @@ Load the model N, C, H, W = input_layer_ir.shape shape = (H, W) -Create functions for data processing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create functions for data processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -164,21 +178,21 @@ Create functions for data processing cv2.putText(image, str(round(fps, 2)) + " fps", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 3) return image -Get the test video -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get the test video +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 video_path = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/CEO%20Pat%20Gelsinger%20on%20Leading%20Intel.mp4' -How to improve the throughput of video processing -############################################################################################################################### +How to improve the throughput of video processing +------------------------------------------------------------------------------------------- Below, we compare the performance of the synchronous and async-based approaches: -Sync Mode (default) -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Sync Mode (default) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let us see how video processing works with the default approach. 
Using the synchronous approach, the frame is captured with OpenCV and then @@ -267,8 +281,8 @@ immediately processed: player.stop() return sync_fps -Test performance in Sync Mode -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Test performance in Sync Mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -283,11 +297,11 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 38.75 fps + average throuput in sync mode: 38.68 fps -Async Mode -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Async Mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let us see how the OpenVINO Async API can improve the overall frame rate of an application. The key advantage of the Async approach is as @@ -401,8 +415,8 @@ pipeline (decoding vs inference) and not by the sum of the stages. player.stop() return async_fps -Test the performance in Async Mode -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Test the performance in Async Mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -417,11 +431,11 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 71.45 fps + average throuput in async mode: 73.57 fps -Compare the performance -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare the performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -448,18 +462,18 @@ Compare the performance .. 
image:: 115-async-api-with-output_files/115-async-api-with-output_21_0.png -``AsyncInferQueue`` -############################################################################################################################### +``AsyncInferQueue`` +------------------------------------------------------------- Asynchronous mode pipelines can be supported with the -```AsyncInferQueue`` `__ +`AsyncInferQueue `__ wrapper class. This class automatically spawns the pool of ``InferRequest`` objects (also called “jobs”) and provides synchronization mechanisms to control the flow of the pipeline. It is a simpler way to manage the infer request queue in Asynchronous mode. -Setting Callback -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Setting Callback +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When ``callback`` is set, any job that ends inference calls upon the Python function. The ``callback`` function must have two arguments: one @@ -535,8 +549,8 @@ the possibility of passing runtime values. infer_queue.wait_all() player.stop() -Test the performance with ``AsyncInferQueue`` -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Test the performance with ``AsyncInferQueue`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -552,5 +566,5 @@ Test the performance with ``AsyncInferQueue`` .. 
parsed-literal:: - average throughput in async mode with async infer queue: 102.86 fps + average throughput in async mode with async infer queue: 107.25 fps diff --git a/docs/notebooks/115-async-api-with-output_files/115-async-api-with-output_21_0.png b/docs/notebooks/115-async-api-with-output_files/115-async-api-with-output_21_0.png index 106617e80a951d..9c667ad63b47f5 100644 --- a/docs/notebooks/115-async-api-with-output_files/115-async-api-with-output_21_0.png +++ b/docs/notebooks/115-async-api-with-output_files/115-async-api-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0f473d81de64bea167c31dbd9a671d68826ac1ae89a58a7b244c5bcc79198bb -size 30454 +oid sha256:0ba110d0d82c00b211370ff95ad7be6995d288abc3954e53a122acce998ea965 +size 30445 diff --git a/docs/notebooks/115-async-api-with-output_files/index.html b/docs/notebooks/115-async-api-with-output_files/index.html index 9dad300ecd4660..0a4b0d3326eb60 100644 --- a/docs/notebooks/115-async-api-with-output_files/index.html +++ b/docs/notebooks/115-async-api-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/115-async-api-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/115-async-api-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/115-async-api-with-output_files/


../
-115-async-api-with-output_15_0.png                 16-Aug-2023 01:31                4307
-115-async-api-with-output_19_0.png                 16-Aug-2023 01:31                4307
-115-async-api-with-output_21_0.png                 16-Aug-2023 01:31               30455
-115-async-api-with-output_27_0.png                 16-Aug-2023 01:31                4307
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/115-async-api-with-output_files/


../
+115-async-api-with-output_15_0.png                 31-Oct-2023 00:35                4307
+115-async-api-with-output_19_0.png                 31-Oct-2023 00:35                4307
+115-async-api-with-output_21_0.png                 31-Oct-2023 00:35               30445
+115-async-api-with-output_27_0.png                 31-Oct-2023 00:35                4307
 

diff --git a/docs/notebooks/116-sparsity-optimization-with-output.rst b/docs/notebooks/116-sparsity-optimization-with-output.rst index 97930314759c41..f02e7081021741 100644 --- a/docs/notebooks/116-sparsity-optimization-with-output.rst +++ b/docs/notebooks/116-sparsity-optimization-with-output.rst @@ -23,25 +23,37 @@ consists of the following steps: **Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Imports <#imports>`__ - - `Download, quantize and sparsify the model, using Hugging Face Optimum API <#download-quantize-and-sparsify-the-model-using-hugging-face-optimum-api>`__ +- `Prerequisites <#prerequisites>`__ +- `Imports <#imports>`__ -- `Benchmark quantized dense inference performance <#benchmark-quantized-dense-inference-performance>`__ -- `Benchmark quantized sparse inference performance <#benchmark-quantized-sparse-inference-performance>`__ -- `When this might be helpful <#when-this-might-be-helpful>`__ + - `Download, quantize and sparsify the model, using Hugging Face + Optimum + API <#download-quantize-and-sparsify-the-model-using-hugging-face-optimum-api>`__ -Prerequisites -############################################################################################################################### +- `Benchmark quantized dense inference + performance <#benchmark-quantized-dense-inference-performance>`__ +- `Benchmark quantized sparse inference + performance <#benchmark-quantized-sparse-inference-performance>`__ +- `When this might be helpful <#when-this-might-be-helpful>`__ + +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q "git+https://github.com/huggingface/optimum-intel.git" datasets onnx onnxruntime + %pip install -q "openvino>=2023.1.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" datasets onnx onnxruntime + + +.. 
parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + -Imports -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 @@ -53,14 +65,6 @@ Imports from huggingface_hub import hf_hub_download -.. parsed-literal:: - - 2023-09-08 23:03:46.012098: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:03:46.047135: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 23:03:46.594018: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -69,12 +73,16 @@ Imports .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + 2023-10-30 22:57:12.569340: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:57:12.603049: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-30 22:57:13.131994: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations warnings.warn( -Download, quantize and sparsify the model, using Hugging Face Optimum API -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download, quantize and sparsify the model, using Hugging Face Optimum API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The first step is to download a quantized sparse transformers which has been translated to OpenVINO IR. Then, it will be put through a @@ -104,7 +112,7 @@ model card on Hugging Face. .. parsed-literal:: - Compiling the model... + Compiling the model to CPU ... Set CACHE_DIR to /opt/home/k8sworker/.cache/huggingface/hub/models--OpenVINO--bert-base-uncased-sst2-int8-unstructured80/snapshots/dc44eb46300882463d50ee847e0f6485bad3cdad/model_cache @@ -139,8 +147,8 @@ the IRs into a single folder. 
-Benchmark quantized dense inference performance -############################################################################################################################### +Benchmark quantized dense inference performance +----------------------------------------------------------------------------------------- Benchmark dense inference performance using parallel execution on four CPU cores to simulate a small instance in the cloud infrastructure. @@ -171,11 +179,88 @@ as an example. It is recommended to tune based on your applications. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - /bin/bash: benchmark_app: command not found -Benchmark quantized sparse inference performance -############################################################################################################################### +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 77.13 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?] + [ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [?,?] + [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [?,?] 
+ [ INFO ] Model outputs: + [ INFO ] logits (node: logits) : f32 / [...] / [?,2] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64] + [ INFO ] Reshape model took 25.98 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64] + [ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,64] + [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,64] + [ INFO ] Model outputs: + [ INFO ] logits (node: logits) : f32 / [...] / [1,2] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 970.05 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 4 + [ INFO ] NUM_STREAMS: 4 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 4 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values! + [ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values! + [ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values! 
+ [ INFO ] Fill input 'input_ids' with random values + [ INFO ] Fill input 'attention_mask' with random values + [ INFO ] Fill input 'token_type_ids' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 28.50 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 9116 iterations + [ INFO ] Duration: 60051.84 ms + [ INFO ] Latency: + [ INFO ] Median: 26.14 ms + [ INFO ] Average: 26.19 ms + [ INFO ] Min: 24.91 ms + [ INFO ] Max: 41.99 ms + [ INFO ] Throughput: 151.80 FPS + + +Benchmark quantized sparse inference performance +------------------------------------------------------------------------------------------ To enable sparse weight decompression feature, users can add it to runtime config like below. ``CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE`` @@ -207,11 +292,88 @@ for which a layer will be enabled. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - /bin/bash: benchmark_app: command not found -When this might be helpful -############################################################################################################################### +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 
2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 83.15 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?] + [ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [?,?] + [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [?,?] + [ INFO ] Model outputs: + [ INFO ] logits (node: logits) : f32 / [...] / [?,2] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64] + [ INFO ] Reshape model took 26.29 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64] + [ INFO ] attention_mask (node: attention_mask) : i64 / [...] / [1,64] + [ INFO ] token_type_ids (node: token_type_ids) : i64 / [...] / [1,64] + [ INFO ] Model outputs: + [ INFO ] logits (node: logits) : f32 / [...] 
/ [1,2] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 903.83 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 4 + [ INFO ] NUM_STREAMS: 4 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 4 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 0.75 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'input_ids'!. This input will be filled with random values! + [ WARNING ] No input files were given for input 'attention_mask'!. This input will be filled with random values! + [ WARNING ] No input files were given for input 'token_type_ids'!. This input will be filled with random values! + [ INFO ] Fill input 'input_ids' with random values + [ INFO ] Fill input 'attention_mask' with random values + [ INFO ] Fill input 'token_type_ids' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 29.52 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 9128 iterations + [ INFO ] Duration: 60046.44 ms + [ INFO ] Latency: + [ INFO ] Median: 26.15 ms + [ INFO ] Average: 26.18 ms + [ INFO ] Min: 25.14 ms + [ INFO ] Max: 42.73 ms + [ INFO ] Throughput: 152.02 FPS + + +When this might be helpful +-------------------------------------------------------------------- This feature can improve inference performance for models with sparse weights in the scenarios when the model is deployed to handle multiple diff --git a/docs/notebooks/117-model-server-with-output.rst b/docs/notebooks/117-model-server-with-output.rst index 390de056a564ce..870031ab70f1da 100644 --- a/docs/notebooks/117-model-server-with-output.rst +++ b/docs/notebooks/117-model-server-with-output.rst @@ -4,7 +4,7 @@ Hello Model Server Introduction to OpenVINO™ Model Server (OVMS). What is Model Serving? -############################################################################################################################### +---------------------- A model server hosts models and makes them accessible to software components over standard network protocols. A client sends a request to @@ -29,30 +29,37 @@ deployment: - Efficient resource utilization with horizontal and vertical inference scaling. -|ovms_diagram| +.. 
figure:: https://user-images.githubusercontent.com/91237924/215658773-4720df00-3b95-4a84-85a2-40f06138e914.png + :alt: ovms_diagram + + ovms_diagram **Table of contents:** -- `Serving with OpenVINO Model Server <#serving-with-openvino-model-server>`__ -- `Step 1: Prepare Docker <#step-1-prepare-docker>`__ -- `Step 2: Preparing a Model Repository <#step-2-preparing-a-model-repository>`__ -- `Step 3: Start the Model Server Container <#step-3-start-the-model-server-container>`__ -- `Step 4: Prepare the Example Client Components <#step-4-prepare-the-example-client-components>`__ - - `Prerequisites <#prerequisites>`__ - - `Imports <#imports>`__ - - `Request Model Status <#request-model-status>`__ - - `Request Model Metadata <#request-model-metadata>`__ - - `Load input image <#load-input-image>`__ - - `Request Prediction on a Numpy Array <#request-prediction-on-a-numpy-array>`__ - - `Visualization <#visualization>`__ +- `Serving with OpenVINO Model + Server <#serving-with-openvino-model-server>`__ +- `Step 1: Prepare Docker <#step-1-prepare-docker>`__ +- `Step 2: Preparing a Model + Repository <#step-2-preparing-a-model-repository>`__ +- `Step 3: Start the Model Server + Container <#step-3-start-the-model-server-container>`__ +- `Step 4: Prepare the Example Client + Components <#step-4-prepare-the-example-client-components>`__ -- `References <#references>`__ + - `Prerequisites <#prerequisites>`__ + - `Imports <#imports>`__ + - `Request Model Status <#request-model-status>`__ + - `Request Model Metadata <#request-model-metadata>`__ + - `Load input image <#load-input-image>`__ + - `Request Prediction on a Numpy + Array <#request-prediction-on-a-numpy-array>`__ + - `Visualization <#visualization>`__ -.. 
|ovms_diagram| image:: https://user-images.githubusercontent.com/91237924/215658773-4720df00-3b95-4a84-85a2-40f06138e914.png +- `References <#references>`__ -Serving with OpenVINO Model Server -############################################################################################################################### +Serving with OpenVINO Model Server +---------------------------------------------------------------------------- OpenVINO Model Server (OVMS) is a high-performance system for serving models. Implemented in C++ for scalability and optimized for deployment @@ -68,8 +75,8 @@ making deploying new algorithms and AI experiments easy. To quickly start using OpenVINO™ Model Server, follow these steps: -Step 1: Prepare Docker -############################################################################################################################### +Step 1: Prepare Docker +---------------------------------------------------------------- Install `Docker Engine `__, including its @@ -109,8 +116,8 @@ image and a message. -Step 2: Preparing a Model Repository -############################################################################################################################### +Step 2: Preparing a Model Repository +------------------------------------------------------------------------------ The models need to be placed and mounted in a particular directory structure and according to the following rules: @@ -159,12 +166,20 @@ structure and according to the following rules: ``.pdmodel`` for Paddle Paddle, and ``.pb`` for TensorFlow. The file name can be arbitrary. +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + .. 
code:: ipython3 import os - import sys - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) from notebook_utils import download_file dedicated_dir = "models" @@ -179,13 +194,7 @@ structure and according to the following rules: model_bin_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin" download_file(model_xml_url, XML_PATH, MODEL_DIR) - download_file(model_bin_url, BIN_PATH_name, MODEL_DIR) - - model_xml_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.xml" - model_bin_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin" - - download_file(model_xml_url, model_xml_name, base_model_dir) - download_file(model_bin_url, model_bin_name, base_model_dir) + download_file(model_bin_url, BIN_PATH, MODEL_DIR) .. parsed-literal:: @@ -193,8 +202,8 @@ structure and according to the following rules: Model Copied to "./models/detection/1". -Step 3: Start the Model Server Container -############################################################################################################################### +Step 3: Start the Model Server Container +---------------------------------------------------------------------------------- Pull and start the container: @@ -642,8 +651,8 @@ openvino/model_server:latest If the serving port ``9000`` is already in use, please switch it to another available port on your system. 
For example:\ ``-p 9020:9000`` -Step 4: Prepare the Example Client Components -############################################################################################################################### +Step 4: Prepare the Example Client Components +--------------------------------------------------------------------------------------- OpenVINO Model Server exposes two sets of APIs: one compatible with ``TensorFlow Serving`` and another one, with ``KServe API``, for @@ -653,14 +662,14 @@ into existing systems the already leverage one of these APIs for inference. This example will demonstrate how to write a TensorFlow Serving API client for object detection. -Prerequisites -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prerequisites +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Install necessary packages. .. code:: ipython3 - !pip install -q ovmsclient + %pip install -q ovmsclient .. parsed-literal:: @@ -688,8 +697,8 @@ Install necessary packages. You should consider upgrading via the '/home/adrian/repos/openvino_notebooks_adrian/venv/bin/python -m pip install --upgrade pip' command. -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -698,8 +707,8 @@ Imports import matplotlib.pyplot as plt from ovmsclient import make_grpc_client -Request Model Status -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Request Model Status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -716,8 +725,8 @@ Request Model Status {1: {'state': 'AVAILABLE', 'error_code': 0, 'error_message': 'OK'}} -Request Model Metadata -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Request Model Metadata +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -730,13 +739,19 @@ Request Model Metadata {'model_version': 1, 'inputs': {'image': {'shape': [1, 3, 704, 704], 'dtype': 'DT_FLOAT'}}, 'outputs': {'1469_1470.0': {'shape': [-1], 'dtype': 'DT_FLOAT'}, '1078_1079.0': {'shape': [1000], 'dtype': 'DT_FLOAT'}, '1330_1331.0': {'shape': [36], 'dtype': 'DT_FLOAT'}, 'labels': {'shape': [-1], 'dtype': 'DT_INT32'}, '1267_1268.0': {'shape': [121], 'dtype': 'DT_FLOAT'}, '1141_1142.0': {'shape': [1000], 'dtype': 'DT_FLOAT'}, '1204_1205.0': {'shape': [484], 'dtype': 'DT_FLOAT'}, 'boxes': {'shape': [-1, 5], 'dtype': 'DT_FLOAT'}}} -Load input image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load input image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 + # Download the image from the openvino_notebooks storage + image_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg", + directory="data" + ) + # Text detection models expect an image in BGR format. - image = cv2.imread("../data/image/intel_rnb.jpg") + image = cv2.imread(str(image_filename)) fp_image = image.astype("float32") # Resize the image to meet network expected input sizes. @@ -758,11 +773,11 @@ Load input image -.. image:: 117-model-server-with-output_files/117-model-server-with-output_20_1.png +.. 
image:: 117-model-server-with-output_files/117-model-server-with-output_21_1.png -Request Prediction on a Numpy Array -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Request Prediction on a Numpy Array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -786,8 +801,8 @@ Request Prediction on a Numpy Array [2.2261986e+01 4.5406548e+01 1.8868817e+02 1.0225631e+02 3.0407205e-01]] -Visualization -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Visualization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -853,7 +868,7 @@ Visualization -.. image:: 117-model-server-with-output_files/117-model-server-with-output_25_1.png +.. image:: 117-model-server-with-output_files/117-model-server-with-output_26_1.png To stop and remove the model server container, you can use the following @@ -869,8 +884,8 @@ command: ovms -References -############################################################################################################################### +References +---------------------------------------------------- 1. 
`OpenVINO™ Model Server documentation `__ diff --git a/docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_20_1.png b/docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_21_1.png similarity index 100% rename from docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_20_1.png rename to docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_21_1.png diff --git a/docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_25_1.png b/docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_26_1.png similarity index 100% rename from docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_25_1.png rename to docs/notebooks/117-model-server-with-output_files/117-model-server-with-output_26_1.png diff --git a/docs/notebooks/117-model-server-with-output_files/index.html b/docs/notebooks/117-model-server-with-output_files/index.html index bc4151c4fbebf0..0df2967d038785 100644 --- a/docs/notebooks/117-model-server-with-output_files/index.html +++ b/docs/notebooks/117-model-server-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/117-model-server-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/117-model-server-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/117-model-server-with-output_files/


../
-117-model-server-with-output_20_1.png              16-Aug-2023 01:31              112408
-117-model-server-with-output_25_1.png              16-Aug-2023 01:31              232667
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/117-model-server-with-output_files/


../
+117-model-server-with-output_21_1.png              31-Oct-2023 00:35              112408
+117-model-server-with-output_26_1.png              31-Oct-2023 00:35              232667
 

diff --git a/docs/notebooks/118-optimize-preprocessing-with-output.rst b/docs/notebooks/118-optimize-preprocessing-with-output.rst index 2f9a8116753605..c940e59c7a89d3 100644 --- a/docs/notebooks/118-optimize-preprocessing-with-output.rst +++ b/docs/notebooks/118-optimize-preprocessing-with-output.rst @@ -25,45 +25,62 @@ This tutorial include following steps: **Table of contents:** -- `Settings <#settings>`__ -- `Imports <#imports>`__ - - `Setup image and device <#setup-image-and-device>`__ - - `Downloading the model <#downloading-the-model>`__ - - `Create core <#create-core>`__ - - `Check the original parameters of image <#check-the-original-parameters-of-image>`__ +- `Settings <#settings>`__ +- `Imports <#imports>`__ -- `Setup preprocessing steps with Preprocessing API and perform inference <#setup-preprocessing-steps-with-preprocessing-api-and-perform-inference>`__ + - `Setup image and device <#setup-image-and-device>`__ + - `Downloading the model <#downloading-the-model>`__ + - `Create core <#create-core>`__ + - `Check the original parameters of + image <#check-the-original-parameters-of-image>`__ - - `Convert model to OpenVINO IR with model conversion API <#convert-model-to-openvino-ir-with-model-conversion-apI>`__ - - `Create PrePostProcessor Object <#create-prepostprocessor-object>`__ - - `Declare User’s Data Format <#declare-user’s-data-format>`__ - - `Declaring Model Layout <#declaring-model-layout>`__ - - `Preprocessing Steps <#preprocessing-steps>`__ - - `Integrating Steps into a Model <#integrating-steps-into-a-model>`__ +- `Setup preprocessing steps with Preprocessing API and perform + inference <#setup-preprocessing-steps-with-preprocessing-api-and-perform-inference>`__ -- `Load model and perform inference <#load-model-and-perform-inference>`__ -- `Fit image manually and perform inference <#fit-image-manually-and-perform-inference>`__ + - `Convert model to OpenVINO IR with model conversion + API 
<#convert-model-to-openvino-ir-with-model-conversion-api>`__ + - `Create PrePostProcessor + Object <#create-prepostprocessor-object>`__ + - `Declare User’s Data + Format <#declare-users-data-format>`__ + - `Declaring Model Layout <#declaring-model-layout>`__ + - `Preprocessing Steps <#preprocessing-steps>`__ + - `Integrating Steps into a + Model <#integrating-steps-into-a-model>`__ - - `Load the model <#load-the-model>`__ - - `Load image and fit it to model input <#load-image-and-fit-it-to-model-input>`__ - - `Perform inference <#perform-inference>`__ +- `Load model and perform + inference <#load-model-and-perform-inference>`__ +- `Fit image manually and perform + inference <#fit-image-manually-and-perform-inference>`__ -- `Compare results <#compare-results>`__ + - `Load the model <#load-the-model>`__ + - `Load image and fit it to model + input <#load-image-and-fit-it-to-model-input>`__ + - `Perform inference <#perform-inference>`__ - - `Compare results on one image <#compare-results-on-one-image>`__ - - `Compare performance <#compare-performance>`__ +- `Compare results <#compare-results>`__ -Settings -############################################################################################################################### + - `Compare results on one + image <#compare-results-on-one-image>`__ + - `Compare performance <#compare-performance>`__ + +Settings +-------------------------------------------------- .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" tensorflow opencv-python matplotlib + %pip install -q "openvino>=2023.1.0" tensorflow opencv-python matplotlib + + +.. parsed-literal:: -Imports -############################################################################################################################### + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. 
code:: ipython3 @@ -75,22 +92,42 @@ Imports import numpy as np import openvino as ov import tensorflow as tf + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file .. parsed-literal:: - 2023-09-08 23:04:01.488557: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:04:01.524594: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 22:59:29.607370: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 22:59:29.641564: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-09-08 23:04:02.060166: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-30 22:59:30.151509: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Setup image and device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Setup image and device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - image_path = "../data/image/coco.jpg" + # Download the image from the openvino_notebooks storage + image_path = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + directory="data" + ) + image_path = str(image_path) + + + +.. parsed-literal:: + + data/coco.jpg: 0%| | 0.00/202k [00:00`__. @@ -147,22 +184,13 @@ and save it to the disk. .. parsed-literal:: - 2023-09-08 23:04:03.032233: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2023-10-30 22:59:32.526472: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... .. parsed-literal:: WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model. - - -.. 
parsed-literal:: - - WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 94). These functions will not be directly callable after loading. - - -.. parsed-literal:: - INFO:tensorflow:Assets written to: model/InceptionResNetV2/assets @@ -171,15 +199,15 @@ and save it to the disk. INFO:tensorflow:Assets written to: model/InceptionResNetV2/assets -Create core -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create core +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 core = ov.Core() -Check the original parameters of image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Check the original parameters of image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -199,8 +227,8 @@ Check the original parameters of image .. 
image:: 118-optimize-preprocessing-with-output_files/118-optimize-preprocessing-with-output_14_1.png -Setup preprocessing steps with Preprocessing API and perform inference -############################################################################################################################### +Setup preprocessing steps with Preprocessing API and perform inference +---------------------------------------------------------------------------------------------------------------- Intuitively, preprocessing API consists of the following parts: @@ -225,8 +253,8 @@ Pre-processing support following operations (please, see more details - Color Conversion - Custom Operations -Convert model to OpenVINO IR with model conversion API -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert model to OpenVINO IR with model conversion API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The options for preprocessing are not required. @@ -244,11 +272,11 @@ The options for preprocessing are not required. input=[1,299,299,3]) ov.save_model(ppp_model, str(ir_path)) -Create ``PrePostProcessor`` Object -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create ``PrePostProcessor`` Object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The -```PrePostProcessor()`` `__ +`PrePostProcessor() `__ class enables specifying the preprocessing and postprocessing steps for a model. @@ -258,8 +286,8 @@ a model. 
ppp = PrePostProcessor(ppp_model) -Declare User’s Data Format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Declare User’s Data Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To address particular input of a model/preprocessor, use the ``PrePostProcessor.input(input_name)`` method. If the model has only one @@ -297,12 +325,12 @@ for mean/scale normalization. .. parsed-literal:: - + -Declaring Model Layout -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Declaring Model Layout +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model input already has information about precision and shape. Preprocessing API is not intended to modify this. The only thing that @@ -326,12 +354,12 @@ may be specified is input data .. parsed-literal:: - + -Preprocessing Steps -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preprocessing Steps +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, the sequence of preprocessing steps can be defined. For more information about preprocessing steps, see @@ -344,7 +372,7 @@ Perform the following: dynamic size, for example, ``{?, 3, ?, ?}`` resize will not know how to resize the picture. Therefore, in this case, target height/ width should be specified. For more details, see also the - ```PreProcessSteps.resize()`` `__. + `PreProcessSteps.resize() `__. - Subtract mean from each channel. - Divide each pixel data to appropriate scale value. @@ -365,12 +393,12 @@ then such conversion will be added explicitly. .. 
parsed-literal:: - + -Integrating Steps into a Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Integrating Steps into a Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Once the preprocessing steps have been finished, the model can be finally built. It is possible to display ``PrePostProcessor`` @@ -395,8 +423,8 @@ configuration for debugging purposes. -Load model and perform inference -############################################################################################################################### +Load model and perform inference +-------------------------------------------------------------------------- .. code:: ipython3 @@ -413,19 +441,19 @@ Load model and perform inference ppp_input_tensor = prepare_image_api_preprocess(image_path) results = compiled_model_with_preprocess_api(ppp_input_tensor)[ppp_output_layer][0] -Fit image manually and perform inference -############################################################################################################################### +Fit image manually and perform inference +---------------------------------------------------------------------------------- -Load the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 model = core.read_model(model=ir_path) compiled_model = core.compile_model(model=model, device_name=device.value) -Load image and fit it to model input -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load image and fit it to model input +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -458,8 +486,8 @@ Load image and fit it to model input The data type of the image is float32 -Perform inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Perform inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -467,11 +495,11 @@ Perform inference result = compiled_model(input_tensor)[output_layer] -Compare results -############################################################################################################################### +Compare results +--------------------------------------------------------- -Compare results on one image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare results on one image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -490,7 +518,11 @@ Compare results on one image # Convert the inference result to a class name. - imagenet_classes = open("../data/datasets/imagenet/imagenet_2012.txt").read().splitlines() + imagenet_filename = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_2012.txt", + directory="data" + ) + imagenet_classes = imagenet_filename.read_text().splitlines() imagenet_classes = ['background'] + imagenet_classes # get result for inference with preprocessing api @@ -504,6 +536,12 @@ Compare results on one image res = check_results(input_tensor, compiled_model, imagenet_classes) + +.. parsed-literal:: + + data/imagenet_2012.txt: 0%| | 0.00/30.9k [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/118-optimize-preprocessing-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/118-optimize-preprocessing-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/118-optimize-preprocessing-with-output_files/


../
-118-optimize-preprocessing-with-output_13_1.png    16-Aug-2023 01:31              387941
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/118-optimize-preprocessing-with-output_files/


../
+118-optimize-preprocessing-with-output_14_1.png    31-Oct-2023 00:35              387941
 

diff --git a/docs/notebooks/119-tflite-to-openvino-with-output.rst b/docs/notebooks/119-tflite-to-openvino-with-output.rst index 1d8af0103e85a8..3fc5cd80494c7b 100644 --- a/docs/notebooks/119-tflite-to-openvino-with-output.rst +++ b/docs/notebooks/119-tflite-to-openvino-with-output.rst @@ -16,30 +16,34 @@ and do inference with a sample image. **Table of contents:** -- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ +- `Preparation <#preparation>`__ -- `Download TFLite model <#download-tflite-model>`__ -- `Convert a Model to OpenVINO IR Format <#convert-a-model-to-openvino-ir-format>`__ -- `Load model using OpenVINO TensorFlow Lite Frontend <#load-model-using-openvino-tensorflow-lite-frontend>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ - - `Select inference device <#select-inference-device>`__ +- `Download TFLite model <#download-tflite-model>`__ +- `Convert a Model to OpenVINO IR + Format <#convert-a-model-to-openvino-ir-format>`__ +- `Load model using OpenVINO TensorFlow Lite + Frontend <#load-model-using-openvino-tensorflow-lite-frontend>`__ +- `Run OpenVINO model + inference <#run-openvino-model-inference>`__ -- `Estimate Model Performance <#estimate-model-performance>`__ + - `Select inference device <#select-inference-device>`__ -Preparation -############################################################################################################################### +- `Estimate Model Performance <#estimate-model-performance>`__ -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q opencv-python requests tqdm + %pip install -q "openvino>=2023.1.0" + %pip install -q opencv-python requests tqdm # Fetch `notebook_utils` module import urllib.request @@ -48,8 +52,15 @@ Install requirements filename='notebook_utils.py' ); -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -60,8 +71,8 @@ Imports from notebook_utils import download_file, load_image -Download TFLite model -############################################################################################################################### +Download TFLite model +--------------------------------------------------------------- .. code:: ipython3 @@ -84,12 +95,12 @@ Download TFLite model .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/119-tflite-to-openvino/model/efficientnet_lite0_fp32_2.tflite') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/119-tflite-to-openvino/model/efficientnet_lite0_fp32_2.tflite') -Convert a Model to OpenVINO IR Format -############################################################################################################################### +Convert a Model to OpenVINO IR Format +------------------------------------------------------------------------------- To convert the TFLite model to OpenVINO IR, model conversion Python API can be used. 
``ov.convert_model`` function accepts the path to the @@ -116,8 +127,8 @@ For TensorFlow Lite models support, refer to this Model model/efficientnet_lite0_fp32_2.tflite successfully converted and saved to model/efficientnet_lite0_fp32_2.xml -Load model using OpenVINO TensorFlow Lite Frontend -############################################################################################################################### +Load model using OpenVINO TensorFlow Lite Frontend +-------------------------------------------------------------------------------------------- TensorFlow Lite models are supported via ``FrontEnd`` API. You may skip conversion to IR and read models directly by OpenVINO runtime API. For @@ -130,8 +141,8 @@ this `tutorial <../002-openvino-api>`__. ov_model = core.read_model(tflite_model_path) -Run OpenVINO model inference -############################################################################################################################### +Run OpenVINO model inference +---------------------------------------------------------------------- We can find information about model input preprocessing in its `description `__ @@ -145,10 +156,10 @@ on `TensorFlow Hub `__. resized_image = image.resize((224, 224)) input_tensor = np.expand_dims((np.array(resized_image).astype(np.float32) - 127) / 128, 0) -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -205,17 +216,15 @@ Select device from dropdown list for running inference using OpenVINO: Predicted label: n02109047 Great Dane with probability 0.715318 -Estimate Model Performance -############################################################################################################################### +Estimate Model Performance +-------------------------------------------------------------------- `Benchmark Tool `__ is used to measure the inference performance of the model on CPU and GPU. -.. note:: - - For more accurate performance, it is recommended to run + **NOTE**: For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on CPU for one minute. Change ``CPU`` to ``GPU`` to @@ -234,5 +243,68 @@ GPU. .. parsed-literal:: Benchmark model inference on CPU - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 29.37 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,224,224,3] + [ INFO ] Model outputs: + [ INFO ] Softmax (node: 63) : f32 / [...] 
/ [1,1000] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,H,W,C] / [1,224,224,3] + [ INFO ] Model outputs: + [ INFO ] Softmax (node: 63) : f32 / [...] / [1,1000] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 133.03 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: TensorFlow_Lite_Frontend_IR + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 7.30 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 17562 iterations + [ INFO ] Duration: 15010.30 ms + [ INFO ] Latency: + [ INFO ] Median: 4.98 ms + [ INFO ] Average: 4.99 ms + [ INFO ] Min: 3.69 ms + [ INFO ] Max: 15.16 ms + [ INFO ] Throughput: 1170.00 FPS diff --git a/docs/notebooks/119-tflite-to-openvino-with-output_files/index.html b/docs/notebooks/119-tflite-to-openvino-with-output_files/index.html index 902847c51f83d2..783453225f99b3 100644 --- a/docs/notebooks/119-tflite-to-openvino-with-output_files/index.html +++ b/docs/notebooks/119-tflite-to-openvino-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/119-tflite-to-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/119-tflite-to-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/119-tflite-to-openvino-with-output_files/


../
-119-tflite-to-openvino-with-output_16_1.jpg        16-Aug-2023 01:31               68170
-119-tflite-to-openvino-with-output_16_1.png        16-Aug-2023 01:31              621006
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/119-tflite-to-openvino-with-output_files/


../
+119-tflite-to-openvino-with-output_16_1.jpg        31-Oct-2023 00:35               68170
+119-tflite-to-openvino-with-output_16_1.png        31-Oct-2023 00:35              621006
 

diff --git a/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output.rst b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output.rst new file mode 100644 index 00000000000000..8b39f89c50ea0d --- /dev/null +++ b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output.rst @@ -0,0 +1,664 @@ +Convert a TensorFlow Instance Segmentation Model to OpenVINO™ +============================================================= + +`TensorFlow `__, or TF for short, is an +open-source framework for machine learning. + +The `TensorFlow Object Detection +API `__ +is an open-source computer vision framework built on top of TensorFlow. +It is used for building object detection and instance segmentation +models that can localize multiple objects in the same image. TensorFlow +Object Detection API supports various architectures and models, which +can be found and downloaded from the `TensorFlow +Hub `__. + +This tutorial shows how to convert a TensorFlow `Mask R-CNN with +Inception ResNet +V2 `__ +instance segmentation model to OpenVINO `Intermediate +Representation `__ +(OpenVINO IR) format, using `Model +Optimizer `__. +After creating the OpenVINO IR, load the model in `OpenVINO +Runtime `__ +and do inference with a sample image. 
+ +**Table of contents:** +-- + +- `Prerequisites <#prerequisites>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Download Model from TensorFlow Hub <#download-model-from-tensorflow-hub>`__ +- `Convert Model to OpenVINO IR <#convert-model-to-openvino-ir>`__ +- `Test Inference on the Converted Model <#test-inference-on-the-converted-model>`__ +- `Select inference device <#select-inference-device>`__ +- `Load the Model <#load-the-model>`__ +- `Get Model Information <#get-model-information>`__ +- `Get an Image for Test Inference <#get-an-image-for-test-inference>`__ +- `Perform Inference <#perform-inference>`__ +- `Inference Result Visualization <#inference-result-visualization>`__ +- `Next Steps <#next-steps>`__ +- `Async inference pipeline <#async-inference-pipeline>`__ +- `Integration preprocessing to model <#integration-preprocessing-to-model>`__ + +Prerequisites +------------------------------------------------------- + +Install required packages: + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" "numpy>=1.21.0" "opencv-python" "matplotlib>=3.4" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +The notebook uses utility functions. The cell below will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + # Fetch the notebook utils script from the openvino_notebooks repo + import urllib.request + + urllib.request.urlretrieve( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py", + filename="notebook_utils.py", + ); + +Imports +------------------------------------------------- + +.. 
code:: ipython3 + + # Standard python modules + from pathlib import Path + + # External modules and dependencies + import cv2 + import matplotlib.pyplot as plt + import numpy as np + + # Notebook utils module + from notebook_utils import download_file + + # OpenVINO modules + import openvino as ov + +Settings +-------------------------------------------------- + +Define model related variables and create corresponding directories: + +.. code:: ipython3 + + # Create directories for models files + model_dir = Path("model") + model_dir.mkdir(exist_ok=True) + + # Create directory for TensorFlow model + tf_model_dir = model_dir / "tf" + tf_model_dir.mkdir(exist_ok=True) + + # Create directory for OpenVINO IR model + ir_model_dir = model_dir / "ir" + ir_model_dir.mkdir(exist_ok=True) + + model_name = "mask_rcnn_inception_resnet_v2_1024x1024" + + openvino_ir_path = ir_model_dir / f"{model_name}.xml" + + tf_model_url = "https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1?tf-hub-format=compressed" + + tf_model_archive_filename = f"{model_name}.tar.gz" + +Download Model from TensorFlow Hub +---------------------------------------------------------------------------- + +Download archive with TensorFlow Instance Segmentation model +(`mask_rcnn_inception_resnet_v2_1024x1024 `__) +from TensorFlow Hub: + +.. code:: ipython3 + + download_file( + url=tf_model_url, + filename=tf_model_archive_filename, + directory=tf_model_dir + ); + + + +.. parsed-literal:: + + model/tf/mask_rcnn_inception_resnet_v2_1024x1024.tar.gz: 0%| | 0.00/232M [00:00`__. +Optionally, we can apply compression to FP16 model weights using +``compress_to_fp16=True`` option and integrate preprocessing using this +approach. + +The converted model is ready to load on a device using ``compile_model`` +or saved on disk using the ``serialize`` function to reduce loading time +when the model is run in the future. + +.. 
code:: ipython3 + + ov_model = ov.convert_model(tf_model_dir) + + # Save converted OpenVINO IR model to the corresponding directory + ov.save_model(ov_model, openvino_ir_path) + +Test Inference on the Converted Model +------------------------------------------------------------------------------- + +Select inference device +----------------------------------------------------------------- + +Select a device from the dropdown list for running inference using OpenVINO: + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Load the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + openvino_ir_model = core.read_model(openvino_ir_path) + compiled_model = core.compile_model(model=openvino_ir_model, device_name=device.value) + +Get Model Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mask R-CNN with Inception ResNet V2 instance segmentation model has one +input - a three-channel image of variable size. The input tensor shape +is ``[1, height, width, 3]`` with values in ``[0, 255]``. + +Model output dictionary contains a lot of tensors; we will use only 5 of +them: + +- ``num_detections``: A ``tf.int`` tensor with only one value, the number of detections ``[N]``. +- ``detection_boxes``: A ``tf.float32`` tensor of shape ``[N, 4]`` containing bounding box coordinates in the following order: ``[ymin, xmin, ymax, xmax]``. +- ``detection_classes``: A ``tf.int`` tensor of shape ``[N]`` containing detection class index from the label file. +- ``detection_scores``: A ``tf.float32`` tensor of shape ``[N]`` containing detection scores. +- ``detection_masks``: A ``[batch, max_detections, mask_height, mask_width]`` tensor. 
+ + Note that a pixel-wise sigmoid score converter is applied to the detection masks. + +For more information about model inputs, outputs and their formats, see +the `model overview page on TensorFlow +Hub `__. + +It is important to mention that values of ``detection_boxes``, +``detection_classes``, ``detection_scores``, ``detection_masks`` +correspond to each other and are ordered by the highest detection score: +the first detection mask corresponds to the first detection class and to +the first (and highest) detection score. + +.. code:: ipython3 + + model_inputs = compiled_model.inputs + model_outputs = compiled_model.outputs + + print("Model inputs count:", len(model_inputs)) + print("Model inputs:") + for _input in model_inputs: + print(" ", _input) + + print("Model outputs count:", len(model_outputs)) + print("Model outputs:") + for output in model_outputs: + print(" ", output) + + +.. parsed-literal:: + + Model inputs count: 1 + Model inputs: + + Model outputs count: 23 + Model outputs: + + + + + + + + + + + + + + + + + + + + + + + + + +Get an Image for Test Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Load and save an image: + +.. code:: ipython3 + + image_path = Path("./data/coco_bike.jpg") + + download_file( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg", + filename=image_path.name, + directory=image_path.parent, + ); + + + +.. parsed-literal:: + + data/coco_bike.jpg: 0%| | 0.00/182k [00:00 + + + + +.. image:: 120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_25_1.png + + +Perform Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + inference_result = compiled_model(network_input_image) + +After model inference on the test image, instance segmentation data can +be extracted from the result. 
For further model result visualization +``detection_boxes``, ``detection_masks``, ``detection_classes`` and +``detection_scores`` outputs will be used. + +.. code:: ipython3 + + detection_boxes = compiled_model.output("detection_boxes") + image_detection_boxes = inference_result[detection_boxes] + print("image_detection_boxes:", image_detection_boxes.shape) + + detection_masks = compiled_model.output("detection_masks") + image_detection_masks = inference_result[detection_masks] + print("image_detection_masks:", image_detection_masks.shape) + + detection_classes = compiled_model.output("detection_classes") + image_detection_classes = inference_result[detection_classes] + print("image_detection_classes:", image_detection_classes.shape) + + detection_scores = compiled_model.output("detection_scores") + image_detection_scores = inference_result[detection_scores] + print("image_detection_scores:", image_detection_scores.shape) + + num_detections = compiled_model.output("num_detections") + image_num_detections = inference_result[num_detections] + print("image_detections_num:", image_num_detections) + + # Alternatively, inference result data can be extracted by model output name with `.get()` method + assert (inference_result[detection_boxes] == inference_result.get("detection_boxes")).all(), "extracted inference result data should be equal" + + +.. parsed-literal:: + + image_detection_boxes: (1, 100, 4) + image_detection_masks: (1, 100, 33, 33) + image_detection_classes: (1, 100) + image_detection_scores: (1, 100) + image_detections_num: [100.] + + +Inference Result Visualization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Define utility functions to visualize the inference results + +.. 
code:: ipython3 + + import random + from typing import Optional + + + def add_detection_box( + box: np.ndarray, image: np.ndarray, mask: np.ndarray, label: Optional[str] = None + ) -> np.ndarray: + """ + Helper function for adding single bounding box to the image + + Parameters + ---------- + box : np.ndarray + Bounding box coordinates in format [ymin, xmin, ymax, xmax] + image : np.ndarray + The image to which detection box is added + mask: np.ndarray + Segmentation mask in format (H, W) + label : str, optional + Detection box label string, if not provided will not be added to result image (default is None) + + Returns + ------- + np.ndarray + NumPy array including image, detection box, and segmentation mask + + """ + ymin, xmin, ymax, xmax = box + point1, point2 = (int(xmin), int(ymin)), (int(xmax), int(ymax)) + box_color = [random.randint(0, 255) for _ in range(3)] + line_thickness = round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1 + + result = cv2.rectangle( + img=image, + pt1=point1, + pt2=point2, + color=box_color, + thickness=line_thickness, + lineType=cv2.LINE_AA, + ) + + if label: + font_thickness = max(line_thickness - 1, 1) + font_face = 0 + font_scale = line_thickness / 3 + font_color = (255, 255, 255) + text_size = cv2.getTextSize( + text=label, fontFace=font_face, fontScale=font_scale, thickness=font_thickness + )[0] + # Calculate rectangle coordinates + rectangle_point1 = point1 + rectangle_point2 = (point1[0] + text_size[0], point1[1] - text_size[1] - 3) + # Add filled rectangle + result = cv2.rectangle( + img=result, + pt1=rectangle_point1, + pt2=rectangle_point2, + color=box_color, + thickness=-1, + lineType=cv2.LINE_AA, + ) + # Calculate text position + text_position = point1[0], point1[1] - 3 + # Add text with label to filled rectangle + result = cv2.putText( + img=result, + text=label, + org=text_position, + fontFace=font_face, + fontScale=font_scale, + color=font_color, + thickness=font_thickness, + lineType=cv2.LINE_AA, + ) + mask_img 
= mask[:, :, np.newaxis] * box_color + result = cv2.addWeighted(result, 1, mask_img.astype(np.uint8), 0.6, 0) + return result + +.. code:: ipython3 + + def get_mask_frame(box, frame, mask): + """ + Transform a binary mask to fit within a specified bounding box in a frame using perspective transformation. + + Args: + box (tuple): A bounding box represented as a tuple (y_min, x_min, y_max, x_max). + frame (numpy.ndarray): The larger frame or image where the mask will be placed. + mask (numpy.ndarray): A binary mask image to be transformed. + + Returns: + numpy.ndarray: A transformed mask image that fits within the specified bounding box in the frame. + """ + x_min = frame.shape[1] * box[1] + y_min = frame.shape[0] * box[0] + x_max = frame.shape[1] * box[3] + y_max = frame.shape[0] * box[2] + rect_src = np.array( + [[0, 0], [mask.shape[1], 0], [mask.shape[1], mask.shape[0]], [0, mask.shape[0]]], + dtype=np.float32, + ) + rect_dst = np.array( + [[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]], dtype=np.float32 + ) + M = cv2.getPerspectiveTransform(rect_src[:, :], rect_dst[:, :]) + mask_frame = cv2.warpPerspective( + mask, M, (frame.shape[1], frame.shape[0]), flags=cv2.INTER_CUBIC + ) + return mask_frame + + +.. 
code:: ipython3 + + from typing import Dict + + from openvino.runtime.utils.data_helpers import OVDict + + + def visualize_inference_result( + inference_result: OVDict, + image: np.ndarray, + labels_map: Dict, + detections_limit: Optional[int] = None, + ): + """ + Helper function for visualizing inference result on the image + + Parameters + ---------- + inference_result : OVDict + Result of the compiled model inference on the test image + image : np.ndarray + Original image to use for visualization + labels_map : Dict + Dictionary with mappings of detection classes numbers and its names + detections_limit : int, optional + Number of detections to show on the image, if not provided all detections will be shown (default is None) + """ + detection_boxes = inference_result.get("detection_boxes") + detection_classes = inference_result.get("detection_classes") + detection_scores = inference_result.get("detection_scores") + num_detections = inference_result.get("num_detections") + detection_masks = inference_result.get("detection_masks") + + detections_limit = int( + min(detections_limit, num_detections[0]) + if detections_limit is not None + else num_detections[0] + ) + + # Normalize detection boxes coordinates to original image size + original_image_height, original_image_width, _ = image.shape + normalized_detection_boxes = detection_boxes[0, :detections_limit] * [ + original_image_height, + original_image_width, + original_image_height, + original_image_width, + ] + result = np.copy(image) + for i in range(detections_limit): + detected_class_name = labels_map[int(detection_classes[0, i])] + score = detection_scores[0, i] + mask = detection_masks[0, i] + mask_reframed = get_mask_frame(detection_boxes[0, i], image, mask) + mask_reframed = (mask_reframed > 0.5).astype(np.uint8) + label = f"{detected_class_name} {score:.2f}" + result = add_detection_box( + box=normalized_detection_boxes[i], image=result, mask=mask_reframed, label=label + ) + + plt.imshow(result) + 
+TensorFlow Instance Segmentation model +(`mask_rcnn_inception_resnet_v2_1024x1024 `__) +used in this notebook was trained on `COCO +2017 `__ dataset with 91 classes. For better +visualization experience we can use COCO dataset labels with human +readable class names instead of class numbers or indexes. + +We can download COCO dataset classes labels from `Open Model +Zoo `__: + +.. code:: ipython3 + + coco_labels_file_path = Path("./data/coco_91cl.txt") + + download_file( + url="https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/data/dataset_classes/coco_91cl.txt", + filename=coco_labels_file_path.name, + directory=coco_labels_file_path.parent, + ); + + + +.. parsed-literal:: + + data/coco_91cl.txt: 0%| | 0.00/421 [00:00`__. + +Integration preprocessing to model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Preprocessing API enables making preprocessing a part of the model +reducing application code and dependency on additional image processing +libraries. The main advantage of Preprocessing API is that preprocessing +steps will be integrated into the execution graph and will be performed +on a selected device (CPU/GPU etc.) rather than always being executed on +CPU as part of an application. This will improve selected device +utilization. + +For more information, refer to the `Optimize Preprocessing +tutorial <118-optimize-preprocessing-with-output.html>`__ +and to the overview of `Preprocessing +API `__. 
diff --git a/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_25_1.png b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_25_1.png new file mode 100644 index 00000000000000..0a3d192dfb9b9a --- /dev/null +++ b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_25_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899a1126af7e881b5b8ad2182133a5334f3c98031a9d1b5d9285a76b44a162fc +size 395346 diff --git a/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_39_0.png b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_39_0.png new file mode 100644 index 00000000000000..b9851ff336418c --- /dev/null +++ b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/120-tensorflow-instance-segmentation-to-openvino-with-output_39_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8537cac1002162c69c666fe659effaf3bb23d10d3da276349abe7af2469499 +size 394617 diff --git a/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/index.html b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/index.html new file mode 100644 index 00000000000000..f0b5ea945592d3 --- /dev/null +++ b/docs/notebooks/120-tensorflow-instance-segmentation-to-openvino-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/120-tensorflow-instance-segmentation-to-openvino-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/120-tensorflow-instance-segmentation-to-openvino-with-output_files/


../
+120-tensorflow-instance-segmentation-to-openvin..> 31-Oct-2023 00:35              395346
+120-tensorflow-instance-segmentation-to-openvin..> 31-Oct-2023 00:35              394617
+

+ diff --git a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output.rst b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output.rst index add2c7016faed1..b0496634980513 100644 --- a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output.rst +++ b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output.rst @@ -25,33 +25,46 @@ and do inference with a sample image. **Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Download Model from TensorFlow Hub <#download-model-from-tensorflow-hub>`__ -- `Convert Model to OpenVINO IR <#convert-model-to-openvino-ir>`__ -- `Test Inference on the Converted Model <#test-inference-on-the-converted-model>`__ -- `Select inference device <#select-inference-device>`__ - - `Load the Model <#load-the-model>`__ - - `Get Model Information <#get-model-information>`__ - - `Get an Image for Test Inference <#get-an-image-for-test-inference>`__ - - `Perform Inference <#perform-inference>`__ - - `Inference Result Visualization <#inference-result-visualization>`__ +- `Prerequisites <#prerequisites>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Download Model from TensorFlow + Hub <#download-model-from-tensorflow-hub>`__ +- `Convert Model to OpenVINO + IR <#convert-model-to-openvino-ir>`__ +- `Test Inference on the Converted + Model <#test-inference-on-the-converted-model>`__ +- `Select inference device <#select-inference-device>`__ + + - `Load the Model <#load-the-model>`__ + - `Get Model Information <#get-model-information>`__ + - `Get an Image for Test + Inference <#get-an-image-for-test-inference>`__ + - `Perform Inference <#perform-inference>`__ + - `Inference Result + Visualization <#inference-result-visualization>`__ + +- `Next Steps <#next-steps>`__ + + - `Async inference pipeline <#async-inference-pipeline>`__ + - `Integration preprocessing to + model <#integration-preprocessing-to-model>`__ + 
+Prerequisites +------------------------------------------------------- -- `Next Steps <#next-steps>`__ +Install required packages: - - `Async inference pipeline <#async-inference-pipeline>`__ - - `Integration preprocessing to model <#integration-preprocessing-to-model>`__ +.. code:: ipython3 -Prerequisites -############################################################################################################################### + %pip install -q "openvino>=2023.1.0" "numpy>=1.21.0" "opencv-python" "matplotlib>=3.4" -Install required packages: -.. code:: ipython3 +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. - !pip install -q "openvino==2023.1.0.dev20230811" "numpy>=1.21.0" "opencv-python" "matplotlib>=3.4,<3.5.3" The notebook uses utility functions. The cell below will download the ``notebook_utils`` Python module from GitHub. @@ -66,8 +79,8 @@ The notebook uses utility functions. The cell below will download the filename="notebook_utils.py", ); -Imports -############################################################################################################################### +Imports +------------------------------------------------- .. 
code:: ipython3 @@ -84,8 +97,8 @@ Imports # Notebook utils module from notebook_utils import download_file -Settings -############################################################################################################################### +Settings +-------------------------------------------------- Define model related variables and create corresponding directories: @@ -111,8 +124,8 @@ Define model related variables and create corresponding directories: tf_model_archive_filename = f"{model_name}.tar.gz" -Download Model from TensorFlow Hub -############################################################################################################################### +Download Model from TensorFlow Hub +---------------------------------------------------------------------------- Download archive with TensorFlow Object Detection model (`faster_rcnn_resnet50_v1_640x640 `__) @@ -137,7 +150,7 @@ from TensorFlow Hub: .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/120-tensorflow-object-detection-to-openvino/model/tf/faster_rcnn_resnet50_v1_640x640.tar.gz') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/120-tensorflow-object-detection-to-openvino/model/tf/faster_rcnn_resnet50_v1_640x640.tar.gz') @@ -150,8 +163,8 @@ Extract TensorFlow Object Detection model from the downloaded archive: with tarfile.open(tf_model_dir / tf_model_archive_filename) as file: file.extractall(path=tf_model_dir) -Convert Model to OpenVINO IR -############################################################################################################################### +Convert Model to OpenVINO IR +---------------------------------------------------------------------- OpenVINO Model Converter Python API can be used to convert the TensorFlow model to OpenVINO IR. 
@@ -173,21 +186,18 @@ support + Model input: Model outputs count: 8 Model outputs: @@ -289,8 +299,8 @@ for more information about model inputs, outputs and their formats. -Get an Image for Test Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get an Image for Test Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Load and save an image: @@ -305,17 +315,16 @@ Load and save an image: ) - .. parsed-literal:: - data/coco_bike.jpg: 0%| | 0.00/182k [00:00 + @@ -351,8 +360,8 @@ Read the image, resize and convert it to the input shape of the network: .. image:: 120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_25_1.png -Perform Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Perform Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -386,7 +395,7 @@ outputs will be used. .. parsed-literal:: image_detection_boxes: [[[0.1645457 0.54601336 0.8953864 0.85500604] - [0.67189544 0.01240013 0.9843237 0.5308594 ] + [0.67189544 0.01240015 0.9843237 0.53085935] [0.49188587 0.0117609 0.98050654 0.8866383 ] ... [0.43604603 0.59332204 0.4692565 0.6341099 ] @@ -409,61 +418,61 @@ outputs will be used. 84. 38. 1. 15. 3. 20. 62. 58. 41. 20. 2. 4. 88. 62. 15. 31. 1. 31. 14. 19. 4. 1. 2. 8. 18. 15. 4. 2. 2. 2. 31. 84. 15. 3. 28. 2. 27. 18. 15. 1. 31. 28. 1. 41. 8. 1. 3. 
20.]] - image_detection_scores: [[0.9810079 0.9406672 0.9318088 0.87736803 0.8406418 0.590001 - 0.5544931 0.5395725 0.49390146 0.48142615 0.46272704 0.44070086 - 0.40116653 0.3470845 0.31795666 0.27489564 0.2474634 0.23632632 - 0.23248206 0.22401379 0.21871325 0.20231566 0.19377239 0.14768396 - 0.14555264 0.14337891 0.12709722 0.12582931 0.11867397 0.11002139 - 0.10564936 0.09225632 0.08963246 0.08887175 0.08704519 0.08072548 - 0.08002183 0.07911441 0.0666113 0.06338128 0.06100732 0.06005874 - 0.05798699 0.05364133 0.05204991 0.05011017 0.04850946 0.04709009 - 0.04469202 0.04128509 0.04075823 0.03989557 0.03523415 0.03272378 - 0.03108068 0.02970159 0.02872299 0.02845932 0.02585638 0.02348834 - 0.02330401 0.02148149 0.02133745 0.02086147 0.0203565 0.01959799 - 0.01931953 0.01926655 0.01872199 0.01856231 0.018533 0.01838779 - 0.0181897 0.01780706 0.01727113 0.0166365 0.01586579 0.01579068 - 0.01573388 0.01528254 0.01502856 0.01451417 0.01439991 0.01428939 - 0.01419332 0.01380482 0.01360496 0.01299109 0.01249149 0.01198874 - 0.0114887 0.01145835 0.01144462 0.01139608 0.01113943 0.01108595 - 0.01089338 0.01082359 0.01051233 0.01027331 0.01006837 0.00979451 - 0.00973239 0.00960592 0.00957181 0.00953101 0.00949827 0.00942653 - 0.00942553 0.00931231 0.00907305 0.00887801 0.00884456 0.00881256 - 0.00864554 0.00854315 0.00849876 0.00849663 0.00846909 0.00820139 - 0.00816586 0.00791354 0.0079015 0.00769929 0.00768903 0.00766408 - 0.00766067 0.00764458 0.00745573 0.00721994 0.00706666 0.00700596 - 0.0067884 0.00648051 0.00646964 0.00638165 0.00635813 0.00625102 - 0.00622972 0.00599667 0.00591933 0.00585055 0.00578007 0.00576509 - 0.00572359 0.00560451 0.00558354 0.00556508 0.00553865 0.00548295 - 0.00547358 0.00543471 0.00543379 0.0054083 0.0053792 0.00535764 - 0.00523385 0.00518936 0.00505314 0.00505005 0.00492085 0.00482561 - 0.00471782 0.00470318 0.00464702 0.00461123 0.00458301 0.00457273 - 0.00455804 0.00454316 0.00454089 0.00441311 0.00437611 0.0042632 - 0.00420744 
0.00415997 0.00409999 0.00409556 0.00407972 0.00405195 - 0.00404086 0.00399852 0.00399512 0.00393439 0.00390283 0.00387304 - 0.0038489 0.00382758 0.00380029 0.00379529 0.00376791 0.00374193 - 0.0037119 0.00369629 0.00366445 0.00358808 0.00351782 0.0035044 - 0.00344527 0.00343268 0.00342918 0.0033823 0.00332239 0.00330844 - 0.00329753 0.00327268 0.00315135 0.0031098 0.00308979 0.00308363 - 0.00305497 0.00304868 0.00304043 0.00303659 0.00302582 0.00301236 - 0.0029885 0.00291268 0.00290264 0.00289243 0.00287722 0.00286564 - 0.0028257 0.00282503 0.00275258 0.00274533 0.0027204 0.00268618 - 0.00261918 0.00260795 0.00256593 0.00254094 0.00252855 0.00250768 - 0.00249793 0.00249551 0.00248255 0.00247912 0.00246619 0.00241695 - 0.00240165 0.00236032 0.00235902 0.00234437 0.00234337 0.00233791 - 0.00233535 0.00230773 0.00230558 0.00229112 0.00228888 0.0022631 - 0.00225214 0.00224187 0.00222553 0.00219966 0.00219677 0.00217865 - 0.00217776 0.00215922 0.0021541 0.00214997 0.00212955 0.00211928 - 0.0021005 0.00205066 0.00204869 0.00203888 0.00203537 0.00203026 - 0.00201357 0.00199936 0.00199387 0.00197951 0.00197288 0.00195503 - 0.00194848 0.00192129 0.00189951 0.00187286 0.0018519 0.00182989 - 0.00179158 0.00177909 0.00176328 0.00176319 0.00175034 0.00173788 - 0.00172983 0.00172819 0.00168273 0.0016768 0.00167542 0.00167398 - 0.0016395 0.00163637 0.00163319 0.00162887 0.00162824 0.00162028]] + image_detection_scores: [[0.9810079 0.9406672 0.9318088 0.877368 0.8406416 0.590001 + 0.55449295 0.53957206 0.49390146 0.48142543 0.46272704 0.44070077 + 0.40116653 0.34708446 0.31795666 0.27489546 0.24746332 0.23632598 + 0.23248206 0.22401379 0.21871354 0.20231584 0.19377239 0.14768413 + 0.1455532 0.14337878 0.12709719 0.12582931 0.11867398 0.11002147 + 0.10564942 0.09225623 0.08963215 0.08887199 0.08704525 0.08072542 + 0.08002211 0.07911447 0.0666113 0.06338121 0.06100726 0.06005874 + 0.05798694 0.05364129 0.0520498 0.05011013 0.04850959 0.04709018 + 0.04469205 0.04128502 0.04075819 
0.03989548 0.03523409 0.03272378 + 0.03108071 0.02970156 0.028723 0.02845931 0.02585638 0.02348842 + 0.0233041 0.02148155 0.02133748 0.02086138 0.02035652 0.01959795 + 0.01931953 0.01926655 0.01872199 0.0185623 0.01853302 0.01838779 + 0.01818969 0.01780701 0.01727104 0.0166365 0.01586579 0.01579063 + 0.01573381 0.01528252 0.01502847 0.01451413 0.01439992 0.01428944 + 0.01419329 0.01380476 0.01360496 0.0129911 0.01249144 0.01198867 + 0.01148862 0.01145841 0.01144459 0.01139607 0.01113943 0.01108592 + 0.01089338 0.01082358 0.01051232 0.01027328 0.01006837 0.00979451 + 0.0097324 0.00960593 0.00957182 0.00953105 0.00949826 0.00942655 + 0.00942555 0.00931226 0.00907306 0.00887798 0.00884452 0.00881256 + 0.00864548 0.00854316 0.00849879 0.00849662 0.00846909 0.00820138 + 0.00816586 0.00791354 0.00790157 0.0076993 0.00768906 0.00766408 + 0.00766065 0.00764457 0.0074557 0.00721993 0.00706666 0.00700596 + 0.0067884 0.00648049 0.00646963 0.0063817 0.00635814 0.00625102 + 0.0062297 0.00599666 0.00591931 0.00585055 0.00578007 0.00576511 + 0.00572359 0.00560452 0.00558355 0.00556507 0.00553867 0.00548295 + 0.00547356 0.00543471 0.00543378 0.00540831 0.0053792 0.00535764 + 0.00523385 0.00518935 0.00505314 0.00505005 0.00492085 0.0048256 + 0.00471783 0.00470318 0.00464703 0.00461124 0.004583 0.00457273 + 0.00455803 0.00454314 0.00454088 0.00441311 0.00437612 0.00426319 + 0.00420744 0.00415996 0.00409997 0.00409557 0.00407971 0.00405195 + 0.00404085 0.00399853 0.00399512 0.00393439 0.00390283 0.00387302 + 0.0038489 0.00382758 0.00380028 0.00379529 0.00376791 0.00374193 + 0.00371191 0.0036963 0.00366445 0.00358808 0.00351783 0.00350439 + 0.00344527 0.00343266 0.00342918 0.0033823 0.00332239 0.00330844 + 0.00329753 0.00327267 0.00315135 0.0031098 0.00308979 0.00308362 + 0.00305496 0.00304868 0.00304044 0.00303659 0.00302582 0.00301237 + 0.00298851 0.00291267 0.00290264 0.00289242 0.00287722 0.00286563 + 0.0028257 0.00282502 0.00275258 0.00274531 0.0027204 0.00268617 + 0.00261917 
0.00260795 0.00256594 0.00254094 0.00252856 0.00250768 + 0.00249793 0.00249551 0.00248255 0.00247911 0.00246619 0.00241695 + 0.00240165 0.00236032 0.00235902 0.00234437 0.00234337 0.0023379 + 0.00233535 0.00230773 0.00230558 0.00229113 0.00228888 0.0022631 + 0.00225214 0.00224186 0.00222553 0.00219966 0.00219677 0.00217865 + 0.00217775 0.00215921 0.0021541 0.00214997 0.00212954 0.00211928 + 0.0021005 0.00205066 0.0020487 0.00203887 0.00203537 0.00203026 + 0.00201357 0.00199936 0.00199386 0.00197951 0.00197287 0.00195502 + 0.00194848 0.00192128 0.00189951 0.00187285 0.0018519 0.0018299 + 0.00179158 0.00177908 0.00176328 0.00176319 0.00175034 0.00173788 + 0.00172983 0.00172819 0.00168272 0.0016768 0.00167543 0.00167397 + 0.0016395 0.00163637 0.00163319 0.00162886 0.00162824 0.00162028]] image_detections_num: [300.] -Inference Result Visualization -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Inference Result Visualization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Define utility functions to visualize the inference results @@ -603,7 +612,7 @@ Zoo `__: .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/120-tensorflow-object-detection-to-openvino/data/coco_91cl.txt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/120-tensorflow-object-detection-to-openvino/data/coco_91cl.txt') @@ -642,14 +651,14 @@ original test image: .. 
image:: 120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_38_0.png -Next Steps -############################################################################################################################### +Next Steps +---------------------------------------------------- This section contains suggestions on how to additionally improve the performance of your application using OpenVINO. -Async inference pipeline -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Async inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The key advantage of the Async API is that when a device is busy with inference, the application can perform other tasks in parallel (for @@ -658,8 +667,8 @@ wait for the current inference to complete first. To understand how to perform async inference using openvino, refer to the `Async API tutorial <115-async-api-with-output.html>`__. -Integration preprocessing to model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Integration preprocessing to model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Preprocessing API enables making preprocessing a part of the model reducing application code and dependency on additional image processing @@ -672,4 +681,4 @@ utilization. For more information, refer to the `Optimize Preprocessing tutorial <118-optimize-preprocessing-with-output.html>`__ and to the overview of `Preprocessing -API `__. +API `__. 
diff --git a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_38_0.png b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_38_0.png index 58eab9f05da9ae..33a6b939145c42 100644 --- a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_38_0.png +++ b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/120-tensorflow-object-detection-to-openvino-with-output_38_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31311ed17093e4368b19e35fdb57bcc230816766a3908e831be520abe18a263e -size 391413 +oid sha256:df9e84273cf7c5c19bf1c4ea8d6b61372f59b04228a95bacd2fac1b97dde6d4a +size 392067 diff --git a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/index.html b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/index.html index e98e97f854276e..1faa6594c627b4 100644 --- a/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/index.html +++ b/docs/notebooks/120-tensorflow-object-detection-to-openvino-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/120-tensorflow-object-detection-to-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/120-tensorflow-object-detection-to-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/120-tensorflow-object-detection-to-openvino-with-output_files/


../
-120-tensorflow-object-detection-to-openvino-wit..> 16-Aug-2023 01:31              395346
-120-tensorflow-object-detection-to-openvino-wit..> 16-Aug-2023 01:31              391330
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/120-tensorflow-object-detection-to-openvino-with-output_files/


../
+120-tensorflow-object-detection-to-openvino-wit..> 31-Oct-2023 00:35              395346
+120-tensorflow-object-detection-to-openvino-wit..> 31-Oct-2023 00:35              392067
 

diff --git a/docs/notebooks/121-convert-to-openvino-with-output.rst b/docs/notebooks/121-convert-to-openvino-with-output.rst index 4fc417fb304551..e0f554470d26a5 100644 --- a/docs/notebooks/121-convert-to-openvino-with-output.rst +++ b/docs/notebooks/121-convert-to-openvino-with-output.rst @@ -4,47 +4,46 @@ OpenVINO™ model conversion API This notebook shows how to convert a model from original framework format to OpenVINO Intermediate Representation (IR). Contents: -- `OpenVINO IR format <#openvino-ir-format>`__ -- `IR preparation with Python conversion API and Model Optimizer command-line tool <#ir-preparation-with-python-conversion-api-and-model-optimizer-command-line-tool>`__ -- `Fetching example models <#fetching-example-models>`__ -- `Basic conversion <#basic-conversion>`__ -- `Model conversion parameters <#model-conversion-parameters>`__ +- `OpenVINO IR format <#openvino-ir-format>`__ +- `IR preparation with Python conversion API and Model Optimizer + command-line + tool <#ir-preparation-with-python-conversion-api-and-model-optimizer-command-line-tool>`__ +- `Fetching example models <#fetching-example-models>`__ +- `Basic conversion <#basic-conversion>`__ +- `Model conversion parameters <#model-conversion-parameters>`__ + + - `Setting Input Shapes <#setting-input-shapes>`__ + - `Cutting Off Parts of a Model <#cutting-off-parts-of-a-model>`__ + - `Embedding Preprocessing + Computation <#embedding-preprocessing-computation>`__ + + - `Specifying Layout <#specifying-layout>`__ + - `Changing Model Layout <#changing-model-layout>`__ + - `Specifying Mean and Scale + Values <#specifying-mean-and-scale-values>`__ + - `Reversing Input Channels <#reversing-input-channels>`__ + + - `Compressing a Model to FP16 <#compressing-a-model-to-fp>`__ + +- `Convert Models Represented as Python + Objects <#convert-models-represented-as-python-objects>`__ - - `Setting Input Shapes <#setting-input-shapes>`__ - - `Cutting Off Parts of a Model <#cutting-off-parts-of-a-model>`__ - - 
`Embedding Preprocessing Computation <#embedding-preprocessing-computation>`__ - - - `Specifying Layout <#specifying-layout>`__ - - `Changing Model Layout <#changing-model-layout>`__ - - `Specifying Mean and Scale Values <#specifying-mean-and-scale-values>`__ - - `Reversing Input Channels <#reversing-input-channels>`__ - - - `Compressing a Model to FP16 <#compressing-a-model-to-fp16>`__ - -- `Convert Models Represented as Python Objects <#convert-models-represented-as-python-objects>`__ - -.. code:: +.. code:: ipython3 - # Required imports. Please execute this cell first. - ! pip install -q --find-links https://download.pytorch.org/whl/torch_stable.html \ - "openvino-dev>=2023.0.1" \ - "requests" \ - "tqdm" \ - "transformers[onnx]>=4.21.1" \ - "torch==1.13.1; sys_platform == 'darwin'" \ - "torch==1.13.1+cpu; sys_platform == 'linux' or platform_system == 'Windows'" \ - "torchvision==0.14.1; sys_platform == 'darwin'" \ - "torchvision==0.14.1+cpu; sys_platform == 'linux' or platform_system == 'Windows'" + # Required imports. Please execute this cell first. + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \ + "openvino-dev>=2023.1.0" "requests" "tqdm" "transformers[onnx]>=4.21.1" "torch" "torchvision" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible. - + tensorflow 2.13.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.2 which is incompatible. + Note: you may need to restart the kernel to use updated packages. 
+ OpenVINO IR format -############################################################################################################################### +------------------ OpenVINO `Intermediate Representation (IR) `__ is the @@ -57,7 +56,7 @@ an ``.xml`` file, containing information about network topology, and a ``.bin`` file, containing the weights and biases binary data. IR preparation with Python conversion API and Model Optimizer command-line tool -############################################################################################################################### +------------------------------------------------------------------------------- There are two ways to convert a model from the original framework format to OpenVINO IR: Python conversion API and Model Optimizer command-line @@ -68,11 +67,11 @@ refer to `Model Preparation `__ documentation. -.. code:: +.. code:: ipython3 - # Model Optimizer CLI tool parameters description - - ! mo --help + # Model Optimizer CLI tool parameters description + + ! mo --help .. parsed-literal:: @@ -377,11 +376,11 @@ documentation. .. code:: ipython3 - # Python conversion API parameters description - from openvino.tools import mo + # Python conversion API parameters description + from openvino.tools import mo + - - mo.convert_model(help=True) + mo.convert_model(help=True) .. parsed-literal:: @@ -677,7 +676,7 @@ documentation. 
Fetching example models -############################################################################################################################### +----------------------- This notebook uses two models for conversion examples: @@ -707,26 +706,38 @@ NLP model from Hugging Face and export it in ONNX format: ONNX_NLP_MODEL_PATH = MODEL_DIRECTORY_PATH / "distilbert.onnx" # download model - hf_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") + hf_model = AutoModelForSequenceClassification.from_pretrained( + "distilbert-base-uncased-finetuned-sst-2-english" + ) # initialize tokenizer - tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") + tokenizer = AutoTokenizer.from_pretrained( + "distilbert-base-uncased-finetuned-sst-2-english" + ) # get model onnx config function for output feature format sequence-classification - model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(hf_model, feature="sequence-classification") + model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise( + hf_model, feature="sequence-classification" + ) # fill onnx config based on pytorch model config onnx_config = model_onnx_config(hf_model.config) # export to onnx format - export(preprocessor=tokenizer, model=hf_model, config=onnx_config, opset=onnx_config.default_onnx_opset, output=ONNX_NLP_MODEL_PATH) + export( + preprocessor=tokenizer, + model=hf_model, + config=onnx_config, + opset=onnx_config.default_onnx_opset, + output=ONNX_NLP_MODEL_PATH, + ) .. parsed-literal:: - 2023-09-08 23:06:13.646146: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2023-09-08 23:06:13.679884: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-30 23:03:34.054449: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 23:03:34.088016: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 23:06:14.259953: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:223: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + 2023-10-30 23:03:34.718197: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:223: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -952,9 +963,7 @@ Convert PyTorch model to ONNX format: with warnings.catch_warnings(): warnings.filterwarnings("ignore") torch.onnx.export( - model=pytorch_model, - args=torch.randn(1, 3, 780, 520), - f=ONNX_CV_MODEL_PATH + model=pytorch_model, args=torch.randn(1, 3, 780, 520), f=ONNX_CV_MODEL_PATH ) print(f"ONNX model exported to {ONNX_CV_MODEL_PATH}") @@ -982,13 +991,17 @@ To convert a model to OpenVINO IR, use the following command: To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/distilbert.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/distilbert.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/distilbert.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/distilbert.bin .. code:: ipython3 @@ -1002,7 +1015,7 @@ To convert a model to OpenVINO IR, use the following command: # then model can be serialized to *.xml & *.bin files from openvino.runtime import serialize - serialize(ov_model, xml_path=MODEL_DIRECTORY_PATH / 'distilbert.xml') + serialize(ov_model, xml_path=MODEL_DIRECTORY_PATH / "distilbert.xml") .. parsed-literal:: @@ -1040,7 +1053,7 @@ specified) is not successful, it may be required to use the parameters mentioned above to override input shapes and cut the model. Setting Input Shapes -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~ Model conversion is supported for models with dynamic input shapes that contain undefined dimensions. However, if the shape of data is not going @@ -1070,24 +1083,36 @@ guide `__. Specifying Layout -------------------------------------------------------------------------------------------------------------------------------- +^^^^^^^^^^^^^^^^^ Layout defines the meaning of dimensions in a shape and can be specified for both inputs and outputs. 
Some preprocessing requires to set input @@ -1293,13 +1355,17 @@ Resnet50 model that was exported to the ONNX format: To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin .. 
code:: ipython3 @@ -1311,7 +1377,7 @@ Resnet50 model that was exported to the ONNX format: ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw") Changing Model Layout -------------------------------------------------------------------------------------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^ Changing the model layout may be necessary if it differs from the one presented by input data. Use either ``layout`` or ``source_layout`` with @@ -1333,24 +1399,36 @@ presented by input data. Use either ``layout`` or ``source_layout`` with To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + + +.. parsed-literal:: + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin .. code:: ipython3 @@ -1362,10 +1440,12 @@ presented by input data. Use either ``layout`` or ``source_layout`` with ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, layout="nchw->nhwc") # alternatively use source_layout and target_layout parameters - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc") + ov_model = mo.convert_model( + ONNX_CV_MODEL_PATH, source_layout="nchw", target_layout="nhwc" + ) Specifying Mean and Scale Values -------------------------------------------------------------------------------------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Model conversion API has the following parameters to specify the values: ``mean_values``, ``scale_values``, ``scale``. Using these parameters, @@ -1388,24 +1468,36 @@ that the preprocessing takes negligible time for inference. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. 
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + + +.. parsed-literal:: + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. 
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin .. code:: ipython3 @@ -1414,12 +1506,14 @@ that the preprocessing takes negligible time for inference. 
from openvino.tools import mo - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, mean_values=[123,117,104], scale=255) + ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, mean_values=[123, 117, 104], scale=255) - ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, mean_values=[123,117,104], scale_values=[255,255,255]) + ov_model = mo.convert_model( + ONNX_CV_MODEL_PATH, mean_values=[123, 117, 104], scale_values=[255, 255, 255] + ) Reversing Input Channels -------------------------------------------------------------------------------------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^ Sometimes, input images for your application can be of the ``RGB`` (or ``BGR``) format, and the model is trained on images of the ``BGR`` (or @@ -1440,13 +1534,17 @@ the color channels before inference. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin .. code:: ipython3 @@ -1458,7 +1556,7 @@ the color channels before inference. ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, reverse_input_channels=True) Compressing a Model to FP16 -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Optionally all relevant floating-point weights can be compressed to FP16 data type during the model conversion, creating a compressed FP16 model. @@ -1479,13 +1577,17 @@ models, this decrease is negligible. To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. 
While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/121-convert-to-openvino/model/resnet.bin .. code:: ipython3 @@ -1497,7 +1599,7 @@ models, this decrease is negligible. ov_model = mo.convert_model(ONNX_CV_MODEL_PATH, compress_to_fp16=True) Convert Models Represented as Python Objects -############################################################################################################################### +-------------------------------------------- Python conversion API can pass Python model objects, such as a Pytorch model or TensorFlow Keras model directly, without saving them into files @@ -1516,24 +1618,6 @@ training scripts). .. parsed-literal:: WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' ``convert_model()`` accepts all parameters available in the MO @@ -1546,8 +1630,15 @@ string analogs, similar to the command-line tool. 
from openvino.tools import mo - ov_model = mo.convert_model(pytorch_model, input_shape=[1,3,100,100], mean_values=[127, 127, 127], layout="nchw") + ov_model = mo.convert_model( + pytorch_model, + input_shape=[1, 3, 100, 100], + mean_values=[127, 127, 127], + layout="nchw", + ) ov_model = mo.convert_model(pytorch_model, source_layout="nchw", target_layout="nhwc") - ov_model = mo.convert_model(pytorch_model, compress_to_fp16=True, reverse_input_channels=True) + ov_model = mo.convert_model( + pytorch_model, compress_to_fp16=True, reverse_input_channels=True + ) diff --git a/docs/notebooks/122-speech-recognition-quantization-wav2vec2-with-output.rst b/docs/notebooks/122-speech-recognition-quantization-wav2vec2-with-output.rst index cd6ebf0ab3dedb..50076690c479b9 100644 --- a/docs/notebooks/122-speech-recognition-quantization-wav2vec2-with-output.rst +++ b/docs/notebooks/122-speech-recognition-quantization-wav2vec2-with-output.rst @@ -42,218 +42,32 @@ and has the following differences: .. -.. note:: + **NOTE**: Currently, 8-bit quantization with accuracy control in NNCF + is available only for models in OpenVINO representation. - Currently, 8-bit quantization with accuracy control in NNCF is available only for models in OpenVINO representation. - -The steps for the quantization with accuracy control are described below. +The steps for the quantization with accuracy control are described +below. **Table of contents:** -- `Imports <#imports>`__ -- `Prepare the Model <#prepare-the-model>`__ -- `Prepare LibriSpeech Dataset <#prepare-librispeech-dataset>`__ -- `Prepare calibration and validation datasets <#prepare-calibration-and-validation-datasets>`__ -- `Prepare validation function <#prepare-validation-function>`__ -- `Run quantization with accuracy control <#run-quantization-with-accuracy-control>`__ -- `Model Usage Example <#model-usage-example>`__ -- `Compare Accuracy of the Original and Quantized Models <#compare-accuracy-of-the-original-and-quantized-models>`__ - -.. 
code:: ipython3 - # !pip install -q "openvino-dev>=2023.1.0" "nncf>=2.6.0" - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install git+https://github.com/openvinotoolkit/nncf.git@develop - !pip install -q soundfile librosa transformers torch datasets torchmetrics +- `Imports <#>`__ +- `Prepare the Model <#>`__ +- `Prepare LibriSpeech Dataset <#>`__ +- `Prepare calibration and validation datasets <#>`__ +- `Prepare validation function <#>`__ +- `Run quantization with accuracy control <#>`__ +- `Model Usage Example <#>`__ +- `Compare Performance of the Original and Quantized Models <#>`__ +.. code:: ipython3 -.. parsed-literal:: + %pip install -q "openvino>=2023.1.0" + %pip install -q "nncf>=2.6.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu soundfile librosa transformers torch datasets torchmetrics - Collecting git+https://github.com/openvinotoolkit/nncf.git@develop - Cloning https://github.com/openvinotoolkit/nncf.git (to revision develop) to /tmp/pip-req-build-o2lphim0 - Running command git clone --filter=blob:none --quiet https://github.com/openvinotoolkit/nncf.git /tmp/pip-req-build-o2lphim0 - Filtering content: 1% (2/142) - Filtering content: 2% (3/142) - Filtering content: 3% (5/142) - Filtering content: 4% (6/142) - Filtering content: 5% (8/142), 10.29 MiB | 16.71 MiB/s - Filtering content: 6% (9/142), 10.29 MiB | 16.71 MiB/s - Filtering content: 7% (10/142), 10.29 MiB | 16.71 MiB/s - Filtering content: 7% (10/142), 12.61 MiB | 8.69 MiB/s - Filtering content: 8% (12/142), 12.61 MiB | 8.69 MiB/s - Filtering content: 9% (13/142), 12.61 MiB | 8.69 MiB/s - Filtering content: 10% (15/142), 14.35 MiB | 7.17 MiB/s - Filtering content: 11% (16/142), 14.35 MiB | 7.17 MiB/s - Filtering content: 12% (18/142), 14.35 MiB | 7.17 MiB/s - Filtering content: 13% (19/142), 17.07 MiB | 6.80 MiB/s - Filtering content: 14% (20/142), 17.07 MiB | 6.80 MiB/s - Filtering content: 15% (22/142), 17.07 MiB | 6.80 MiB/s - Filtering content: 16% 
(23/142), 17.07 MiB | 6.80 MiB/s - Filtering content: 17% (25/142), 19.78 MiB | 6.42 MiB/s - Filtering content: 18% (26/142), 19.78 MiB | 6.42 MiB/s - Filtering content: 19% (27/142), 19.78 MiB | 6.42 MiB/s - Filtering content: 20% (29/142), 19.78 MiB | 6.42 MiB/s - Filtering content: 21% (30/142), 19.78 MiB | 6.42 MiB/s - Filtering content: 22% (32/142), 22.80 MiB | 6.19 MiB/s - Filtering content: 23% (33/142), 22.80 MiB | 6.19 MiB/s - Filtering content: 24% (35/142), 22.80 MiB | 6.19 MiB/s - Filtering content: 25% (36/142), 22.80 MiB | 6.19 MiB/s - Filtering content: 26% (37/142), 22.80 MiB | 6.19 MiB/s - Filtering content: 26% (37/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 27% (39/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 28% (40/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 29% (42/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 30% (43/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 31% (45/142), 25.18 MiB | 5.93 MiB/s - Filtering content: 32% (46/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 33% (47/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 34% (49/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 35% (50/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 36% (52/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 37% (53/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 38% (54/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 39% (56/142), 27.34 MiB | 5.71 MiB/s - Filtering content: 40% (57/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 41% (59/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 42% (60/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 43% (62/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 44% (63/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 45% (64/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 46% (66/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 47% (67/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 48% (69/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 49% (70/142), 29.35 MiB | 5.54 
MiB/s - Filtering content: 50% (71/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 51% (73/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 52% (74/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 53% (76/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 54% (77/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 55% (79/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 56% (80/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 57% (81/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 58% (83/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 59% (84/142), 29.35 MiB | 5.54 MiB/s - Filtering content: 60% (86/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 61% (87/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 62% (89/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 63% (90/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 64% (91/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 65% (93/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 66% (94/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 67% (96/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 68% (97/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 69% (98/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 70% (100/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 71% (101/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 72% (103/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 73% (104/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 74% (106/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 75% (107/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 76% (108/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 77% (110/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 78% (111/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 79% (113/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 80% (114/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 81% (116/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 82% (117/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 83% (118/142), 31.63 MiB | 4.19 MiB/s - 
Filtering content: 84% (120/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 85% (121/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 86% (123/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 87% (124/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 88% (125/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 89% (127/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 90% (128/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 91% (130/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 92% (131/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 93% (133/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 94% (134/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 95% (135/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 96% (137/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 97% (138/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 98% (140/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 99% (141/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 100% (142/142), 31.63 MiB | 4.19 MiB/s - Filtering content: 100% (142/142), 32.00 MiB | 3.57 MiB/s, done. - Resolved https://github.com/openvinotoolkit/nncf.git to commit 90a1e860c93b553fa9684113e02d41d622235c55 - Preparing metadata (setup.py) ... 
- done - Collecting pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd (from nncf==2.5.0.dev0+90a1e860) - Using cached pymoo-0.6.0.1-py3-none-any.whl - Requirement already satisfied: jsonschema>=3.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (4.19.0) - Requirement already satisfied: jstyleson>=0.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (0.0.2) - Requirement already satisfied: natsort>=7.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (8.4.0) - Requirement already satisfied: networkx<=2.8.2,>=2.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.8.2) - Requirement already satisfied: ninja<1.11,>=1.10.0.post2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.10.2.4) - Requirement already satisfied: numpy<1.25,>=1.19.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2023.1.1) - Requirement already satisfied: packaging>=20.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (23.1) - 
Requirement already satisfied: pandas<2.1,>=1.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.0.3) - Requirement already satisfied: psutil in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (5.9.5) - Requirement already satisfied: pydot>=1.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.4.2) - Requirement already satisfied: pyparsing<3.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.4.7) - Requirement already satisfied: scikit-learn>=0.24.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.3.0) - Requirement already satisfied: scipy<1.11,>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.10.1) - Requirement already satisfied: texttable>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.6.7) - Requirement already satisfied: tqdm>=4.54.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (4.66.1) - Requirement already satisfied: attrs>=22.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (23.1.0) - Requirement 
already satisfied: importlib-resources>=1.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (6.0.1) - Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (2023.7.1) - Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (1.3.10) - Requirement already satisfied: referencing>=0.28.4 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (0.30.2) - Requirement already satisfied: rpds-py>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (0.10.2) - Requirement already satisfied: python-dateutil>=2.8.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2.8.2) - Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2023.3.post1) - Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2023.3) - Requirement already satisfied: 
joblib>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from scikit-learn>=0.24.0->nncf==2.5.0.dev0+90a1e860) (1.3.2) - Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from scikit-learn>=0.24.0->nncf==2.5.0.dev0+90a1e860) (3.2.0) - Requirement already satisfied: matplotlib>=3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (3.5.2) - Requirement already satisfied: autograd>=1.4 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.6.2) - Collecting cma==3.2.2 (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) - Using cached cma-3.2.2-py2.py3-none-any.whl (249 kB) - Collecting alive-progress (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) - Obtaining dependency information for alive-progress from https://files.pythonhosted.org/packages/e3/02/5d7f9158d69b36fbe9eb0df8fb435008ec881e41bc7d839239004207d807/alive_progress-3.1.4-py3-none-any.whl.metadata - Using cached alive_progress-3.1.4-py3-none-any.whl.metadata (68 kB) - Requirement already satisfied: dill in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ 
git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.3.7) - Collecting Deprecated (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) - Obtaining dependency information for Deprecated from https://files.pythonhosted.org/packages/20/8d/778b7d51b981a96554f29136cd59ca7880bf58094338085bcf2a979a0e6a/Deprecated-1.2.14-py2.py3-none-any.whl.metadata - Using cached Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB) - Requirement already satisfied: future>=0.15.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from autograd>=1.4->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.18.3) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (3.16.2) - Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.11.0) - Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (4.42.1) - Requirement already satisfied: kiwisolver>=1.0.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.4.5) - Requirement already satisfied: pillow>=6.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (10.0.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (1.16.0) - Collecting about-time==4.2.1 (from alive-progress->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) - Using cached about_time-4.2.1-py3-none-any.whl (13 kB) - Collecting grapheme==0.6.0 (from alive-progress->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) - Using cached grapheme-0.6.0-py3-none-any.whl - Requirement already satisfied: wrapt<2,>=1.10 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from Deprecated->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.14.1) - Using cached alive_progress-3.1.4-py3-none-any.whl (75 kB) - Using cached Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB) - Building wheels for collected packages: nncf - Building wheel for nncf (setup.py) ... 
- \ | / done - Created wheel for nncf: filename=nncf-2.5.0.dev0+90a1e860-py3-none-any.whl size=1139358 sha256=35a2f1daf4360a3b65a6a2996cca9f15d165f6c25994f64d8ccf10960e7a55bc - Stored in directory: /tmp/pip-ephem-wheel-cache-mdg9hjsd/wheels/6d/17/88/a292ae87701bc65e2e1c63261d22d7fb0e15aa8448ee693d5f - Successfully built nncf - Installing collected packages: grapheme, Deprecated, cma, about-time, alive-progress, pymoo, nncf - Attempting uninstall: cma - Found existing installation: cma 2.7.0 - Uninstalling cma-2.7.0: - Successfully uninstalled cma-2.7.0 - Attempting uninstall: pymoo - Found existing installation: pymoo 0.5.0 - Uninstalling pymoo-0.5.0: - Successfully uninstalled pymoo-0.5.0 - Attempting uninstall: nncf - Found existing installation: nncf 2.5.0 - Uninstalling nncf-2.5.0: - Successfully uninstalled nncf-2.5.0 - Successfully installed Deprecated-1.2.14 about-time-4.2.1 alive-progress-3.1.4 cma-3.2.2 grapheme-0.6.0 nncf-2.5.0.dev0+90a1e860 pymoo-0.6.0.1 - - -Imports -############################################################################################################################### +Imports +---------------------------------- .. code:: ipython3 @@ -265,14 +79,14 @@ Imports .. parsed-literal:: - 2023-09-08 23:07:39.211214: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:07:39.246066: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-10 09:32:06.465943: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-10 09:32:06.505459: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 23:07:39.789011: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-10 09:32:07.113533: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Prepare the Model -############################################################################################################################### +Prepare the Model +---------------------------------- For instantiating PyTorch model class, we should use ``Wav2Vec2ForCTC.from_pretrained`` method with providing @@ -319,25 +133,14 @@ Convert it to the OpenVINO Intermediate Representation (OpenVINO IR) .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - WARNING:nncf:NNCF provides best results with torch==2.0.1, while current torch version is 1.13.1+cpu. If you encounter issues, consider switching to torch==2.0.1 - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:634: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:634: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): -Prepare LibriSpeech Dataset -############################################################################################################################### +Prepare LibriSpeech Dataset +--------------------------------------------------- For demonstration purposes, we will use short dummy version of LibriSpeech dataset - ``patrickvonplaten/librispeech_asr_dummy`` to @@ -365,8 +168,15 @@ dataset. 
# apply preprocessing function to dataset and remove audio column, to save memory as we do not need it anymore dataset = dataset.map(map_to_input, batched=False, remove_columns=["audio"]) -Prepare calibration dataset -############################################################################################################################### + +.. parsed-literal:: + + Found cached dataset librispeech_asr_dummy (/home/ea/.cache/huggingface/datasets/patrickvonplaten___librispeech_asr_dummy/clean/2.1.0/f2c70a4d03ab4410954901bde48c54b85ca1b7f9bf7d616e7e2a72b5ee6ddbfc) + Loading cached processed dataset at /home/ea/.cache/huggingface/datasets/patrickvonplaten___librispeech_asr_dummy/clean/2.1.0/f2c70a4d03ab4410954901bde48c54b85ca1b7f9bf7d616e7e2a72b5ee6ddbfc/cache-dcb48242e67b91b1.arrow + + +Prepare calibration dataset +---------------------------------- .. code:: ipython3 @@ -384,8 +194,14 @@ Prepare calibration dataset calibration_dataset = nncf.Dataset(dataset, transform_fn) -Prepare validation function -############################################################################################################################### + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +Prepare validation function +---------------------------------- Define the validation function. @@ -400,7 +216,7 @@ Define the validation function. Calculate and returns a metric for the model. """ wer = WordErrorRate() - for sample in tqdm(dataset): + for sample in dataset: # run infer function on sample output = model.output(0) logits = model(np.array(sample['input_values']))[output] @@ -414,25 +230,30 @@ Define the validation function. 
return 1 - result -Run quantization with accuracy control -############################################################################################################################### +Run quantization with accuracy control +--------------------------------------------------- You should provide the calibration dataset and the validation dataset. It can be the same -dataset. - parameter ``max_drop`` defines the accuracy drop threshold. -The quantization process stops when the degradation of accuracy metric -on the validation dataset is less than the ``max_drop``. The default -value is 0.01. NNCF will stop the quantization and report an error if -the ``max_drop`` value can’t be reached. - ``drop_type`` defines how the -accuracy drop will be calculated: ABSOLUTE (used by default) or -RELATIVE. - ``ranking_subset_size`` - size of a subset that is used to -rank layers by their contribution to the accuracy drop. Default value is -300, and the more samples it has the better ranking, potentially. Here -we use the value 25 to speed up the execution. - -.. code:: - - Execution can take tens of minutes and requires up to 10 GB of free memory +dataset. + +- parameter ``max_drop`` defines the accuracy drop threshold. + The quantization process stops when the degradation of accuracy metric + on the validation dataset is less than the ``max_drop``. The default + value is 0.01. NNCF will stop the quantization and report an error if + the ``max_drop`` value can’t be reached. + +- ``drop_type`` defines how the + accuracy drop will be calculated: ABSOLUTE (used by default) or + RELATIVE. + +- ``ranking_subset_size`` - size of a subset that is used to + rank layers by their contribution to the accuracy drop. Default value is + 300, and the more samples it has the better ranking, potentially. Here + we use the value 25 to speed up the execution. + + **NOTE**: Execution can take tens of minutes and requires up to 10 GB + of free memory .. 
code:: ipython3 @@ -455,8 +276,8 @@ we use the value 25 to speed up the execution. .. parsed-literal:: - Statistics collection: 24%|██▍ | 73/300 [00:13<00:42, 5.37it/s] - Applying Smooth Quant: 100%|██████████| 50/50 [00:00<00:00, 58.74it/s] + Statistics collection: 24%|███████████████████████████████████▎ | 73/300 [00:12<00:37, 5.98it/s] + Applying Smooth Quant: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:01<00:00, 41.01it/s] .. parsed-literal:: @@ -466,8 +287,8 @@ we use the value 25 to speed up the execution. .. parsed-literal:: - Statistics collection: 24%|██▍ | 73/300 [00:23<01:12, 3.12it/s] - Applying Fast Bias correction: 100%|██████████| 74/74 [00:25<00:00, 2.91it/s] + Statistics collection: 24%|███████████████████████████████████▎ | 73/300 [00:22<01:08, 3.31it/s] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 74/74 [00:23<00:00, 3.09it/s] .. parsed-literal:: @@ -477,1397 +298,41 @@ we use the value 25 to speed up the execution. .. parsed-literal:: INFO:nncf:Elapsed Time: 00:00:00 - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:62: FutureWarning: Importing `WordErrorRate` from `torchmetrics` was deprecated and will be removed in 2.0. Import `WordErrorRate` from `torchmetrics.text` instead. - _future_warning( - - - -.. parsed-literal:: - - 0it [00:00, ?it/s] - - - -.. parsed-literal:: - - 0it [00:00, ?it/s] - - -.. parsed-literal:: - - INFO:nncf:Elapsed Time: 00:00:13 + INFO:nncf:Elapsed Time: 00:00:11 INFO:nncf:Metric of initial model: 0.9469565153121948 INFO:nncf:Collecting values for each data item using the initial model - - - -.. 
parsed-literal:: - - 0%| | 0/1 [00:00 - + Your browser does not support the audio element. @@ -1912,12 +377,12 @@ Next, make a prediction. .. parsed-literal:: - ['A MAN SAID TO THE UNIVERSE SIR I EXIST'] + ['I E O WE WORD I O O FAGGI FARE E BO'] -Compare Accuracy of the Original and Quantized Models -############################################################################################################################### +Compare Accuracy of the Original and Quantized Models +-------------------------------------------------------------------- - Define dataloader for test dataset. - Define functions to get inference for PyTorch and OpenVINO models. @@ -1980,5 +445,5 @@ Now, compute WER for the original PyTorch model and quantized model. .. parsed-literal:: [PyTorch] Word Error Rate: 0.0530 - [Quantized OpenVino] Word Error Rate: 0.0609 + [Quantized OpenVino] Word Error Rate: 0.0600 diff --git a/docs/notebooks/122-yolov8-quantization-with-accuracy-control-with-output.rst b/docs/notebooks/122-yolov8-quantization-with-accuracy-control-with-output.rst index 33fc5c4c92af98..5def1e4d4b2bdd 100644 --- a/docs/notebooks/122-yolov8-quantization-with-accuracy-control-with-output.rst +++ b/docs/notebooks/122-yolov8-quantization-with-accuracy-control-with-output.rst @@ -30,9 +30,9 @@ and has the following differences: the Basic 8-bit quantization flow because some of the operations are kept in the original precision. -..note:: +.. - Currently, 8-bit quantization with accuracy control in NNCF + **NOTE**: Currently, 8-bit quantization with accuracy control in NNCF is available only for models in OpenVINO representation. The steps for the quantization with accuracy control are described @@ -40,191 +40,30 @@ below. 
The tutorial consists of the following steps: -- `Prerequisites <#prerequisites>`__ -- `Get Pytorch model and OpenVINO IR model <#get-pytorch-model-and-openvino-ir-model>`__ -- `Define validator and data loader <#define-validator-and-data-loader>`__ -- `Prepare calibration and validation datasets <#prepare-calibration-and-validation-datasets>`__ -- `Prepare validation function <#prepare-validation-function>`__ -- `Run quantization with accuracy control <#run-quantization-with-accuracy-control>`__ -- `Compare Accuracy and Performance of the Original and Quantized Models <#compare-accuracy-and-performance-of-the-original-and-quantized-models>`__ +- `Prerequisites <#>`__ +- `Get Pytorch model and OpenVINO IR model <#>`__ +- `Define validator and data loader <#>`__ +- `Prepare calibration and validation datasets <#>`__ +- `Prepare validation function <#>`__ +- `Run quantization with accuracy control <#>`__ +- `Compare Performance of the Original and Quantized Models <#>`__ -Prerequisites -############################################################################################################################### +Prerequisites +---------------------------------- Install necessary packages. .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install git+https://github.com/openvinotoolkit/nncf.git@develop - !pip install -q "ultralytics==8.0.43" + %pip install -q "openvino>=2023.1.0" + %pip install "nncf>=2.6.0" + %pip install -q "ultralytics==8.0.43" - -.. 
parsed-literal:: - - Collecting git+https://github.com/openvinotoolkit/nncf.git@develop - Cloning https://github.com/openvinotoolkit/nncf.git (to revision develop) to /tmp/pip-req-build-q26q169c - Running command git clone --filter=blob:none --quiet https://github.com/openvinotoolkit/nncf.git /tmp/pip-req-build-q26q169c - Filtering content: 1% (2/142) - Filtering content: 2% (3/142) - Filtering content: 3% (5/142) - Filtering content: 4% (6/142) - Filtering content: 5% (8/142) - Filtering content: 6% (9/142), 11.23 MiB | 16.49 MiB/s - Filtering content: 7% (10/142), 11.23 MiB | 16.49 MiB/s - Filtering content: 7% (10/142), 12.61 MiB | 10.32 MiB/s - Filtering content: 8% (12/142), 12.61 MiB | 10.32 MiB/s - Filtering content: 9% (13/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 10% (15/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 11% (16/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 11% (17/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 12% (18/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 13% (19/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 14% (20/142), 13.81 MiB | 7.30 MiB/s - Filtering content: 15% (22/142), 18.00 MiB | 7.01 MiB/s - Filtering content: 16% (23/142), 18.00 MiB | 7.01 MiB/s - Filtering content: 17% (25/142), 18.00 MiB | 7.01 MiB/s - Filtering content: 17% (25/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 18% (26/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 19% (27/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 20% (29/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 21% (30/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 22% (32/142), 20.21 MiB | 6.50 MiB/s - Filtering content: 23% (33/142), 23.21 MiB | 6.41 MiB/s - Filtering content: 24% (35/142), 23.21 MiB | 6.41 MiB/s - Filtering content: 25% (36/142), 23.21 MiB | 6.41 MiB/s - Filtering content: 26% (37/142), 23.21 MiB | 6.41 MiB/s - Filtering content: 26% (38/142), 23.21 MiB | 6.41 MiB/s - Filtering content: 27% (39/142), 25.49 MiB | 6.14 MiB/s - Filtering 
content: 28% (40/142), 25.49 MiB | 6.14 MiB/s - Filtering content: 29% (42/142), 25.49 MiB | 6.14 MiB/s - Filtering content: 30% (43/142), 25.49 MiB | 6.14 MiB/s - Filtering content: 31% (45/142), 25.49 MiB | 6.14 MiB/s - Filtering content: 32% (46/142), 25.49 MiB | 6.14 MiB/s - Filtering content: 33% (47/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 34% (49/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 35% (50/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 36% (52/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 37% (53/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 38% (54/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 38% (55/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 39% (56/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 40% (57/142), 27.56 MiB | 5.89 MiB/s - Filtering content: 41% (59/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 42% (60/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 43% (62/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 44% (63/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 45% (64/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 46% (66/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 47% (67/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 48% (69/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 49% (70/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 50% (71/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 51% (73/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 52% (74/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 53% (76/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 54% (77/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 55% (79/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 56% (80/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 57% (81/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 58% (83/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 59% (84/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 60% (86/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 61% (87/142), 
29.59 MiB | 5.66 MiB/s - Filtering content: 62% (89/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 63% (90/142), 29.59 MiB | 5.66 MiB/s - Filtering content: 64% (91/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 65% (93/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 66% (94/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 67% (96/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 68% (97/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 69% (98/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 70% (100/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 71% (101/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 72% (103/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 73% (104/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 74% (106/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 75% (107/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 76% (108/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 77% (110/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 78% (111/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 79% (113/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 80% (114/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 81% (116/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 82% (117/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 83% (118/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 84% (120/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 85% (121/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 86% (123/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 87% (124/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 88% (125/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 89% (127/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 90% (128/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 91% (130/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 92% (131/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 93% (133/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 94% (134/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 95% (135/142), 
31.76 MiB | 4.16 MiB/s - Filtering content: 96% (137/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 97% (138/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 98% (140/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 99% (141/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 100% (142/142), 31.76 MiB | 4.16 MiB/s - Filtering content: 100% (142/142), 32.00 MiB | 3.58 MiB/s, done. - Resolved https://github.com/openvinotoolkit/nncf.git to commit 90a1e860c93b553fa9684113e02d41d622235c55 - Preparing metadata (setup.py) ... - done - Collecting pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd (from nncf==2.5.0.dev0+90a1e860) - Using cached pymoo-0.6.0.1-py3-none-any.whl - Requirement already satisfied: jsonschema>=3.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (4.19.0) - Requirement already satisfied: jstyleson>=0.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (0.0.2) - Requirement already satisfied: natsort>=7.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (8.4.0) - Requirement already satisfied: networkx<=2.8.2,>=2.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.8.2) - Requirement already satisfied: ninja<1.11,>=1.10.0.post2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.10.2.4) - Requirement already satisfied: numpy<1.25,>=1.19.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2023.1.1) - Requirement already satisfied: packaging>=20.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (23.1) - Requirement already satisfied: pandas<2.1,>=1.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.0.3) - Requirement already satisfied: psutil in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (5.9.5) - Requirement already satisfied: pydot>=1.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.4.2) - Requirement already satisfied: pyparsing<3.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (2.4.7) - Requirement already satisfied: scikit-learn>=0.24.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.3.0) - Requirement already satisfied: scipy<1.11,>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.10.1) - Requirement already satisfied: texttable>=1.6.3 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (1.6.7) - Requirement already satisfied: tqdm>=4.54.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from nncf==2.5.0.dev0+90a1e860) (4.66.1) - Requirement already satisfied: attrs>=22.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (23.1.0) - Requirement already satisfied: importlib-resources>=1.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (6.0.1) - Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (2023.7.1) - Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (1.3.10) - Requirement already satisfied: referencing>=0.28.4 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (0.30.2) - Requirement already satisfied: rpds-py>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (0.10.2) - Requirement already satisfied: python-dateutil>=2.8.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2.8.2) - Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2023.3.post1) - Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (2023.3) - Requirement already satisfied: joblib>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from scikit-learn>=0.24.0->nncf==2.5.0.dev0+90a1e860) (1.3.2) - Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from scikit-learn>=0.24.0->nncf==2.5.0.dev0+90a1e860) (3.2.0) - Requirement already satisfied: matplotlib>=3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (3.5.2) - Requirement already satisfied: autograd>=1.4 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.6.2) - Requirement already satisfied: cma==3.2.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ 
git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (3.2.2) - Requirement already satisfied: alive-progress in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (3.1.4) - Requirement already satisfied: dill in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.3.7) - Requirement already satisfied: Deprecated in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.2.14) - Requirement already satisfied: future>=0.15.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from autograd>=1.4->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.18.3) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema>=3.2.0->nncf==2.5.0.dev0+90a1e860) (3.16.2) - Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.11.0) - Requirement 
already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (4.42.1) - Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.4.5) - Requirement already satisfied: pillow>=6.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib>=3->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (10.0.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas<2.1,>=1.1.5->nncf==2.5.0.dev0+90a1e860) (1.16.0) - Requirement already satisfied: about-time==4.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from alive-progress->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (4.2.1) - Requirement already satisfied: grapheme==0.6.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from alive-progress->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (0.6.0) - Requirement already satisfied: wrapt<2,>=1.10 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from Deprecated->pymoo@ git+https://github.com/anyoptimization/pymoo.git@695cb26923903f872c7256a9013609769f3cc2bd->nncf==2.5.0.dev0+90a1e860) (1.14.1) - - -Get Pytorch model and OpenVINO IR model -############################################################################################################################### +Get Pytorch model and OpenVINO IR Model +--------------------------------------------------- Generally, PyTorch models represent an instance of the -```torch.nn.Module`` `__ +`torch.nn.Module `__ class, initialized by a state dictionary with model weights. We will use the YOLOv8 nano model (also known as ``yolov8n``) pre-trained on a COCO dataset, which is available in this @@ -262,18 +101,6 @@ we do not need to do these steps manually. args = get_cfg(cfg=DEFAULT_CFG) args.data = "coco128-seg.yaml" - -.. parsed-literal:: - - Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt to /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/122-quantizing-model-with-accuracy-control/yolov8n-seg.pt... - - - -.. 
parsed-literal:: - - 0%| | 0.00/6.73M [00:00 float: validator.seen = 0 validator.jdict = [] @@ -414,32 +221,35 @@ Prepare validation function stats_metrics = stats["metrics/mAP50-95(B)"] else: stats_metrics = stats["metrics/mAP50-95(M)"] - print(f"Validate: dataset length = {counter}, metric value = {stats_metrics:.3f}") + if log: + print(f"Validate: dataset length = {counter}, metric value = {stats_metrics:.3f}") return stats_metrics - validation_fn = partial(validation_ac, validator=validator) + validation_fn = partial(validation_ac, validator=validator, log=False) Run quantization with accuracy control -############################################################################################################################### +--------------------------------------------------- You should provide the calibration dataset and the validation dataset. It can be the same -dataset. - parameter ``max_drop`` defines the accuracy drop threshold. -The quantization process stops when the degradation of accuracy metric -on the validation dataset is less than the ``max_drop``. The default -value is 0.01. NNCF will stop the quantization and report an error if -the ``max_drop`` value can’t be reached. - ``drop_type`` defines how the -accuracy drop will be calculated: ABSOLUTE (used by default) or -RELATIVE. - ``ranking_subset_size`` - size of a subset that is used to -rank layers by their contribution to the accuracy drop. Default value is -300, and the more samples it has the better ranking, potentially. Here -we use the value 25 to speed up the execution. - -.. note:: - - Execution can take tens of minutes and requires up to 15 GB +dataset. + +- parameter ``max_drop`` defines the accuracy drop threshold. + The quantization process stops when the degradation of accuracy metric + on the validation dataset is less than the ``max_drop``. The default + value is 0.01. NNCF will stop the quantization and report an error if + the ``max_drop`` value can’t be reached. 
+- ``drop_type`` defines how the + accuracy drop will be calculated: ABSOLUTE (used by default) or + RELATIVE. +- ``ranking_subset_size`` - size of a subset that is used to + rank layers by their contribution to the accuracy drop. Default value is + 300, and the more samples it has the better ranking, potentially. Here + we use the value 25 to speed up the execution. + + **NOTE**: Execution can take tens of minutes and requires up to 15 GB of free memory .. code:: ipython3 @@ -451,21 +261,21 @@ we use the value 25 to speed up the execution. validation_fn=validation_fn, max_drop=0.01, preset=nncf.QuantizationPreset.MIXED, + subset_size=128, advanced_accuracy_restorer_parameters=AdvancedAccuracyRestorerParameters( - ranking_subset_size=25, - num_ranking_processes=1 + ranking_subset_size=25 ), ) .. parsed-literal:: - 2023-09-08 23:17:54.173599: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:17:54.207357: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-10 09:55:44.477778: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-10 09:55:44.516624: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-09-08 23:17:54.764356: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - Statistics collection: 43%|████▎ | 128/300 [00:16<00:22, 7.55it/s] - Applying Fast Bias correction: 100%|██████████| 75/75 [00:04<00:00, 17.89it/s] + 2023-10-10 09:55:45.324364: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 128/128 [00:16<00:00, 7.79it/s] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:03<00:00, 18.84it/s] .. parsed-literal:: @@ -475,354 +285,33 @@ we use the value 25 to speed up the execution. .. parsed-literal:: INFO:nncf:Elapsed Time: 00:00:00 - Validate: dataset length = 1, metric value = 0.589 - Validate: dataset length = 128, metric value = 0.366 - INFO:nncf:Elapsed Time: 00:00:04 - INFO:nncf:Metric of initial model: 0.36611468358574506 - INFO:nncf:Collecting values for each data item using the initial model - Validate: dataset length = 1, metric value = 0.589 - Validate: dataset length = 1, metric value = 0.622 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.895 - Validate: dataset length = 1, metric value = 0.846 - Validate: dataset length = 1, metric value = 0.365 - Validate: dataset length = 1, metric value = 0.432 - Validate: dataset length = 1, metric value = 0.172 - Validate: dataset length = 1, metric value = 0.771 - Validate: dataset length = 1, metric value = 0.255 - Validate: dataset length = 1, metric value = 0.431 - Validate: dataset length = 1, metric value = 0.399 - Validate: dataset length = 1, metric value = 0.671 - Validate: dataset length = 1, metric value = 0.315 - Validate: dataset 
length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.895 - Validate: dataset length = 1, metric value = 0.497 - Validate: dataset length = 1, metric value = 0.594 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.074 - Validate: dataset length = 1, metric value = 0.231 - Validate: dataset length = 1, metric value = 0.502 - Validate: dataset length = 1, metric value = 0.347 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.477 - Validate: dataset length = 1, metric value = 0.537 - Validate: dataset length = 1, metric value = 0.344 - Validate: dataset length = 1, metric value = 0.544 - Validate: dataset length = 1, metric value = 0.237 - Validate: dataset length = 1, metric value = 0.109 - Validate: dataset length = 1, metric value = 0.564 - Validate: dataset length = 1, metric value = 0.853 - Validate: dataset length = 1, metric value = 0.306 - Validate: dataset length = 1, metric value = 0.416 - Validate: dataset length = 1, metric value = 0.388 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.199 - Validate: dataset length = 1, metric value = 0.323 - Validate: dataset length = 1, metric value = 0.305 - Validate: dataset length = 1, metric value = 0.506 - Validate: dataset length = 1, metric value = 0.319 - Validate: dataset length = 1, metric value = 0.319 - Validate: dataset length = 1, metric value = 0.255 - Validate: dataset length = 1, metric value = 0.487 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.654 - Validate: dataset length = 1, metric value = 0.368 - Validate: dataset length = 1, metric value = 0.730 - Validate: dataset length = 1, metric value = 0.374 - Validate: dataset length = 1, metric value = 0.227 - Validate: dataset length = 1, metric value = 0.500 - 
Validate: dataset length = 1, metric value = 0.101 - Validate: dataset length = 1, metric value = 0.855 - Validate: dataset length = 1, metric value = 0.430 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.358 - Validate: dataset length = 1, metric value = 0.373 - Validate: dataset length = 1, metric value = 0.692 - Validate: dataset length = 1, metric value = 0.556 - Validate: dataset length = 1, metric value = 0.274 - Validate: dataset length = 1, metric value = 0.670 - Validate: dataset length = 1, metric value = 0.044 - Validate: dataset length = 1, metric value = 0.627 - Validate: dataset length = 1, metric value = 0.945 - Validate: dataset length = 1, metric value = 0.267 - Validate: dataset length = 1, metric value = 0.354 - Validate: dataset length = 1, metric value = 0.265 - Validate: dataset length = 1, metric value = 0.522 - Validate: dataset length = 1, metric value = 0.945 - Validate: dataset length = 1, metric value = 0.394 - Validate: dataset length = 1, metric value = 0.349 - Validate: dataset length = 1, metric value = 0.564 - Validate: dataset length = 1, metric value = 0.094 - Validate: dataset length = 1, metric value = 0.763 - Validate: dataset length = 1, metric value = 0.157 - Validate: dataset length = 1, metric value = 0.531 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.781 - Validate: dataset length = 1, metric value = 0.447 - Validate: dataset length = 1, metric value = 0.562 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.461 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.696 - Validate: dataset length = 1, metric value = 0.378 - Validate: dataset length = 1, metric value = 0.246 - Validate: dataset length = 1, metric 
value = 0.647 - Validate: dataset length = 1, metric value = 0.367 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.359 - Validate: dataset length = 1, metric value = 0.407 - Validate: dataset length = 1, metric value = 0.191 - Validate: dataset length = 1, metric value = 0.549 - Validate: dataset length = 1, metric value = 0.290 - Validate: dataset length = 1, metric value = 0.166 - Validate: dataset length = 1, metric value = 0.131 - Validate: dataset length = 1, metric value = 0.745 - Validate: dataset length = 1, metric value = 0.336 - Validate: dataset length = 1, metric value = 0.248 - Validate: dataset length = 1, metric value = 0.290 - Validate: dataset length = 1, metric value = 0.413 - Validate: dataset length = 1, metric value = 0.790 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.265 - Validate: dataset length = 1, metric value = 0.423 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.039 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.685 - Validate: dataset length = 1, metric value = 0.635 - Validate: dataset length = 1, metric value = 0.829 - Validate: dataset length = 1, metric value = 0.525 - Validate: dataset length = 1, metric value = 0.315 - Validate: dataset length = 1, metric value = 0.348 - Validate: dataset length = 1, metric value = 0.567 - Validate: dataset length = 1, metric value = 0.751 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.557 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.341 - Validate: dataset length = 1, metric value = 0.427 - Validate: dataset 
length = 1, metric value = 0.846 INFO:nncf:Elapsed Time: 00:00:05 + INFO:nncf:Metric of initial model: 0.366118260036709 + INFO:nncf:Collecting values for each data item using the initial model + INFO:nncf:Elapsed Time: 00:00:06 INFO:nncf:Validation of quantized model was started INFO:nncf:Elapsed Time: 00:00:01 - Validate: dataset length = 128, metric value = 0.342 INFO:nncf:Elapsed Time: 00:00:04 - INFO:nncf:Metric of quantized model: 0.3419095833156649 + INFO:nncf:Metric of quantized model: 0.3418411101103462 INFO:nncf:Collecting values for each data item using the quantized model - Validate: dataset length = 1, metric value = 0.513 - Validate: dataset length = 1, metric value = 0.647 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.895 - Validate: dataset length = 1, metric value = 0.846 - Validate: dataset length = 1, metric value = 0.448 - Validate: dataset length = 1, metric value = 0.426 - Validate: dataset length = 1, metric value = 0.165 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.255 - Validate: dataset length = 1, metric value = 0.464 - Validate: dataset length = 1, metric value = 0.427 - Validate: dataset length = 1, metric value = 0.631 - Validate: dataset length = 1, metric value = 0.307 - Validate: dataset length = 1, metric value = 0.895 - Validate: dataset length = 1, metric value = 0.895 - Validate: dataset length = 1, metric value = 0.531 - Validate: dataset length = 1, metric value = 0.518 - Validate: dataset length = 1, metric value = 0.696 - Validate: dataset length = 1, metric value = 0.647 - Validate: dataset length = 1, metric value = 0.142 - Validate: dataset length = 1, metric value = 0.205 - Validate: dataset length = 1, metric value = 0.487 - Validate: dataset length = 1, metric value = 0.331 - Validate: dataset length = 1, metric value = 0.348 - Validate: dataset length = 1, metric value = 0.415 - Validate: dataset length = 
1, metric value = 0.542 - Validate: dataset length = 1, metric value = 0.333 - Validate: dataset length = 1, metric value = 0.489 - Validate: dataset length = 1, metric value = 0.270 - Validate: dataset length = 1, metric value = 0.067 - Validate: dataset length = 1, metric value = 0.564 - Validate: dataset length = 1, metric value = 0.764 - Validate: dataset length = 1, metric value = 0.301 - Validate: dataset length = 1, metric value = 0.400 - Validate: dataset length = 1, metric value = 0.392 - Validate: dataset length = 1, metric value = 0.696 - Validate: dataset length = 1, metric value = 0.193 - Validate: dataset length = 1, metric value = 0.199 - Validate: dataset length = 1, metric value = 0.267 - Validate: dataset length = 1, metric value = 0.484 - Validate: dataset length = 1, metric value = 0.299 - Validate: dataset length = 1, metric value = 0.299 - Validate: dataset length = 1, metric value = 0.255 - Validate: dataset length = 1, metric value = 0.431 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.623 - Validate: dataset length = 1, metric value = 0.348 - Validate: dataset length = 1, metric value = 0.763 - Validate: dataset length = 1, metric value = 0.354 - Validate: dataset length = 1, metric value = 0.129 - Validate: dataset length = 1, metric value = 0.507 - Validate: dataset length = 1, metric value = 0.082 - Validate: dataset length = 1, metric value = 0.855 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.381 - Validate: dataset length = 1, metric value = 0.384 - Validate: dataset length = 1, metric value = 0.586 - Validate: dataset length = 1, metric value = 0.503 - Validate: dataset length = 1, metric value = 0.172 - Validate: dataset length = 1, metric value = 0.540 - Validate: dataset length = 1, metric value = 0.027 - Validate: dataset length = 1, metric value = 0.561 - Validate: 
dataset length = 1, metric value = 0.945 - Validate: dataset length = 1, metric value = 0.170 - Validate: dataset length = 1, metric value = 0.409 - Validate: dataset length = 1, metric value = 0.272 - Validate: dataset length = 1, metric value = 0.507 - Validate: dataset length = 1, metric value = 0.945 - Validate: dataset length = 1, metric value = 0.377 - Validate: dataset length = 1, metric value = 0.343 - Validate: dataset length = 1, metric value = 0.564 - Validate: dataset length = 1, metric value = 0.080 - Validate: dataset length = 1, metric value = 0.721 - Validate: dataset length = 1, metric value = 0.174 - Validate: dataset length = 1, metric value = 0.564 - Validate: dataset length = 1, metric value = 0.497 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.454 - Validate: dataset length = 1, metric value = 0.536 - Validate: dataset length = 1, metric value = 0.647 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.461 - Validate: dataset length = 1, metric value = 0.697 - Validate: dataset length = 1, metric value = 0.746 - Validate: dataset length = 1, metric value = 0.332 - Validate: dataset length = 1, metric value = 0.218 - Validate: dataset length = 1, metric value = 0.547 - Validate: dataset length = 1, metric value = 0.309 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.309 - Validate: dataset length = 1, metric value = 0.423 - Validate: dataset length = 1, metric value = 0.146 - Validate: dataset length = 1, metric value = 0.535 - Validate: dataset length = 1, metric value = 0.274 - Validate: dataset length = 1, metric value = 0.166 - Validate: dataset length = 1, metric value = 
0.111 - Validate: dataset length = 1, metric value = 0.585 - Validate: dataset length = 1, metric value = 0.351 - Validate: dataset length = 1, metric value = 0.327 - Validate: dataset length = 1, metric value = 0.260 - Validate: dataset length = 1, metric value = 0.411 - Validate: dataset length = 1, metric value = 0.788 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.265 - Validate: dataset length = 1, metric value = 0.442 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.029 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.613 - Validate: dataset length = 1, metric value = 0.610 - Validate: dataset length = 1, metric value = 0.796 - Validate: dataset length = 1, metric value = 0.457 - Validate: dataset length = 1, metric value = 0.323 - Validate: dataset length = 1, metric value = 0.348 - Validate: dataset length = 1, metric value = 0.600 - Validate: dataset length = 1, metric value = 0.854 - Validate: dataset length = 1, metric value = 0.597 - Validate: dataset length = 1, metric value = 0.567 - Validate: dataset length = 1, metric value = 0.995 - Validate: dataset length = 1, metric value = 0.325 - Validate: dataset length = 1, metric value = 0.398 - Validate: dataset length = 1, metric value = 0.796 - INFO:nncf:Elapsed Time: 00:00:04 - INFO:nncf:Accuracy drop: 0.02420510027008016 (DropType.ABSOLUTE) - INFO:nncf:Accuracy drop: 0.02420510027008016 (DropType.ABSOLUTE) + INFO:nncf:Elapsed Time: 00:00:05 + INFO:nncf:Accuracy drop: 0.024277149926362818 (DropType.ABSOLUTE) + INFO:nncf:Accuracy drop: 0.024277149926362818 (DropType.ABSOLUTE) INFO:nncf:Total number of quantized operations in the model: 91 - INFO:nncf:Number of parallel processes to rank quantized operations: 1 + INFO:nncf:Number of parallel processes to rank quantized operations: 6 INFO:nncf:ORIGINAL metric is used to rank quantizers 
INFO:nncf:Calculating ranking score for groups of quantizers - Validate: dataset length = 25, metric value = 0.523 - Validate: dataset length = 25, metric value = 0.517 - Validate: dataset length = 25, metric value = 0.504 - Validate: dataset length = 25, metric value = 0.516 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.507 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.503 - Validate: dataset length = 25, metric value = 0.504 - Validate: dataset length = 25, metric value = 0.501 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.503 - Validate: dataset length = 25, metric value = 0.500 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.509 - Validate: dataset length = 25, metric value = 0.507 - Validate: dataset length = 25, metric value = 0.506 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.504 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.503 - Validate: dataset length = 25, metric value = 0.503 - Validate: dataset length = 25, metric value = 0.501 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.500 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.508 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.506 - Validate: dataset length = 25, metric value = 0.506 - Validate: dataset length = 25, metric value = 0.501 - Validate: dataset length = 25, metric value = 0.500 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 
0.512 - Validate: dataset length = 25, metric value = 0.504 - Validate: dataset length = 25, metric value = 0.510 - Validate: dataset length = 25, metric value = 0.514 - Validate: dataset length = 25, metric value = 0.510 - Validate: dataset length = 25, metric value = 0.508 - Validate: dataset length = 25, metric value = 0.507 - Validate: dataset length = 25, metric value = 0.509 - Validate: dataset length = 25, metric value = 0.495 - Validate: dataset length = 25, metric value = 0.510 - Validate: dataset length = 25, metric value = 0.511 - Validate: dataset length = 25, metric value = 0.502 - Validate: dataset length = 25, metric value = 0.511 - Validate: dataset length = 25, metric value = 0.507 - Validate: dataset length = 25, metric value = 0.506 - Validate: dataset length = 25, metric value = 0.515 - Validate: dataset length = 25, metric value = 0.506 - Validate: dataset length = 25, metric value = 0.499 - Validate: dataset length = 25, metric value = 0.492 - Validate: dataset length = 25, metric value = 0.505 - Validate: dataset length = 25, metric value = 0.499 - Validate: dataset length = 25, metric value = 0.519 - Validate: dataset length = 25, metric value = 0.522 - Validate: dataset length = 25, metric value = 0.516 - INFO:nncf:Elapsed Time: 00:02:45 + INFO:nncf:Elapsed Time: 00:02:16 INFO:nncf:Changing the scope of quantizer nodes was started INFO:nncf:Reverted 1 operations to the floating-point precision: /model.22/Mul_5 - Validate: dataset length = 128, metric value = 0.353 - INFO:nncf:Accuracy drop with the new quantization scope is 0.013362079004897942 (DropType.ABSOLUTE) + INFO:nncf:Accuracy drop with the new quantization scope is 0.013359187935064742 (DropType.ABSOLUTE) INFO:nncf:Reverted 1 operations to the floating-point precision: /model.1/conv/Conv/WithoutBiases - Validate: dataset length = 128, metric value = 0.353 - INFO:nncf:Accuracy drop with the new quantization scope is 0.013092546237331526 (DropType.ABSOLUTE) + INFO:nncf:Accuracy drop 
with the new quantization scope is 0.01287864227202773 (DropType.ABSOLUTE) INFO:nncf:Reverted 1 operations to the floating-point precision: /model.2/cv1/conv/Conv/WithoutBiases - Validate: dataset length = 128, metric value = 0.359 - INFO:nncf:Algorithm completed: achieved required accuracy drop 0.006690894581248108 (DropType.ABSOLUTE) + INFO:nncf:Algorithm completed: achieved required accuracy drop 0.007027355074555763 (DropType.ABSOLUTE) INFO:nncf:3 out of 91 were reverted back to the floating-point precision: /model.22/Mul_5 /model.1/conv/Conv/WithoutBiases @@ -830,7 +319,7 @@ we use the value 25 to speed up the execution. Compare Accuracy and Performance of the Original and Quantized Models -############################################################################################################################### +--------------------------------------------------------------------- Now we can compare metrics of the Original non-quantized OpenVINO IR model and Quantized OpenVINO IR model to make sure that the @@ -853,9 +342,9 @@ OpenVINO IR model and Quantized OpenVINO IR model to make sure that the .. parsed-literal:: Validate: dataset length = 128, metric value = 0.368 - Validate: dataset length = 128, metric value = 0.361 + Validate: dataset length = 128, metric value = 0.360 [Original OpenVino]: 0.3677 - [Quantized OpenVino]: 0.3605 + [Quantized OpenVino]: 0.3602 And compare performance. @@ -882,7 +371,74 @@ And compare performance. .. parsed-literal:: - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12713-47c2a91b6b6 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 
2023.2.0-12713-47c2a91b6b6 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 27.11 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [?,116,?] + [ INFO ] output1 (node: output1) : f32 / [...] / [?,32,8..,8..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 13.41 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] + [ INFO ] output1 (node: output1) : f32 / [...] 
/ [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 274.70 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 42.88 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 7716 iterations + [ INFO ] Duration: 60104.07 ms + [ INFO ] Latency: + [ INFO ] Median: 88.61 ms + [ INFO ] Average: 93.27 ms + [ INFO ] Min: 52.72 ms + [ INFO ] Max: 181.74 ms + [ INFO ] Throughput: 128.38 FPS .. code:: ipython3 @@ -893,5 +449,72 @@ And compare performance. .. parsed-literal:: - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 
2023.2.0-12713-47c2a91b6b6 + [ INFO ] + [ INFO ] Device info: + [ INFO ] CPU + [ INFO ] Build ................................. 2023.2.0-12713-47c2a91b6b6 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 32.74 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [?,116,?] + [ INFO ] output1 (node: output1) : f32 / [...] / [?,32,8..,8..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 18.09 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] + [ INFO ] output1 (node: output1) : f32 / [...] 
/ [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 574.58 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] NUM_STREAMS: 12 + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] PERF_COUNT: False + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 60000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 31.29 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 15900 iterations + [ INFO ] Duration: 60077.25 ms + [ INFO ] Latency: + [ INFO ] Median: 42.02 ms + [ INFO ] Average: 45.18 ms + [ INFO ] Min: 25.42 ms + [ INFO ] Max: 117.81 ms + [ INFO ] Throughput: 264.66 FPS diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output.rst b/docs/notebooks/123-detectron2-to-openvino-with-output.rst new file mode 100644 index 00000000000000..03019a26cbc2f5 --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output.rst @@ -0,0 +1,433 @@ +Convert Detectron2 Models to OpenVINO™ +======================================== + +`Detectron2 `__ is +Facebook AI Research’s library that provides state-of-the-art detection +and segmentation algorithms. It is the successor of +`Detectron `__ and +`maskrcnn-benchmark `__. +It supports a number of computer vision research projects and production +applications. + +In this tutorial we consider how to convert and run Detectron2 models +using OpenVINO™. We will use ``Faster R-CNN FPN x1`` model and +``Mask R-CNN FPN x3`` pretrained on +`COCO `__ dataset as examples for object +detection and instance segmentation respectively. 
+ +**Table of contents:** +-- + +- `Prerequisites <#prerequisites>`__ +- `Define helpers for PyTorch model initialization and conversion <#define-helpers-for-pytorch-model-initialization-and-conversion>`__ +- `Prepare input data <#prepare-input-data>`__ +- `Object Detection <#object-detection>`__ +- `Download PyTorch Detection model <#download-pytorch-detection-model>`__ +- `Convert Detection Model to OpenVINO Intermediate Representation <#convert-detection-model-to-openvino-intermediate-representation>`__ +- `Select inference device <#select-inference-device>`__ +- `Run Detection model inference <#run-detection-model-inference>`__ +- `Instance Segmentation <#instance-segmentation>`__ +- `Download PyTorch Instance Segmentation model <#download-pytorch-instance-segmentation-model>`__ +- `Convert Instance Segmentation Model to OpenVINO Intermediate Representation <#convert-instance-segmentation-model-to-openvino-intermediate-representation>`__ +- `Select inference device <#select-inference-device>`__ +- `Run Instance Segmentation model inference <#run-instance-segmentation-model-inference>`__ + +Prerequisites +------------------------------------------------------- + +Install required packages for running model + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision + %pip install -q "git+https://github.com/facebookresearch/detectron2.git" + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. 
+ + +Define helpers for PyTorch model initialization and conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Detectron2 provides universal and configurable API for working with +models, it means that all steps required for model creation, conversion +and inference will be common for all models, that is why it is enough to +define helper functions once, then reuse them for different models. For +obtaining models we will use `Detectron2 Model +Zoo `__ +API. ``detectron_zoo.get`` function allows you to download and instantiate +a model based on its config file. The configuration file plays a key role +in interaction with models in Detectron2 project and describes model +architecture and training and validation processes. +``detectron_zoo.get_config`` function can be used for finding and +reading model config. + +.. code:: ipython3 + + import detectron2.model_zoo as detectron_zoo + + + def get_model_and_config(model_name:str): + """ + Helper function for downloading PyTorch model and its configuration from Detectron2 Model Zoo + + Parameters: + model_name (str): model_id from Detectron2 Model Zoo + Returns: + model (torch.nn.Module): Pretrained model instance + cfg (Config): Configuration for model + """ + cfg = detectron_zoo.get_config(model_name + '.yaml', trained=True) + model = detectron_zoo.get(model_name + '.yaml', trained=True) + return model, cfg + +Detectron2 library is based on PyTorch. Starting from 2023.0 release +OpenVINO supports PyTorch models conversion directly via Model +Conversion API. ``ov.convert_model`` function can be used for converting +PyTorch model to OpenVINO Model object instance, that is ready to use for +loading on device and then running inference or can be saved on disk for +next deployment using ``ov.save_model`` function.
+ +Detectron2 models use custom complex data structures inside, which brings +some difficulties for exporting models in different formats and +frameworks including OpenVINO. To avoid these issues, +``detectron2.export.TracingAdapter`` is provided as part of the Detectron2 +deployment API. ``TracingAdapter`` is a model wrapper class that +simplifies the model’s structure, making it more export-friendly. + +.. code:: ipython3 + + from detectron2.modeling import GeneralizedRCNN + from detectron2.export import TracingAdapter + import torch + import openvino as ov + import warnings + from typing import List, Dict + + def convert_detectron2_model(model:torch.nn.Module, sample_input:List[Dict[str, torch.Tensor]]): + """ + Function for converting Detectron2 models, creates TracingAdapter for making model tracing-friendly, + prepares inputs and converts model to OpenVINO Model + + Parameters: + model (torch.nn.Module): Model object for conversion + sample_input (List[Dict[str, torch.Tensor]]): sample input for tracing + Returns: + ov_model (ov.Model): OpenVINO Model + """ + # prepare input for tracing adapter + tracing_input = [{'image': sample_input[0]["image"]}] + + # override model forward and disable postprocessing if required + if isinstance(model, GeneralizedRCNN): + def inference(model, inputs): + # use do_postprocess=False so it returns ROI mask + inst = model.inference(inputs, do_postprocess=False)[0] + return [{"instances": inst}] + else: + inference = None # assume that we just call the model directly + + # create traceable model + traceable_model = TracingAdapter(model, tracing_input, inference) + warnings.filterwarnings("ignore") + # convert PyTorch model to OpenVINO model + ov_model = ov.convert_model(traceable_model, example_input=sample_input[0]["image"]) + return ov_model + +Prepare input data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For running model conversion and inference we need to provide example +input.
The cells below download sample image and apply preprocessing +steps based on model specific transformations defined in model config. + +.. code:: ipython3 + + import requests + from pathlib import Path + from PIL import Image + + MODEL_DIR = Path("model") + DATA_DIR = Path("data") + + MODEL_DIR.mkdir(exist_ok=True) + DATA_DIR.mkdir(exist_ok=True) + + input_image_url = "https://farm9.staticflickr.com/8040/8017130856_1b46b5f5fc_z.jpg" + + image_file = DATA_DIR / "example_image.jpg" + + if not image_file.exists(): + image = Image.open(requests.get(input_image_url, stream=True).raw) + image.save(image_file) + else: + image = Image.open(image_file) + + image + + + + +.. image:: 123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.png + + + +.. code:: ipython3 + + import detectron2.data.transforms as T + from detectron2.data import detection_utils + import torch + + def get_sample_inputs(image_path, cfg): + # get a sample data + original_image = detection_utils.read_image(image_path, format=cfg.INPUT.FORMAT) + # Do same preprocessing as DefaultPredictor + aug = T.ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST) + height, width = original_image.shape[:2] + image = aug.get_transform(original_image).apply_image(original_image) + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + inputs = {"image": image, "height": height, "width": width} + + # Sample ready + sample_inputs = [inputs] + return sample_inputs + +Now, when all components required for model conversion are prepared, we +can consider how to use them on specific examples. + +Object Detection +---------------------------------------------------------- + +Download PyTorch Detection model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Download faster_rcnn_R_50_FPN_1x from Detectron Model Zoo. + +.. 
code:: ipython3 + + model_name = 'COCO-Detection/faster_rcnn_R_50_FPN_1x' + model, cfg = get_model_and_config(model_name) + sample_input = get_sample_inputs(image_file, cfg) + +Convert Detection Model to OpenVINO Intermediate Representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Convert model using ``convert_detectron2_model`` function and +``sample_input`` prepared above. After conversion, model saved on disk +using ``ov.save_model`` function and can be found in ``model`` +directory. + +.. code:: ipython3 + + model_xml_path = MODEL_DIR / (model_name.split("/")[-1] + '.xml') + if not model_xml_path.exists(): + ov_model = convert_detectron2_model(model, sample_input) + ov.save_model(ov_model, MODEL_DIR / (model_name.split("/")[-1] + '.xml')) + else: + ov_model = model_xml_path + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Run Detection model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Load our converted model on selected device and run inference on sample +input. + +.. code:: ipython3 + + compiled_model = core.compile_model(ov_model, device.value) + +.. code:: ipython3 + + results = compiled_model(sample_input[0]["image"]) + +Tracing adapter simplifies model input and output format. After +conversion, model has multiple outputs in following format: 1. Predicted +boxes is floating-point tensor in format [``N``, 4], where N is number +of detected boxes. 2. 
Predicted classes is integer tensor in format +[``N``], where N is number of predicted objects that defines which label +each object belongs. The values range of predicted classes tensor is [0, +``num_labels``], where ``num_labels`` is number of classes supported of +model (in our case 80). 3. Predicted scores is floating-point tensor in +format [``N``], where ``N`` is number of predicted objects that defines +confidence of each prediction. 4. Input image size is integer tensor +with values [``H``, ``W``], where ``H`` is height of input data and +``W`` is width of input data, used for rescaling predictions on +postprocessing step. + +For reusing Detectron2 API for postprocessing and visualization, we +provide helpers for wrapping output in original Detectron2 format. + +.. code:: ipython3 + + from detectron2.structures import Instances, Boxes + from detectron2.modeling.postprocessing import detector_postprocess + from detectron2.utils.visualizer import ColorMode, Visualizer + from detectron2.data import MetadataCatalog + import numpy as np + + def postprocess_detection_result(outputs:Dict, orig_height:int, orig_width:int, conf_threshold:float = 0.0): + """ + Helper function for postprocessing prediction results + + Parameters: + outputs (Dict): OpenVINO model output dictionary + orig_height (int): original image height before preprocessing + orig_width (int): original image width before preprocessing + conf_threshold (float, optional, defaults 0.0): confidence threshold for valid prediction + Returns: + prediction_result (instances): postprocessed predicted instances + """ + boxes = outputs[0] + classes = outputs[1] + has_mask = len(outputs) >= 5 + masks = None if not has_mask else outputs[2] + scores = outputs[2 if not has_mask else 3] + model_input_size = (int(outputs[3 if not has_mask else 4][0]), int(outputs[3 if not has_mask else 4][1])) + filtered_detections = scores >= conf_threshold + boxes = Boxes(boxes[filtered_detections]) + scores = 
scores[filtered_detections] + classes = classes[filtered_detections] + out_dict = {"pred_boxes": boxes, "scores": scores, "pred_classes": classes} + if masks is not None: + masks = masks[filtered_detections] + out_dict["pred_masks"] = torch.from_numpy(masks) + instances = Instances(model_input_size, **out_dict) + return detector_postprocess(instances, orig_height, orig_width) + + def draw_instance_prediction(img:np.ndarray, results:Instances, cfg:"Config"): + """ + Helper function for visualization prediction results + + Parameters: + img (np.ndarray): original image for drawing predictions + results (instances): model predictions + cfg (Config): model configuration + Returns: + img_with_res: image with results + """ + metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + visualizer = Visualizer(img, metadata, instance_mode=ColorMode.IMAGE) + img_with_res = visualizer.draw_instance_predictions(results) + return img_with_res + + +.. code:: ipython3 + + results = postprocess_detection_result(results, sample_input[0]["height"], sample_input[0]["width"], conf_threshold=0.05) + img_with_res = draw_instance_prediction(np.array(image), results, cfg) + Image.fromarray(img_with_res.get_image()) + + + + +.. image:: 123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.png + + + +Instance Segmentation +--------------------- + +As it was discussed above, Detectron2 provides generic approach for +working with models for different use cases. The steps that required to +convert and run models pretrained for Instance Segmentation use case +will be very similar to Object Detection. + +Download Instance Segmentation PyTorch model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: ipython3 + + model_name = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x" + model, cfg = get_model_and_config(model_name) + sample_input = get_sample_inputs(image_file, cfg) + +Convert Instance Segmentation Model to OpenVINO Intermediate Representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + model_xml_path = MODEL_DIR / (model_name.split("/")[-1] + '.xml') + + if not model_xml_path.exists(): + ov_model = convert_detectron2_model(model, sample_input) + ov.save_model(ov_model, MODEL_DIR / (model_name.split("/")[-1] + '.xml')) + else: + ov_model = model_xml_path + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Run Instance Segmentation model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In comparison with Object Detection, Instance Segmentation models have +additional output that represents instance masks for each object. Our +postprocessing function handle this difference. + +.. code:: ipython3 + + compiled_model = core.compile_model(ov_model, device.value) + +.. code:: ipython3 + + results = compiled_model(sample_input[0]["image"]) + results = postprocess_detection_result(results, sample_input[0]["height"], sample_input[0]["width"], conf_threshold=0.05) + img_with_res = draw_instance_prediction(np.array(image), results, cfg) + Image.fromarray(img_with_res.get_image()) + + + + +.. 
image:: 123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.png + + diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.jpg new file mode 100644 index 00000000000000..21179d56bf5ead --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856bd51bd1a5dd45f52f6f0f939390f11c5c35b4af4781679a617cb9fe451a41 +size 57591 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.png new file mode 100644 index 00000000000000..22b92c9c1d9db7 --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3846096706af5b748d9e673831136adb1a5d912a91783b6c3b700edbd0d8359 +size 508592 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.jpg new file mode 100644 index 00000000000000..f589819418c7ae --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8668a57174bdee831756b00f1cc479c75ad4515151d4cbf5adfa8a87be595d77 +size 53100 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.png 
b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.png new file mode 100644 index 00000000000000..9516a78e6dc303 --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_32_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2913494596627acf2e77e9846889891f2dd348d6162dfdfade8ec0857698361 +size 456596 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.jpg b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.jpg new file mode 100644 index 00000000000000..3754e67394bee6 --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6e3fc4cbbcd709eea9f7a8301b958e102846528f38e96d1c5f597bd7e396b3c +size 46858 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.png b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.png new file mode 100644 index 00000000000000..939857cbf951af --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/123-detectron2-to-openvino-with-output_8_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab03000d5af3fdd1a22993c61583d1ee07eeb16064eb03f458a9b5649a3dc27 +size 503218 diff --git a/docs/notebooks/123-detectron2-to-openvino-with-output_files/index.html b/docs/notebooks/123-detectron2-to-openvino-with-output_files/index.html new file mode 100644 index 00000000000000..150ba32dd39328 --- /dev/null +++ b/docs/notebooks/123-detectron2-to-openvino-with-output_files/index.html @@ -0,0 +1,12 @@ + +Index of 
/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/123-detectron2-to-openvino-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/123-detectron2-to-openvino-with-output_files/


../
+123-detectron2-to-openvino-with-output_22_0.jpg    31-Oct-2023 00:35               57591
+123-detectron2-to-openvino-with-output_22_0.png    31-Oct-2023 00:35              508592
+123-detectron2-to-openvino-with-output_32_0.jpg    31-Oct-2023 00:35               53100
+123-detectron2-to-openvino-with-output_32_0.png    31-Oct-2023 00:35              456596
+123-detectron2-to-openvino-with-output_8_0.jpg     31-Oct-2023 00:35               46858
+123-detectron2-to-openvino-with-output_8_0.png     31-Oct-2023 00:35              503218
+

+ diff --git a/docs/notebooks/124-hugging-face-hub-with-output.rst b/docs/notebooks/124-hugging-face-hub-with-output.rst new file mode 100644 index 00000000000000..07e51a7b72dfff --- /dev/null +++ b/docs/notebooks/124-hugging-face-hub-with-output.rst @@ -0,0 +1,405 @@ +🤗 Hugging Face Model Hub with OpenVINO™ +========================================= + +The Hugging Face (HF) `Model Hub `__ is a +central repository for pre-trained deep learning models. It allows +exploration and provides access to thousands of models for a wide range +of tasks, including text classification, question answering, and image +classification. Hugging Face provides Python packages that serve as APIs +and tools to easily download and fine tune state-of-the-art pretrained +models, namely +`transformers `__ and +`diffusers `__ packages. + +|image0| + +Throughout this notebook we will learn: 1. How to load a HF pipeline +using the ``transformers`` package and then convert it to OpenVINO. 2. +How to load the same pipeline using Optimum Intel package. + +Contents: + +- `Converting a Model from the HF Transformers Package <#converting-a-model-from-the-hf-transformers-package>`__ +- `Installing Requirements <#installing-requirements>`__ +- `Imports <#imports>`__ +- `Initializing a Model Using the HF Transformers Package <#initializing-a-model-using-the-hf-transformers-package>`__ +- `Original Model inference <#original-model-inference>`__ +- `Converting the Model to OpenVINO IR format <#converting-the-model-to-openvino-ir-format>`__ +- `Converted Model Inference <#converted-model-inference>`__ +- `Converting a Model Using the Optimum Intel Package <#converting-a-model-using-the-optimum-intel-package>`__ +- `Installing Requirements <#install-requirements-for-optimum>`__ +- `Import Optimum <#import-optimum>`__ +- `Initialize and Convert the Model Automatically <#initialize-and-convert-the-model-automatically>`__ + +.. 
|image0| image:: https://github.com/huggingface/optimum-intel/raw/main/readme_logo.png + +Converting a Model from the HF Transformers Package +--------------------------------------------------- + +Hugging Face transformers package provides API for initializing a model +and loading a set of pre-trained weights using the model text handle. +Discovering a desired model name is straightforward with `HF website’s +Models page `__, one can choose a model +solving a particular machine learning problem and even sort the models +by popularity and novelty. + +Installing Requirements +~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers[torch] + %pip install -q ipywidgets + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~ + +.. code:: ipython3 + + from pathlib import Path + + import numpy as np + import torch + + from transformers import AutoModelForSequenceClassification + from transformers import AutoTokenizer + +Initializing a Model Using the HF Transformers Package +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We will use `roberta text sentiment +classification `__ +model in our example, it is a transformer-based encoder model pretrained +in a special way, please refer to the model card to learn more. + +Following the instructions on the model page, we use +``AutoModelForSequenceClassification`` to initialize the model and +perform inference with it. To find more information on HF pipelines and +model initialization please refer to `HF +tutorials `__. + +.. 
code:: ipython3 + + MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest" + + tokenizer = AutoTokenizer.from_pretrained(MODEL, return_dict=True) + + # The torchscript=True flag is used to ensure the model outputs are tuples + # instead of ModelOutput (which causes JIT errors). + model = AutoModelForSequenceClassification.from_pretrained(MODEL, torchscript=True) + + +.. parsed-literal:: + + Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias'] + - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). + - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + + +Original Model inference +~~~~~~~~~~~~~~~~~~~~~~~~ + +Let’s do a classification of a simple prompt below. + +.. code:: ipython3 + + text = "HF models run perfectly with OpenVINO!" + + encoded_input = tokenizer(text, return_tensors='pt') + output = model.forward(**encoded_input) + scores = output[0][0] + scores = torch.softmax(scores, dim=0).detach().numpy() + + def print_prediction(scores): + for i, descending_index in enumerate(scores.argsort()[::-1]): + label = model.config.id2label[descending_index] + score = np.round(float(scores[descending_index]), 4) + print(f"{i+1}) {label} {score}") + + print_prediction(scores) + + +.. 
parsed-literal:: + + 1) positive 0.9485 + 2) neutral 0.0484 + 3) negative 0.0031 + + +Converting the Model to OpenVINO IR format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We use the OpenVINO `Model conversion +API `__ +to convert the model (this one is implemented in PyTorch) to OpenVINO +Intermediate Representation (IR). + +Note how we reuse our real ``encoded_input``, passing it to the +``ov.convert_model`` function. It will be used for model tracing. + +.. code:: ipython3 + + import openvino as ov + + save_model_path = Path('./models/model.xml') + + if not save_model_path.exists(): + ov_model = ov.convert_model(model, example_input=dict(encoded_input)) + ov.save_model(ov_model, save_model_path) + +Converted Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, we pick a device to do the model inference. + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +OpenVINO model IR must be compiled for a specific device prior to the +model inference. + +.. code:: ipython3 + + compiled_model = core.compile_model(save_model_path, device.value) + + # Compiled model call is performed using the same parameters as for the original model + scores_ov = compiled_model(encoded_input.data)[0] + + scores_ov = torch.softmax(torch.tensor(scores_ov[0]), dim=0).detach().numpy() + + print_prediction(scores_ov) + + +.. parsed-literal:: + + 1) positive 0.9483 + 2) neutral 0.0485 + 3) negative 0.0031 + + +Note that the predictions of the converted model closely match those of the +original model. + +This is a rather simple example as the pipeline includes just one +encoder model. Contemporary state of the art pipelines often consist of +several models; feel free to explore other OpenVINO tutorials: 1. 
`Stable +Diffusion +v2 `__ +2. `Zero-shot Image Classification with OpenAI +CLIP `__ +3. `Controllable Music Generation with +MusicGen `__ + +The workflow for the ``diffusers`` package is exactly the same. The +first example in the list above relies on the ``diffusers``. + +Converting a Model Using the Optimum Intel Package +-------------------------------------------------- + +🤗 Optimum Intel is the interface between the 🤗 Transformers and +Diffusers libraries and the different tools and libraries provided by +Intel to accelerate end-to-end pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. + +Install Requirements for Optimum +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + %pip install -q "optimum==1.13.0" + %pip install -q "optimum-intel"@git+https://github.com/huggingface/optimum-intel.git + %pip install -q onnx + + +.. parsed-literal:: + + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +.. parsed-literal:: + + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +.. 
parsed-literal:: + + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +Import Optimum +~~~~~~~~~~~~~~ + +Documentation for Optimum Intel states: >You can now easily perform +inference with OpenVINO Runtime on a variety of Intel processors (see +the full list of supported devices). For that, just replace the +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +You can find `Optimum Intel +documentation `__ +on the Hugging Face website. + +.. code:: ipython3 + + from optimum.intel.openvino import OVModelForSequenceClassification + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + 2023-10-30 23:06:03.589130: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 23:06:03.624230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-30 23:06:04.183799: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( + + +Initialize and Convert the Model Automatically +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To load a Transformers model and convert it to the OpenVINO format +on-the-fly, you can set ``export=True`` when loading your model. + +.. code:: ipython3 + + model = OVModelForSequenceClassification.from_pretrained(MODEL, export=True, device=device.value) + + # The save_pretrained() method saves the model weights to avoid conversion on the next load. + model.save_pretrained('./models') + + +.. parsed-literal:: + + Framework not specified. Using pt to export to ONNX. + Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias'] + - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model). + - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + Using the export variant default. Available variants are: + - default: The default ONNX variant. + Using framework PyTorch: 2.1.0+cpu + Overriding 1 configuration item(s) + - use_cache -> False + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + Compiling the model to AUTO ... + Set CACHE_DIR to /tmp/tmpx5aqydhf/model_cache + + +Moreover, some models in the Hugging Face Models Hub are already +converted and ready to run! You can filter those models out by library +name, just type OpenVINO, or follow `this +link `__. + +The Optimum Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Model inference is exactly the same as for the original model! + +.. code:: ipython3 + + output = model.forward(**encoded_input) + scores = output[0][0] + scores = torch.softmax(scores, dim=0).detach().numpy() + + print_prediction(scores) + + +.. parsed-literal:: + + 1) positive 0.9485 + 2) neutral 0.0484 + 3) negative 0.0031 + + +You can find more examples of using Optimum Intel here: 1. `Accelerate +Inference of Sparse Transformer +Models `__ +2. `Grammatical Error Correction with +OpenVINO `__ +3. `Stable Diffusion v2.1 using Optimum-Intel +OpenVINO `__ +4. `Image generation with Stable Diffusion +XL `__ +5. `Instruction following using Databricks Dolly +2.0 `__ +6. 
`Create LLM-powered Chatbot using +OpenVINO `__ diff --git a/docs/notebooks/125-convnext-classification-with-output.rst b/docs/notebooks/125-convnext-classification-with-output.rst new file mode 100644 index 00000000000000..8dc9f2ad01aa10 --- /dev/null +++ b/docs/notebooks/125-convnext-classification-with-output.rst @@ -0,0 +1,268 @@ +Classification with ConvNeXt and OpenVINO +========================================= + +The +`torchvision.models `__ +subpackage contains definitions of models for addressing different +tasks, including: image classification, pixelwise semantic segmentation, +object detection, instance segmentation, person keypoint detection, +video classification, and optical flow. Throughout this notebook we will +show how to use one of them. + +The ConvNeXt model is based on the `A ConvNet for the +2020s `__ paper. The outcome of this +exploration is a family of pure ConvNet models dubbed ConvNeXt. +Constructed entirely from standard ConvNet modules, ConvNeXts compete +favorably with Transformers in terms of accuracy and scalability, +achieving 87.8% ImageNet top-1 accuracy and outperforming Swin +Transformers on COCO detection and ADE20K segmentation, while +maintaining the simplicity and efficiency of standard ConvNets. The +``torchvision.models`` subpackage +`contains `__ +several pretrained ConvNeXt models. In this tutorial we will use the ConvNeXt +Tiny model. + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Get a test image <#get-a-test-image>`__ +- `Get a pretrained model. 
<#get-a-pretrained-model>`__ +- `Define a preprocessing and prepare an input + data <#define-a-preprocessing-and-prepare-an-input-data>`__ +- `Use the original model to run an + inference <#use-the-original-model-to-run-an-inference>`__ +- `Convert the model to OpenVINO Intermediate representation + format <#convert-the-model-to-openvino-intermediate-representation-format>`__ +- `Use the OpenVINO IR model to run an + inference <#use-the-openvino-ir-model-to-run-an-inference>`__ + +Prerequisites +------------- + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Get a test image +---------------- + +First of all lets get a test image from an open dataset. + +.. code:: ipython3 + + import urllib.request + + from torchvision.io import read_image + import torchvision.transforms as transforms + + + img_path = 'cats_image.jpeg' + urllib.request.urlretrieve( + url='https://huggingface.co/datasets/huggingface/cats-image/resolve/main/cats_image.jpeg', + filename=img_path + ) + image = read_image(img_path) + display(transforms.ToPILImage()(image)) + + + +.. image:: 125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.png + + +Get a pretrained model +---------------------- + +Torchvision provides a mechanism of `listing and retrieving available +models `__. + +.. code:: ipython3 + + import torchvision.models as models + + # List available models + all_models = models.list_models() + # List of models by type. Classification models are in the parent module. + classification_models = models.list_models(module=models) + + print(classification_models) + + +.. 
parsed-literal:: + + ['alexnet', 'convnext_base', 'convnext_large', 'convnext_small', 'convnext_tiny', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_v2_l', 'efficientnet_v2_m', 'efficientnet_v2_s', 'googlenet', 'inception_v3', 'maxvit_t', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'regnet_x_16gf', 'regnet_x_1_6gf', 'regnet_x_32gf', 'regnet_x_3_2gf', 'regnet_x_400mf', 'regnet_x_800mf', 'regnet_x_8gf', 'regnet_y_128gf', 'regnet_y_16gf', 'regnet_y_1_6gf', 'regnet_y_32gf', 'regnet_y_3_2gf', 'regnet_y_400mf', 'regnet_y_800mf', 'regnet_y_8gf', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'resnext101_32x8d', 'resnext101_64x4d', 'resnext50_32x4d', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'squeezenet1_0', 'squeezenet1_1', 'swin_b', 'swin_s', 'swin_t', 'swin_v2_b', 'swin_v2_s', 'swin_v2_t', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'vit_b_16', 'vit_b_32', 'vit_h_14', 'vit_l_16', 'vit_l_32', 'wide_resnet101_2', 'wide_resnet50_2'] + + +We will use ``convnext_tiny``. To get a pretrained model just use +``models.get_model("convnext_tiny", weights='DEFAULT')`` or a specific +method of ``torchvision.models`` for this model using `default +weights `__ +that is equivalent to ``ConvNeXt_Tiny_Weights.IMAGENET1K_V1``. If you +don’t specify ``weights`` or specify ``weights=None`` it will be a random +initialization. To get all available weights for the model you can call +``weights_enum = models.get_model_weights("convnext_tiny")``, but there +is only one for this model. You can find more information on how to +initialize pre-trained models +`here `__. + +.. 
code:: ipython3 + + model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.DEFAULT) + +Define a preprocessing and prepare an input data +------------------------------------------------ + +You can use ``torchvision.transforms`` to make a preprocessing or +use `preprocessing transforms from the model +weights `__. + +.. code:: ipython3 + + import torch + + + preprocess = models.ConvNeXt_Tiny_Weights.DEFAULT.transforms() + + input_data = preprocess(image) + input_data = torch.stack([input_data], dim=0) + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( + + +Use the original model to run an inference +------------------------------------------ + +.. code:: ipython3 + + outputs = model(input_data) + +And print results + +.. 
code:: ipython3 + + import urllib.request + + + # download class number to class label mapping + imagenet_classes_file_path = "imagenet_2012.txt" + urllib.request.urlretrieve( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/datasets/imagenet/imagenet_2012.txt", + filename=imagenet_classes_file_path + ) + imagenet_classes = open(imagenet_classes_file_path).read().splitlines() + + + def print_results(outputs: torch.Tensor): + _, predicted_class = outputs.max(1) + predicted_probability = torch.softmax(outputs, dim=1)[0, predicted_class].item() + + print(f"Predicted Class: {predicted_class.item()}") + print(f"Predicted Label: {imagenet_classes[predicted_class.item()]}") + print(f"Predicted Probability: {predicted_probability}") + +.. code:: ipython3 + + print_results(outputs) + + +.. parsed-literal:: + + Predicted Class: 281 + Predicted Label: n02123045 tabby, tabby cat + Predicted Probability: 0.554813802242279 + + +Convert the model to OpenVINO Intermediate representation format +---------------------------------------------------------------- + +OpenVINO supports PyTorch through conversion to OpenVINO Intermediate +Representation (IR) format. To take advantage of OpenVINO +optimization tools and features, the model should be converted using the +OpenVINO Converter tool (OVC). The ``openvino.convert_model`` function +provides a Python API for OVC usage. The function returns the instance of +the OpenVINO Model class, which is ready for use in the Python +interface. However, it can also be saved on disk using +``openvino.save_model`` for future execution. + +.. 
code:: ipython3 + + from pathlib import Path + + import openvino as ov + + + ov_model_xml_path = Path('models/ov_convnext_model.xml') + + if not ov_model_xml_path.exists(): + ov_model_xml_path.parent.mkdir(parents=True, exist_ok=True) + converted_model = ov.convert_model(model, example_input=torch.randn(1, 3, 224, 224)) + # add transform to OpenVINO preprocessing converting + ov.save_model(converted_model, ov_model_xml_path) + else: + print(f"IR model {ov_model_xml_path} already exists.") + +When the ``openvino.save_model`` function is used, an OpenVINO model is +serialized in the file system as two files with ``.xml`` and ``.bin`` +extensions. This pair of files is called OpenVINO Intermediate +Representation format (OpenVINO IR, or just IR) and is useful for efficient +model deployment. OpenVINO IR can be loaded into another application for +inference using the ``openvino.Core.read_model`` function. + +Select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + core = ov.Core() + + compiled_model = core.compile_model(ov_model_xml_path, device_name=device.value) + +Use the OpenVINO IR model to run an inference +--------------------------------------------- + +.. code:: ipython3 + + outputs = compiled_model(input_data)[0] + print_results(torch.from_numpy(outputs)) + + +.. 
parsed-literal:: + + Predicted Class: 281 + Predicted Label: n02123045 tabby, tabby cat + Predicted Probability: 0.6132654547691345 + diff --git a/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.jpg b/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.jpg new file mode 100644 index 00000000000000..161db4d6d6bce0 --- /dev/null +++ b/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4aeb8d18432656f38f690facffbfa90ad3966dc2113a675025d1231a7c5747b +size 63187 diff --git a/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.png b/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.png new file mode 100644 index 00000000000000..d86a9f8cfdfb9b --- /dev/null +++ b/docs/notebooks/125-convnext-classification-with-output_files/125-convnext-classification-with-output_4_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c6c291107839fe807c457757e888c4804eb5ae7dc9459c590ccde8eb216fc0 +size 723717 diff --git a/docs/notebooks/125-convnext-classification-with-output_files/index.html b/docs/notebooks/125-convnext-classification-with-output_files/index.html new file mode 100644 index 00000000000000..cafda335280dc6 --- /dev/null +++ b/docs/notebooks/125-convnext-classification-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/125-convnext-classification-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/125-convnext-classification-with-output_files/


../
+125-convnext-classification-with-output_4_0.jpg    31-Oct-2023 00:35               63187
+125-convnext-classification-with-output_4_0.png    31-Oct-2023 00:35              723717
+

+ diff --git a/docs/notebooks/126-tensorflow-hub-with-output.rst b/docs/notebooks/126-tensorflow-hub-with-output.rst new file mode 100644 index 00000000000000..2a6f3100d82de2 --- /dev/null +++ b/docs/notebooks/126-tensorflow-hub-with-output.rst @@ -0,0 +1,447 @@ +Convert of TensorFlow Hub models to OpenVINO Intermediate Representation (IR) +============================================================================= + +|Colab| |Binder| + +This tutorial demonstrates step-by-step instructions on how to convert +models loaded from TensorFlow Hub using OpenVINO Runtime. + +`TensorFlow Hub `__ is a library and online platform +developed by Google that simplifies machine learning model reuse and +sharing. It serves as a repository of pre-trained models, embeddings, +and reusable components, allowing researchers and developers to access +and integrate state-of-the-art machine learning models into their own +projects with ease. TensorFlow Hub provides a diverse range of models +for various tasks like image classification, text embedding, and more. +It streamlines the process of incorporating these models into TensorFlow +workflows, fostering collaboration and accelerating the development of +AI applications. This centralized hub enhances model accessibility and +promotes the rapid advancement of machine learning capabilities across +the community. + +You have the flexibility to run this tutorial notebook in its entirety +or selectively execute specific sections, as each section operates +independently. 
+ +**Table of contents:** +--- + +- `Image classification <#image-classification>`__ +- `Install required packages <#install-required-packages>`__ +- `Import libraries <#import-libraries>`__ +- `Download the classifier <#download-the-classifier>`__ +- `Download a single image to try the model on <#download-a-single-image-to-try-the-model-on>`__ +- `Convert model to OpenVINO IR <#convert-model-to-openvino-ir>`__ +- `Select inference device <#select-inference-device>`__ +- `Inference <#inference>`__ +- `Image style transfer <#image-style-transfer>`__ +- `Install required packages <#install-required-packages>`__ +- `Load the model <#load-the-model>`__ +- `Convert the model to OpenVINO IR <#convert-the-model-to-openvino-ir>`__ +- `Select inference device <#select-inference-device>`__ +- `Inference <#inference>`__ + +.. |Colab| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/main/notebooks/126-tensorflow-hub/126-tensorflow-hub.ipynb +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=notebooks%2F126-tensorflow-hub%2F126-tensorflow-hub.ipynb + +Image classification +-------------------------------------------------------------- + +We will use the `MobileNet_v2 `__ +image classification model from `TensorFlow Hub `__. + +MobileNetV2 is a compact and efficient deep learning architecture +designed for mobile and embedded devices, developed by Google +researchers. It builds on the success of the original MobileNet by +introducing improvements in both speed and accuracy. MobileNetV2 employs +a streamlined architecture with inverted residual blocks, making it +highly efficient for real-time applications while minimizing +computational resources. 
This network excels in tasks like image +classification, object detection, and image segmentation, offering a +balance between model size and performance. MobileNetV2 has become a +popular choice for on-device AI applications, enabling faster and more +efficient deep learning inference on smartphones and edge devices. + +More information about the model can be found on the `Model page on TensorFlow +Hub `__. + +Install required packages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + %pip install -q tensorflow_hub tensorflow pillow numpy matplotlib + %pip install -q "openvino==2023.2.0.dev20230922" + + +.. parsed-literal:: + + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.24.4 which is incompatible. + tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 4.24.4 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + openvino-dev 2023.1.0 requires openvino==2023.1.0, but you have openvino 2023.2.0.dev20230922 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + + +Import libraries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + from pathlib import Path + import os + from urllib.request import urlretrieve + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" + + import tensorflow_hub as hub + import tensorflow as tf + import PIL + import numpy as np + import matplotlib.pyplot as plt + + import openvino as ov + + tf.get_logger().setLevel("ERROR") + +.. 
code:: ipython3 + + IMAGE_SHAPE = (224, 224) + IMAGE_URL, IMAGE_PATH = "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg", "data/grace_hopper.jpg" + MODEL_URL, MODEL_PATH = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/5", "models/mobilenet_v2_100_224.xml" + +Download the classifier +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select a MobileNetV2 pre-trained model `from TensorFlow +Hub `__ +and wrap it as a Keras layer with ``hub.KerasLayer``. + +.. code:: ipython3 + + model = hub.KerasLayer(MODEL_URL, input_shape=IMAGE_SHAPE + (3,)) + +Download a single image to try the model on +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The input ``images`` are expected to have color values in the range +[0,1], following the `common image input +conventions `__. +For this model, the size of the input images is fixed to ``height`` x +``width`` = 224 x 224 pixels. + +.. code:: ipython3 + + Path(IMAGE_PATH).parent.mkdir(parents=True, exist_ok=True) + grace_hopper, _ = urlretrieve(IMAGE_URL, IMAGE_PATH) + grace_hopper = PIL.Image.open(grace_hopper).resize(IMAGE_SHAPE) + grace_hopper + + + + +.. image:: 126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.png + + + +Normalize the image to [0,1] range. + +.. code:: ipython3 + + grace_hopper = np.array(grace_hopper) / 255.0 + grace_hopper.shape + + + + +.. parsed-literal:: + + (224, 224, 3) + + + +Convert model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We will convert the loaded model to OpenVINO IR using +``ov.convert_model`` function. We pass the model object to it, no +additional arguments required. Then, we save the model to disk using +``ov.save_model`` function. + +.. 
code:: ipython3 + + if not Path(MODEL_PATH).exists(): + converted_model = ov.convert_model(model) + ov.save_model(converted_model, MODEL_PATH) + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + compiled_model = core.compile_model(MODEL_PATH, device_name=device.value) + +Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add a batch dimension (with ``np.newaxis``) and pass the image to the +model: + +.. code:: ipython3 + + output = compiled_model(grace_hopper[np.newaxis, ...])[0] + output.shape + + + + +.. parsed-literal:: + + (1, 1001) + + + +The result is a 1001-element vector of logits, rating the probability of +each class for the image. + +The top class ID can be found with ``np.argmax``: + +.. code:: ipython3 + + predicted_class = np.argmax(output[0], axis=-1) + predicted_class + + + + +.. parsed-literal:: + + 653 + + + +Take the ``predicted_class`` ID (such as ``653``) and fetch the ImageNet +dataset labels to decode the predictions: + +.. code:: ipython3 + + labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt') + imagenet_labels = np.array(open(labels_path).read().splitlines()) + plt.imshow(grace_hopper) + plt.axis('off') + predicted_class_name = imagenet_labels[predicted_class] + _ = plt.title("Prediction: " + predicted_class_name.title()) + + + +.. 
image:: 126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_26_0.png + + +Image style transfer +-------------------------------------------------------------- + +We will use the `arbitrary image stylization +model `__ from `TensorFlow +Hub `__. + +The model contains conditional instance normalization (CIN) layers. + +The CIN network consists of two main components: a feature extractor and +a stylization module. The feature extractor extracts a set of features +from the content image. The stylization module then uses these features +to generate a stylized image. + +The stylization module is a stack of convolutional layers. Each +convolutional layer is followed by a CIN layer. The CIN layer takes the +features from the previous layer and the CIN parameters from the style +image as input and produces a new set of features as output. + +The output of the stylization module is a stylized image. The stylized +image has the same content as the original content image, but the style +has been transferred from the style image. + +The CIN network is able to stylize images in real time because it is +very efficient. + +More model information can be found on the `Model page on TensorFlow +Hub `__. + +Install required packages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + %pip install -q tensorflow tensorflow_hub "opencv-python" numpy matplotlib + %pip install -q "openvino==2023.2.0.dev20230922" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + import os + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" + from urllib.request import urlretrieve + from pathlib import Path + + import openvino as ov + + import tensorflow_hub as hub + import tensorflow as tf + import cv2 + import numpy as np + import matplotlib.pyplot as plt + +.. 
code:: ipython3 + + CONTENT_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg" + CONTENT_IMAGE_PATH = "./data/YellowLabradorLooking_new.jpg" + + STYLE_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/b/b4/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg" + STYLE_IMAGE_PATH = "./data/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg" + + MODEL_URL = "https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2" + MODEL_PATH = "./models/arbitrary-image-stylization-v1-256.xml" + +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We load the model from TensorFlow Hub using ``hub.KerasLayer``. Since +the model has multiple inputs (content image and style image), we need +to build it by calling with placeholders and wrap in ``tf.keras.Model`` +function. + +.. code:: ipython3 + + inputs = { + "placeholder": tf.keras.layers.Input(shape=(None, None, 3)), + "placeholder_1": tf.keras.layers.Input(shape=(None, None, 3)), + } + model = hub.KerasLayer(MODEL_URL, signature="serving_default", signature_outputs_as_dict=True) # define the signature to allow passing inputs as a dictionary + outputs = model(inputs) + model = tf.keras.Model(inputs=inputs, outputs=outputs) + +Convert the model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We convert the loaded model to OpenVINO IR using ``ov.convert_model`` +function. We pass our model to the function, no additional arguments +needed. After converting, we save the model to disk using +``ov.save_model`` function. + +.. code:: ipython3 + + if not Path(MODEL_PATH).exists(): + Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True) + converted_model = ov.convert_model(model) + ov.save_model(converted_model, MODEL_PATH) + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. 
code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + compiled_model = core.compile_model(MODEL_PATH, device_name=device.value) + +Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + def download_image(src, dst): + if not Path(dst).exists(): + Path(dst).parent.mkdir(parents=True, exist_ok=True) + urlretrieve(src, dst) + image = cv2.imread(dst) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert image color to RGB space + image = image / 255 # Normalize to [0, 1] interval + image = image.astype(np.float32) + return image + +.. code:: ipython3 + + content_image = download_image(CONTENT_IMAGE_URL, CONTENT_IMAGE_PATH) + style_image = download_image(STYLE_IMAGE_URL, STYLE_IMAGE_PATH) + style_image = cv2.resize(style_image, (256,256)) # model was trained on 256x256 images + +.. code:: ipython3 + + result = compiled_model([content_image[np.newaxis, ...], style_image[np.newaxis, ...]])[0] + +.. code:: ipython3 + + title2img = { + "Source image": content_image, + "Reference style": style_image, + "Result": result[0], + } + plt.figure(figsize=(12, 12)) + for i, (title, img) in enumerate(title2img.items()): + ax = plt.subplot(1, 3, i + 1) + ax.set_title(title) + plt.imshow(img) + plt.axis("off") + + + +.. 
image:: 126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_45_0.png + diff --git a/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.jpg b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.jpg new file mode 100644 index 00000000000000..586d32b4520f7b --- /dev/null +++ b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835d5d1db7220a784e62a9a45d4bf138a0ef3ff8c5e735edb665a39ffd249cd0 +size 10479 diff --git a/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.png b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.png new file mode 100644 index 00000000000000..4c831c921e04da --- /dev/null +++ b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_11_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae8ae23cf544122970c0bf2b1e065fb9677943c9553059e85f5b98a217bdcd8 +size 92843 diff --git a/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_26_0.png b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_26_0.png new file mode 100644 index 00000000000000..7ed328b470ce0f --- /dev/null +++ b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_26_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0078b72e3696c4265712121725b2db212016519c0e609c0ffafdd98e1dc970 +size 203738 diff --git a/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_45_0.png b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_45_0.png new file mode 100644 index 00000000000000..90d22510a83fc0 --- /dev/null +++ 
b/docs/notebooks/126-tensorflow-hub-with-output_files/126-tensorflow-hub-with-output_45_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6266c3c305c2278d74aa8d9e46c57f6c7d62dfacb92d2d616fc5e00e4cce6c +size 538743 diff --git a/docs/notebooks/126-tensorflow-hub-with-output_files/index.html b/docs/notebooks/126-tensorflow-hub-with-output_files/index.html new file mode 100644 index 00000000000000..5bc07f4507b316 --- /dev/null +++ b/docs/notebooks/126-tensorflow-hub-with-output_files/index.html @@ -0,0 +1,10 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/126-tensorflow-hub-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/126-tensorflow-hub-with-output_files/


../
+126-tensorflow-hub-with-output_11_0.jpg            31-Oct-2023 00:35               10479
+126-tensorflow-hub-with-output_11_0.png            31-Oct-2023 00:35               92843
+126-tensorflow-hub-with-output_26_0.png            31-Oct-2023 00:35              203738
+126-tensorflow-hub-with-output_45_0.png            31-Oct-2023 00:35              538743
+

+ diff --git a/docs/notebooks/201-vision-monodepth-with-output.rst b/docs/notebooks/201-vision-monodepth-with-output.rst index e6e3b3f65561eb..bd1172c37c0cec 100644 --- a/docs/notebooks/201-vision-monodepth-with-output.rst +++ b/docs/notebooks/201-vision-monodepth-with-output.rst @@ -11,7 +11,7 @@ OpenVINO. Model information can be found monodepth What is Monodepth? -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +~~~~~~~~~~~~~~~~~~ Monocular Depth Estimation is the task of estimating scene depth using a single image. It has many potential applications in robotics, 3D @@ -30,38 +30,41 @@ Transactions on Pattern Analysis and Machine Intelligence, doi: **Table of contents:** -- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ - - `Download the model <#download-the-model>`__ +- `Preparation <#preparation>`__ -- `Functions <#functions>`__ -- `Select inference device <#select-inference-device>`__ -- `Load the Model <#load-the-model>`__ -- `Monodepth on Image <#monodepth-on-image>`__ + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ + - `Download the model <#download-the-model>`__ - - `Load, resize and reshape input image <#load-resize-and-reshape-input-image>`__ - - `Do inference on the image <#do-inference-on-the-image>`__ - - `Display monodepth image <#display-monodepth-image>`__ +- `Functions <#functions>`__ +- `Select inference device <#select-inference-device>`__ +- `Load the Model <#load-the-model>`__ +- `Monodepth on Image <#monodepth-on-image>`__ -- `Monodepth on Video <#monodepth-on-video>`__ + - `Load, resize and reshape input + image <#load-resize-and-reshape-input-image>`__ + - `Do inference on the image <#do-inference-on-the-image>`__ + - `Display monodepth image <#display-monodepth-image>`__ - - `Video Settings <#video-settings>`__ - - `Load the Video <#load-the-video>`__ - - `Do 
Inference on a Video and Create Monodepth Video <#do-inference-on-a-video-and-create-monodepth-video>`__ - - `Display Monodepth Video <#display-monodepth-video>`__ +- `Monodepth on Video <#monodepth-on-video>`__ -Preparation -############################################################################################################################### + - `Video Settings <#video-settings>`__ + - `Load the Video <#load-the-video>`__ + - `Do Inference on a Video and Create Monodepth + Video <#do-inference-on-a-video-and-create-monodepth-video>`__ + - `Display Monodepth Video <#display-monodepth-video>`__ -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q matplotlib opencv-python requests tqdm + %pip install -q "openvino>=2023.1.0" + %pip install -q matplotlib opencv-python requests tqdm # Fetch `notebook_utils` module import urllib.request @@ -71,16 +74,22 @@ Install requirements ) +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + .. parsed-literal:: - ('notebook_utils.py', ) + ('notebook_utils.py', ) -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -104,8 +113,8 @@ Imports from notebook_utils import download_file, load_image -Download the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -133,8 +142,8 @@ Download the model model/MiDaS_small.bin: 0%| | 0.00/31.6M [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/201-vision-monodepth-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/201-vision-monodepth-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/201-vision-monodepth-with-output_files/


../
-201-vision-monodepth-with-output_18_0.png          16-Aug-2023 01:31              959858
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/201-vision-monodepth-with-output_files/


../
+201-vision-monodepth-with-output_18_0.png          31-Oct-2023 00:35              959858
 

diff --git a/docs/notebooks/202-vision-superresolution-image-with-output.rst b/docs/notebooks/202-vision-superresolution-image-with-output.rst index ec5de075ca5038..ae8261674fb22f 100644 --- a/docs/notebooks/202-vision-superresolution-image-with-output.rst +++ b/docs/notebooks/202-vision-superresolution-image-with-output.rst @@ -16,49 +16,68 @@ pp. 2777-2784, doi: 10.1109/ICPR.2018.8545760. **Table of contents:** -- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ +- `Preparation <#preparation>`__ - - `Select inference device <#select-inference-device>`__ + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ - - `Functions <#functions>`__ + - `Select inference device <#select-inference-device>`__ -- `Load the Superresolution Model <#load-the-superresolution-model>`__ -- `Load and Show the Input Image <#load-and-show-the-input-image>`__ -- `Superresolution on a Crop of the Image <#superresolution-on-a-crop-of-the-image>`__ + - `Functions <#functions>`__ - - `Crop the Input Image once. <#crop-the-input-image-once>`__ - - `Reshape/Resize Crop for Model Input <#reshape-resize-crop-for-model-input>`__ - - `Do Inference <#do-inference>`__ - - `Show and Save Results <#show-and-save-results>`__ +- `Load the Superresolution + Model <#load-the-superresolution-model>`__ +- `Load and Show the Input + Image <#load-and-show-the-input-image>`__ +- `Superresolution on a Crop of the + Image <#superresolution-on-a-crop-of-the-image>`__ - - `Save Superresolution and Bicubic Image Crop <#save-superresolution-and-bicubic-image-crop>`__ - - `Write Animated GIF with Bicubic/Superresolution Comparison <#write-animated-gif-with-bicubic-superresolution-comparison>`__ - - `Create a Video with Sliding Bicubic/Superresolution Comparison <#create-a-video-with-sliding-bicubic-superresolution-comparison>`__ + - `Crop the Input Image + once. 
<#crop-the-input-image-once>`__ + - `Reshape/Resize Crop for Model + Input <#reshaperesize-crop-for-model-input>`__ + - `Do Inference <#do-inference>`__ + - `Show and Save Results <#show-and-save-results>`__ -- `Superresolution on full input image <#superresolution-on-full-input-image>`__ + - `Save Superresolution and Bicubic Image + Crop <#save-superresolution-and-bicubic-image-crop>`__ + - `Write Animated GIF with Bicubic/Superresolution + Comparison <#write-animated-gif-with-bicubicsuperresolution-comparison>`__ + - `Create a Video with Sliding Bicubic/Superresolution + Comparison <#create-a-video-with-sliding-bicubicsuperresolution-comparison>`__ - - `Compute patches <#compute-patches>`__ - - `Do Inference <#do-inference>`__ - - `Save superresolution image and the bicubic image <#save-superresolution-image-and-the-bicubic-image>`__ +- `Superresolution on full input + image <#superresolution-on-full-input-image>`__ -Preparation -############################################################################################################################### + - `Compute patches <#compute-patches>`__ + - `Do Inference <#do-inference>`__ + - `Save superresolution image and the bicubic + image <#save-superresolution-image-and-the-bicubic-image>`__ -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q opencv-python - !pip install -q pillow matplotlib + %pip install -q "openvino>=2023.1.0" + %pip install -q opencv-python + %pip install -q pillow matplotlib + + +.. 
parsed-literal:: -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -84,13 +103,13 @@ Imports path.parent.mkdir(parents=True, exist_ok=True) urllib.request.urlretrieve(url, path) -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select inference device -------------------------------------------------------------------------------------------------------------------------------- +Select inference device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -138,8 +157,8 @@ Select device from dropdown list for running inference using OpenVINO: else: print(f'{model_name} already downloaded to {base_model_dir}') -Functions -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -199,8 +218,8 @@ Functions """ return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB) -Load the Superresolution Model -############################################################################################################################### +Load the Superresolution Model +------------------------------------------------------------------------ The Super Resolution model expects two inputs: the input image and a bicubic interpolation of the input image to the target size of @@ -247,12 +266,10 @@ information about the network inputs and outputs. The image sides are upsampled by a factor of 4. The new image is 16 times as large as the original image -Load and Show the Input Image -############################################################################################################################### - -.. note:: +Load and Show the Input Image +----------------------------------------------------------------------- - For the best results, use raw images (like ``TIFF``, + **NOTE**: For the best results, use raw images (like ``TIFF``, ``BMP`` or ``PNG``). Compressed images (like ``JPEG``) may appear distorted after processing with the super resolution model. @@ -284,11 +301,11 @@ Load and Show the Input Image .. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_15_1.png -Superresolution on a Crop of the Image -############################################################################################################################### +Superresolution on a Crop of the Image +-------------------------------------------------------------------------------- -Crop the Input Image once. -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Crop the Input Image once. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Crop the network input size. 
Give the X (width) and Y (height) coordinates for the top left corner of the crop. Set the ``CROP_FACTOR`` @@ -336,8 +353,8 @@ as the crop size. .. image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_17_1.png -Reshape/Resize Crop for Model Input -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Reshape/Resize Crop for Model Input +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The input image is resized to a network input size, and reshaped to (N,C,H,W) (N=number of images, C=number of channels, H=height, W=width). @@ -359,8 +376,8 @@ interpolation. This bicubic image is the second input to the network. input_image_original = np.expand_dims(image_crop.transpose(2, 0, 1), axis=0) input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0) -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Do inference and convert the inference result to an ``RGB`` image. @@ -376,8 +393,8 @@ Do inference and convert the inference result to an ``RGB`` image. # Get inference result as numpy array and reshape to image shape and data type result_image = convert_result_to_image(result) -Show and Save Results -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show and Save Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Show the bicubic image and the enhanced superresolution image. @@ -402,8 +419,8 @@ Show the bicubic image and the enhanced superresolution image. .. 
image:: 202-vision-superresolution-image-with-output_files/202-vision-superresolution-image-with-output_23_1.png -Save Superresolution and Bicubic Image Crop -------------------------------------------------------------------------------------------------------------------------------- +Save Superresolution and Bicubic Image Crop +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -434,8 +451,8 @@ Save Superresolution and Bicubic Image Crop Images written to directory: output -Write Animated GIF with Bicubic/Superresolution Comparison -------------------------------------------------------------------------------------------------------------------------------- +Write Animated GIF with Bicubic/Superresolution Comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -472,8 +489,8 @@ Write Animated GIF with Bicubic/Superresolution Comparison -Create a Video with Sliding Bicubic/Superresolution Comparison -------------------------------------------------------------------------------------------------------------------------------- +Create a Video with Sliding Bicubic/Superresolution Comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This may take a while. For the video, the superresolution and bicubic image are resized by a factor of 2 to improve processing speed. This @@ -540,8 +557,8 @@ the ``Files`` tool. The video has been saved to output/flag_crop_comparison_2x.avi
-Superresolution on full input image -############################################################################################################################### +Superresolution on full input image +----------------------------------------------------------------------------- Superresolution on the full image is done by dividing the image into patches of equal size, doing superresolution on each patch, and then @@ -551,8 +568,8 @@ near the border of the image are ignored. Adjust the ``CROPLINES`` setting in the next cell if you see boundary effects. -Compute patches -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compute patches +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -596,8 +613,8 @@ Compute patches The output image will have a width of 11280 and a height of 7280 -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The code below reads one patch of the image at a time. Each patch is reshaped to the network input shape and upsampled with bicubic @@ -714,12 +731,12 @@ as total time to process each patch. .. parsed-literal:: - Processed 42 patches in 4.76 seconds. Total patches per second (including processing): 8.82. - Inference patches per second: 17.20 + Processed 42 patches in 4.68 seconds. Total patches per second (including processing): 8.97. + Inference patches per second: 17.57 -Save superresolution image and the bicubic image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Save superresolution image and the bicubic image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..
code:: ipython3 diff --git a/docs/notebooks/202-vision-superresolution-image-with-output_files/index.html b/docs/notebooks/202-vision-superresolution-image-with-output_files/index.html index 321ed65740ab8f..cdd097d584c23b 100644 --- a/docs/notebooks/202-vision-superresolution-image-with-output_files/index.html +++ b/docs/notebooks/202-vision-superresolution-image-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/202-vision-superresolution-image-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/202-vision-superresolution-image-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/202-vision-superresolution-image-with-output_files/


../
-202-vision-superresolution-image-with-output_15..> 16-Aug-2023 01:31              272963
-202-vision-superresolution-image-with-output_17..> 16-Aug-2023 01:31              356735
-202-vision-superresolution-image-with-output_23..> 16-Aug-2023 01:31             2896276
-202-vision-superresolution-image-with-output_27..> 16-Aug-2023 01:31             3207711
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/202-vision-superresolution-image-with-output_files/


../
+202-vision-superresolution-image-with-output_15..> 31-Oct-2023 00:35              272963
+202-vision-superresolution-image-with-output_17..> 31-Oct-2023 00:35              356735
+202-vision-superresolution-image-with-output_23..> 31-Oct-2023 00:35             2896276
+202-vision-superresolution-image-with-output_27..> 31-Oct-2023 00:35             3207711
 

diff --git a/docs/notebooks/202-vision-superresolution-video-with-output.rst b/docs/notebooks/202-vision-superresolution-video-with-output.rst index b78b51398fadad..bf00da7c4390ff 100644 --- a/docs/notebooks/202-vision-superresolution-video-with-output.rst +++ b/docs/notebooks/202-vision-superresolution-video-with-output.rst @@ -14,46 +14,56 @@ Resolution,” `__ 2018 24th International Conference on Pattern Recognition (ICPR), 2018, pp. 2777-2784, doi: 10.1109/ICPR.2018.8545760. -.. note:: - - The Single Image Super Resolution (SISR) model used in this + **NOTE**: The Single Image Super Resolution (SISR) model used in this demo is not optimized for a video. Results may vary depending on the video. **Table of contents:** -- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ +- `Preparation <#preparation>`__ + + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ - - `Select inference device <#select-inference-device>`__ + - `Select inference device <#select-inference-device>`__ - - `Functions <#functions>`__ + - `Functions <#functions>`__ -- `Load the Superresolution Model <#load-the-superresolution-model>`__ -- `Superresolution on Video <#superresolution-on-video>`__ +- `Load the Superresolution + Model <#load-the-superresolution-model>`__ +- `Superresolution on Video <#superresolution-on-video>`__ - - `Settings <#settings>`__ - - `Download and Prepare Video <#download-and-prepare-video>`__ - - `Do Inference <#do-inference>`__ - - `Show Side-by-Side Video of Bicubic and Superresolution Version <#show-side-by-side-video-of-bicubic-and-superresolution-version>`__ + - `Settings <#settings>`__ + - `Download and Prepare + Video <#download-and-prepare-video>`__ + - `Do Inference <#do-inference>`__ + - `Show Side-by-Side Video of Bicubic and Superresolution + Version 
<#show-side-by-side-video-of-bicubic-and-superresolution-version>`__ -Preparation -############################################################################################################################### +Preparation +----------------------------------------------------- -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q opencv-python - !pip install -q "pytube>=12.1.0" + %pip install -q "openvino>=2023.1.0" + %pip install -q opencv-python + %pip install -q "pytube>=12.1.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -83,13 +93,13 @@ Imports path.parent.mkdir(parents=True, exist_ok=True) urllib.request.urlretrieve(url, path) -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select inference device -------------------------------------------------------------------------------------------------------------------------------- +Select inference device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -143,8 +153,8 @@ Select device from dropdown list for running inference using OpenVINO: single-image-super-resolution-1032 already downloaded to model -Functions -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -162,8 +172,8 @@ Functions result = result.astype(np.uint8) return result -Load the Superresolution Model -############################################################################################################################### +Load the Superresolution Model +------------------------------------------------------------------------ Load the model in OpenVINO Runtime with ``core.read_model`` and compile it for the specified device with ``core.compile_model``. @@ -211,8 +221,8 @@ resolution version of the image in 1920x1080. The image sides are upsampled by a factor of 4. The new image is 16 times as large as the original image -Superresolution on Video -############################################################################################################################### +Superresolution on Video +------------------------------------------------------------------ Download a YouTube video with ``PyTube`` and enhance the video quality with superresolution. @@ -220,14 +230,12 @@ with superresolution. By default, only the first 100 frames of the video are processed. Change ``NUM_FRAMES`` in the cell below to modify this. -.. note:: - - The resulting video does not contain audio. The input video + **NOTE**: The resulting video does not contain audio. The input video should be a landscape video and have an input resolution of 360p (640x360) for the 1032 model, or 480p (720x480) for the 1033 model. 
-Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -240,8 +248,8 @@ Settings # If you have FFMPEG installed, you can change FOURCC to `*"THEO"` to improve video writing speed. FOURCC = cv2.VideoWriter_fourcc(*"vp09") -Download and Prepare Video -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download and Prepare Video +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -327,8 +335,8 @@ the superresolution side by side. frameSize=(target_width * 2, target_height), ) -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Read video frames and enhance them with superresolution. Save the superresolution video, the bicubic video and the comparison video to a @@ -444,17 +452,17 @@ video. .. parsed-literal:: - Processed frame 100. Inference time: 0.05 seconds (19.34 FPS) + Processed frame 100. Inference time: 0.05 seconds (19.45 FPS) .. parsed-literal:: Video's saved to output directory. - Processed 100 frames in 235.00 seconds. Total FPS (including video processing): 0.43. Inference FPS: 17.29. + Processed 100 frames in 235.05 seconds. Total FPS (including video processing): 0.43. Inference FPS: 18.35. -Show Side-by-Side Video of Bicubic and Superresolution Version -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show Side-by-Side Video of Bicubic and Superresolution Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 diff --git a/docs/notebooks/203-meter-reader-with-output.rst b/docs/notebooks/203-meter-reader-with-output.rst index cf8fa5961634f5..5dbc17fc108e2c 100644 --- a/docs/notebooks/203-meter-reader-with-output.rst +++ b/docs/notebooks/203-meter-reader-with-output.rst @@ -21,26 +21,38 @@ to build up a multiple inference task pipeline: **Table of contents:** -- `Import <#import>`__ -- `Prepare the Model and Test Image <#prepare-the-model-and-test-image>`__ -- `Configuration <#configuration>`__ -- `Load the Models <#load-the-models>`__ -- `Data Process <#data-process>`__ -- `Main Function <#main-function>`__ - - - `Initialize the model and parameters. <#initialize-the-model-and-parameters>`__ - - `Run meter detection model <#run-meter-detection-model>`__ - - `Run meter segmentation model <#run-meter-segmentation-model>`__ - - `Postprocess the models result and calculate the final readings <#postprocess-the-models-result-and-calculate-the-final-readings>`__ - - `Get the reading result on the meter picture <#get-the-reading-result-on-the-meter-picture>`__ + +- `Import <#import>`__ +- `Prepare the Model and Test + Image <#prepare-the-model-and-test-image>`__ +- `Configuration <#configuration>`__ +- `Load the Models <#load-the-models>`__ +- `Data Process <#data-process>`__ +- `Main Function <#main-function>`__ + + - `Initialize the model and + parameters. <#initialize-the-model-and-parameters>`__ + - `Run meter detection model <#run-meter-detection-model>`__ + - `Run meter segmentation + model <#run-meter-segmentation-model>`__ + - `Postprocess the models result and calculate the final + readings <#postprocess-the-models-result-and-calculate-the-final-readings>`__ + - `Get the reading result on the meter + picture <#get-the-reading-result-on-the-meter-picture>`__ .. code:: ipython3 # Install openvino package - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" matplotlib + + +.. 
parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + -Import -############################################################################################################################### +Import +------------------------------------------------ .. code:: ipython3 @@ -57,8 +69,8 @@ Import sys.path.append("../utils") from notebook_utils import download_file, segmentation_map_to_image -Prepare the Model and Test Image -############################################################################################################################### +Prepare the Model and Test Image +-------------------------------------------------------------------------- Download PPYOLOv2 and DeepLabV3P pre-trained models from PaddlePaddle community. @@ -133,8 +145,8 @@ community. Test Image Saved to "./data". -Configuration -############################################################################################################################### +Configuration +------------------------------------------------------- Add parameter configuration for reading calculation. @@ -162,8 +174,8 @@ Add parameter configuration for reading calculation. SEG_LABEL = {'background': 0, 'pointer': 1, 'scale': 2} -Load the Models -############################################################################################################################### +Load the Models +--------------------------------------------------------- Define a common class for model loading and inference @@ -205,8 +217,8 @@ Define a common class for model loading and inference result = self.compiled_model(input_image)[self.output_layer] return result -Data Process -############################################################################################################################### +Data Process +------------------------------------------------------ Including the preprocessing and postprocessing tasks of each model. 
@@ -535,13 +547,13 @@ Including the preprocessing and postprocessing tasks of each model. readings.append(reading) return readings -Main Function -############################################################################################################################### +Main Function +------------------------------------------------------- -Initialize the model and parameters. -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Initialize the model and parameters. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -601,7 +613,7 @@ bounds of input batch size. .. parsed-literal:: - + @@ -609,8 +621,8 @@ bounds of input batch size. .. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_16_1.png -Run meter detection model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run meter detection model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Detect the location of the meter and prepare the ROI images for segmentation. @@ -654,8 +666,8 @@ segmentation. .. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_18_1.png -Run meter segmentation model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run meter segmentation model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Get the results of segmentation task on detected ROI. @@ -694,8 +706,8 @@ Get the results of segmentation task on detected ROI. .. 
image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_20_1.png -Postprocess the models result and calculate the final readings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Postprocess the models result and calculate the final readings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use OpenCV function to find the location of the pointer in a scale map. @@ -731,8 +743,8 @@ Use OpenCV function to find the location of the pointer in a scale map. .. image:: 203-meter-reader-with-output_files/203-meter-reader-with-output_22_1.png -Get the reading result on the meter picture -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get the reading result on the meter picture +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -763,4 +775,4 @@ Get the reading result on the meter picture Try it with your meter photos! -############################################################################################################################### +------------------------------ diff --git a/docs/notebooks/203-meter-reader-with-output_files/index.html b/docs/notebooks/203-meter-reader-with-output_files/index.html index d2e984d5de65f5..7357da586d265d 100644 --- a/docs/notebooks/203-meter-reader-with-output_files/index.html +++ b/docs/notebooks/203-meter-reader-with-output_files/index.html @@ -1,11 +1,11 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/203-meter-reader-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/203-meter-reader-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/203-meter-reader-with-output_files/


../
-203-meter-reader-with-output_15_1.png              16-Aug-2023 01:31              170121
-203-meter-reader-with-output_17_1.png              16-Aug-2023 01:31              190271
-203-meter-reader-with-output_19_1.png              16-Aug-2023 01:31               26914
-203-meter-reader-with-output_21_1.png              16-Aug-2023 01:31                8966
-203-meter-reader-with-output_23_1.png              16-Aug-2023 01:31              170338
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/203-meter-reader-with-output_files/


../
+203-meter-reader-with-output_16_1.png              31-Oct-2023 00:35              170121
+203-meter-reader-with-output_18_1.png              31-Oct-2023 00:35              190271
+203-meter-reader-with-output_20_1.png              31-Oct-2023 00:35               26914
+203-meter-reader-with-output_22_1.png              31-Oct-2023 00:35                8966
+203-meter-reader-with-output_24_1.png              31-Oct-2023 00:35              170338
 

diff --git a/docs/notebooks/204-segmenter-semantic-segmentation-with-output.rst b/docs/notebooks/204-segmenter-semantic-segmentation-with-output.rst index b2c11965c9bdeb..3a53e7b87d63fe 100644 --- a/docs/notebooks/204-segmenter-semantic-segmentation-with-output.rst +++ b/docs/notebooks/204-segmenter-semantic-segmentation-with-output.rst @@ -24,28 +24,23 @@ Segmenter `__. More about the model and its details can be found in the following paper: `Segmenter: Transformer for Semantic Segmentation `__ or in the -`repository `__. - -**Table of contents:** - -- `Get and prepare PyTorch model <#get-and-prepare-pytorch-model>`__ - - - `Prerequisites <#prerequisites>`__ - - `Loading PyTorch model <#loading-pytorch-model>`__ - -- `Preparing preprocessing and visualization functions <#preparing-preprocessing-and-visualization-functions>`__ - - - `Preprocessing <#preprocessing>`__ - - `Visualization <#visualization>`__ - -- `Validation of inference of original model <#validation-of-inference-of-original-model>`__ -- `Export to ONNX <#export-to-onnx>`__ -- `Convert ONNX model to OpenVINO Intermediate Representation (IR) <#convert-onnx-model-to-openvino-intermediate-representation-ir>`__ -- `Verify converted model inference <#verify-converted-model-inference>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Benchmarking performance of converted model <#benchmarking-performance-of-converted-model>`__ +`repository `__. 
+ +**Table of contents:** + +- `Get and prepare PyTorch model <#get-and-prepare-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + - `Loading PyTorch model <#loading-pytorch-model>`__ + +- `Preparing preprocessing and visualization functions <#preparing-preprocessing-and-visualization-functions>`__ + + - `Preprocessing <#preprocessing>`__ + - `Visualization <#visualization>`__ + +- `Validation of inference of original model <#validation-of-inference-of-original-model>`__ +- `Convert PyTorch model to OpenVINO Intermediate Representation (IR) <#convert-pytorch-model-to-openvino-intermediate-representation-ir>`__ +- `Verify converted model inference <#verify-converted-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Benchmarking performance of converted model <#benchmarking-performance-of-converted-model>`__ .. |Segmenteer diagram| image:: https://user-images.githubusercontent.com/24582831/148507554-87eb80bd-02c7-4c31-b102-c6141e231ec8.png @@ -55,13 +50,12 @@ notebook consists of the following steps: - Preparing PyTorch Segmenter model - Preparing preprocessing and visualization functions - Validating inference of original model -- Converting PyTorch model to ONNX -- Converting ONNX to OpenVINO IR +- Converting PyTorch model to OpenVINO IR - Validating inference of the converted model - Benchmark performance of the converted model -Get and prepare PyTorch model -############################################################################################################################### +Get and prepare PyTorch model +----------------------------------------------------------------------- The first thing we’ll need to do is clone `repository `__ containing model @@ -75,21 +69,23 @@ The code from the repository already contains functions that create model and load weights, but we will need to download config and trained weights (checkpoint) file and add some additional helper functions.
-Prerequisites -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prerequisites +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 # Installing requirements - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q timm "mmsegmentation==0.30.0" einops "mmcv==1.7.1" "timm == 0.4.12" onnx + %pip install -q "openvino>=2023.1.0" + %pip install -q timm "mmsegmentation==0.30.0" einops "mmcv==1.7.1" "timm == 0.4.12" .. parsed-literal:: + Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. black 21.7b0 requires tomli<2.0.0,>=0.2.6, but you have tomli 2.0.1 which is incompatible. - + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -130,7 +126,7 @@ config for our model. Cloning into 'segmenter'... remote: Enumerating objects: 268, done. remote: Total 268 (delta 0), reused 0 (delta 0), pack-reused 268 - Receiving objects: 100% (268/268), 15.34 MiB | 3.91 MiB/s, done. + Receiving objects: 100% (268/268), 15.34 MiB | 3.50 MiB/s, done. Resolving deltas: 100% (117/117), done. @@ -163,11 +159,11 @@ config for our model. model/variant.yml: 0%| | 0.00/940 [00:00`__ +`torch.nn.Module `__ class, initialized by a state dictionary containing model weights. Typical steps to get the model are therefore: @@ -208,18 +204,18 @@ Load normalization settings from config file. .. 
parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mmcv/__init__.py:20: UserWarning: On January 1, 2023, MMCV will release v2.0.0, in which it will remove components related to the training process and add a data transformation module. In addition, it will rename the package names mmcv to mmcv-lite and mmcv-full to mmcv. See https://github.com/open-mmlab/mmcv/blob/master/docs/en/compatibility.md for more details. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mmcv/__init__.py:20: UserWarning: On January 1, 2023, MMCV will release v2.0.0, in which it will remove components related to the training process and add a data transformation module. In addition, it will rename the package names mmcv to mmcv-lite and mmcv-full to mmcv. See https://github.com/open-mmlab/mmcv/blob/master/docs/en/compatibility.md for more details. warnings.warn( -Preparing preprocessing and visualization functions -############################################################################################################################### +Preparing preprocessing and visualization functions +--------------------------------------------------------------------------------------------- Now we will define utility functions for preprocessing and visualizing the results. 
-Preprocessing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Inference input is tensor with shape ``[1, 3, H, W]`` in ``B, C, H, W`` format, where: @@ -263,8 +259,8 @@ normalized with given mean and standard deviation provided in return im -Visualization -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Visualization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Inference output contains labels assigned to each pixel, so the output in our case is ``[150, H, W]`` in ``CL, H, W`` format where: @@ -307,8 +303,8 @@ corresponding to the inferred labels. return pil_blend -Validation of inference of original model -############################################################################################################################### +Validation of inference of original model +----------------------------------------------------------------------------------- Now that we have everything ready, we can perform segmentation on example image ``coco_hollywood.jpg``. @@ -359,11 +355,11 @@ We can see that model segments the image into meaningful parts. Since we are using tiny variant of model, the result is not as good as it is with larger models, but it already shows nice segmentation performance. -Export to ONNX -############################################################################################################################### +Convert PyTorch model to OpenVINO Intermediate Representation (IR) +------------------------------------------------------------------------------------------------------------ Now that we’ve verified that the inference of PyTorch model works, we -will first export it to ONNX format. +will convert it to OpenVINO IR format. 
To do this, we first get input dimensions from the model configuration file and create torch dummy input. Input dimensions are in our case @@ -374,17 +370,21 @@ file and create torch dummy input. Input dimensions are in our case - ``H`` - model input image height - ``W`` - model input image width -Note that H and W are here fixed to 512, as this is required by the -model. Resizing is done inside the inference function from the -original repository. +.. + + Note that H and W are here fixed to 512, as this is required by the + model. Resizing is done inside the inference function from the + original repository. -After that, we use ``export`` function from PyTorch to convert the model -to ONNX. The process can generate some warnings, but they are not a -problem. +After that, we use ``ov.convert_model`` function from PyTorch to convert +the model to OpenVINO model, which is ready to use in Python interface +but can also be serialized to OpenVINO IR format for future execution +using ``ov.save_model``. The process can generate some warnings, but +they are not a problem. .. code:: ipython3 - import torch.onnx + import openvino as ov # get input sizes from config file batch_size = 2 @@ -394,63 +394,29 @@ problem. # make dummy input with correct shapes obtained from config file dummy_input = torch.randn(batch_size, channels, image_size, image_size) - onnx_path = MODEL_DIR / "segmenter.onnx" - - # export to onnx format - torch.onnx.export(pytorch_model, - dummy_input, - onnx_path, - input_names=["input"], - output_names=["output"]) - - # if we wanted dynamic batch size (sometimes required by infer function) we could add additional parameter - # dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}} + model = ov.convert_model(pytorch_model, example_input=dummy_input, input=([batch_size, channels, image_size, image_size], )) + # serialize model for saving IR + ov.save_model(model, MODEL_DIR / "segmenter.xml") .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:69: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:69: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if H % patch_size > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:71: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:71: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if W % patch_size > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/vit.py:122: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/vit.py:122: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x.shape[1] != pos_embed.shape[1]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/decoder.py:100: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/decoder.py:100: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
masks = rearrange(masks, "b (h w) n -> b n h w", h=int(GS)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if extra_h > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/204-segmenter-semantic-segmentation/./segmenter/segm/model/utils.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if extra_w > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - - -Convert ONNX model to OpenVINO Intermediate Representation (IR) -############################################################################################################################### - -While ONNX models are directly supported by OpenVINO runtime, it can be -useful to convert them to IR format to take advantage of OpenVINO -optimization tools and features. The ``ov.convert_model`` function of -`model conversion -API `__ -can be used. 
The function returns instance of OpenVINO Model class, -which is ready to use in Python interface but can also be serialized to -OpenVINO IR format for future execution. -.. code:: ipython3 - import openvino as ov - - model = ov.convert_model(str(MODEL_DIR / "segmenter.onnx")) - # serialize model for saving IR - ov.save_model(model, str(MODEL_DIR / "segmenter.xml")) - -Verify converted model inference -############################################################################################################################### +Verify converted model inference +-------------------------------------------------------------------------- To test that model was successfully converted, we can use same inference function from original repository, but we need to make custom class. @@ -517,10 +483,10 @@ any additional custom code required to process input. Now that we have created ``SegmenterOV`` helper class, we can use it in inference function. -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -572,33 +538,31 @@ Select device from dropdown list for running inference using OpenVINO: -.. image:: 204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_34_0.png +.. image:: 204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_32_0.png As we can see, we get the same results as with original model. 
-Benchmarking performance of converted model -############################################################################################################################### +Benchmarking performance of converted model +------------------------------------------------------------------------------------- Finally, use the OpenVINO `Benchmark Tool `__ to measure the inference performance of the model. -.. note:: - - For more accurate performance, it is recommended to run + NOTE: For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on CPU for one minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run ``benchmark_app --help`` to see an overview of all command-line options. +.. - -Keep in mind that the authors of original paper used V100 GPU, which -is significantly more powerful than the CPU used to obtain the -following throughput. Therefore, FPS can’t be compared directly. + Keep in mind that the authors of original paper used V100 GPU, which + is significantly more powerful than the CPU used to obtain the + following throughput. Therefore, FPS can’t be compared directly. .. code:: ipython3 @@ -621,5 +585,77 @@ following throughput. Therefore, FPS can’t be compared directly. .. parsed-literal:: - /bin/bash: benchmark_app: command not found + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ WARNING ] Default duration 120 seconds is used for unknown device AUTO + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12538-e7c1344d3c3 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2023.2.0-12538-e7c1344d3c3 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. 
Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 24.01 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] im (node: im) : f32 / [...] / [2,3,512,512] + [ INFO ] Model outputs: + [ INFO ] y (node: aten::upsample_bilinear2d/Interpolate) : f32 / [...] / [2,150,512,512] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 2 + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] im (node: im) : u8 / [N,C,H,W] / [2,3,512,512] + [ INFO ] Model outputs: + [ INFO ] y (node: aten::upsample_bilinear2d/Interpolate) : f32 / [...] / [2,150,512,512] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 387.83 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: Model0 + [ INFO ] NUM_STREAMS: 6 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given 
for input 'im'!. This input will be filled with random values! + [ INFO ] Fill input 'im' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 208.27 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 1392 iterations + [ INFO ] Duration: 120914.63 ms + [ INFO ] Latency: + [ INFO ] Median: 520.24 ms + [ INFO ] Average: 520.33 ms + [ INFO ] Min: 364.28 ms + [ INFO ] Max: 586.27 ms + [ INFO ] Throughput: 23.02 FPS diff --git a/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_34_0.jpg b/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_32_0.jpg similarity index 100% rename from docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_34_0.jpg rename to docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_32_0.jpg diff --git a/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_34_0.png b/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_32_0.png similarity index 100% rename from docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_34_0.png rename to docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/204-segmenter-semantic-segmentation-with-output_32_0.png diff --git a/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/index.html b/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/index.html index 
bb43538de1d7c6..634657998713a6 100644 --- a/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/index.html +++ b/docs/notebooks/204-segmenter-semantic-segmentation-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/204-segmenter-semantic-segmentation-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/204-segmenter-semantic-segmentation-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/204-segmenter-semantic-segmentation-with-output_files/


../
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31               72352
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31              909669
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31               72356
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31              909691
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/204-segmenter-semantic-segmentation-with-output_files/


../
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35               72352
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35              909669
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35               72372
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35              909654
 

diff --git a/docs/notebooks/205-vision-background-removal-with-output.rst b/docs/notebooks/205-vision-background-removal-with-output.rst index 7604ff5432fd08..46c28a42cd3d2a 100644 --- a/docs/notebooks/205-vision-background-removal-with-output.rst +++ b/docs/notebooks/205-vision-background-removal-with-output.rst @@ -16,43 +16,55 @@ The model source is available **Table of contents:** -- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Import the PyTorch Library and U^2-Net <#import-the-pytorch-library-and-u2-net>`__ - - `Settings <#settings>`__ - - `Load the U^2-Net Model <#load-the-u2-net-model>`__ +- `Preparation <#preparation>`__ -- `Convert PyTorch U^2-Net model to OpenVINO IR <#convert-pytorch-u2-net-model-to-openvino-ir>`__ + - `Install requirements <#install-requirements>`__ + - `Import the PyTorch Library and + U\ :math:`^2`-Net <#import-the-pytorch-library-and-u-net>`__ + - `Settings <#settings>`__ + - `Load the U\ :math:`^2`-Net + Model <#load-the-u-net-model>`__ - - `Convert Pytorch model to OpenVINO IR Format <#convert-pytorch-model-to-openvino-ir-format>`__ +- `Convert PyTorch U\ :math:`^2`-Net model to OpenVINO + IR <#convert-pytorch-u-net-model-to-openvino-ir>`__ -- `Load and Pre-Process Input Image <#load-and-pre-process-input-image>`__ -- `Select inference device <#select-inference-device>`__ -- `Do Inference on OpenVINO IR Model <#do-inference-on-openvino-ir-model>`__ -- `Visualize Results <#visualize-results>`__ + - `Convert Pytorch model to OpenVINO IR + Format <#convert-pytorch-model-to-openvino-ir-format>`__ - - `Add a Background Image <#add-a-background-image>`__ +- `Load and Pre-Process Input + Image <#load-and-pre-process-input-image>`__ +- `Select inference device <#select-inference-device>`__ +- `Do Inference on OpenVINO IR + Model <#do-inference-on-openvino-ir-model>`__ +- `Visualize Results <#visualize-results>`__ -- `References <#references>`__ + - `Add a Background Image 
<#add-a-background-image>`__ -Preparation -############################################################################################################################### +- `References <#references>`__ -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q torch onnx opencv-python matplotlib - !pip install -q gdown + %pip install -q "openvino>=2023.1.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch opencv-python matplotlib + %pip install -q gdown + +.. parsed-literal:: -.. _import-the-pytorch-library-and-u2-net: + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. -Import the PyTorch Library and U\ :math:`^2`-Net -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Import the PyTorch Library and U\ :math:`^2`-Net +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -87,8 +99,8 @@ Import the PyTorch Library and U\ :math:`^2`-Net from notebook_utils import load_image from model.u2net import U2NET, U2NETP -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tutorial supports using the original U\ :math:`^2`-Net salient object detection model, as well as the smaller U2NETP version. Two sets @@ -127,11 +139,8 @@ detection and human segmentation. 
MODEL_DIR = "model" model_path = Path(MODEL_DIR) / u2net_model.name / Path(u2net_model.name).with_suffix(".pth") - -.. _load-the-u2-net-model: - -Load the U\ :math:`^2`-Net Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load the U\ :math:`^2`-Net Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The U\ :math:`^2`-Net human segmentation model weights are stored on Google Drive. They will be downloaded if they are not present yet. The @@ -159,7 +168,7 @@ next cell loads the model and the pre-trained weights. Downloading... From: https://drive.google.com/uc?id=1rbSTGKAE-MTxBYHd-51l2hMOQPT_7EPy To: <_io.BufferedWriter name='model/u2net_lite/u2net_lite.pth'> - 100%|██████████| 4.68M/4.68M [00:01<00:00, 4.03MB/s] + 100%|██████████| 4.68M/4.68M [00:01<00:00, 3.92MB/s] .. parsed-literal:: @@ -175,8 +184,6 @@ next cell loads the model and the pre-trained weights. # Load the weights. print(f"Loading model weights from: '{model_path}'") net.load_state_dict(state_dict=torch.load(model_path, map_location="cpu")) - - torch.onnx.export(net, torch.zeros((1,3,512,512)), "u2net.onnx") .. parsed-literal:: @@ -184,37 +191,33 @@ next cell loads the model and the pre-trained weights. Loading model weights from: 'model/u2net_lite/u2net_lite.pth' -.. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/functional.py:3734: UserWarning: nn.functional.upsample is deprecated. Use nn.functional.interpolate instead. - warnings.warn("nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.") - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/functional.py:1967: UserWarning: nn.functional.sigmoid is deprecated. Use torch.sigmoid instead. 
- warnings.warn("nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.") - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( +.. parsed-literal:: + + -.. 
_convert-pytorch-u2-net-model-to-openvino-ir: -Convert PyTorch U\ :math:`^2`-Net model to OpenVINO IR -############################################################################################################################### -Convert Pytorch model to OpenVINO IR Format -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Convert PyTorch U\ :math:`^2`-Net model to OpenVINO IR +------------------------------------------------------------------------------------------------ We use model conversion Python API to convert the Pytorch model to OpenVINO IR format. Executing the following command may take a while. .. code:: ipython3 - model_ir = ov.convert_model("u2net.onnx") + model_ir = ov.convert_model(net, example_input=torch.zeros((1,3,512,512)), input=([1, 3, 512, 512])) + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/functional.py:3769: UserWarning: nn.functional.upsample is deprecated. Use nn.functional.interpolate instead. + warnings.warn("nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.") + -Load and Pre-Process Input Image -############################################################################################################################### +Load and Pre-Process Input Image +-------------------------------------------------------------------------- While OpenCV reads images in ``BGR`` format, the OpenVINO IR model expects images in ``RGB``. 
Therefore, convert the images to ``RGB``, @@ -249,10 +252,10 @@ repository `__ and multiplied by input_image = (input_image - input_mean) / input_scale -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -277,8 +280,8 @@ Select device from dropdown list for running inference using OpenVINO: -Do Inference on OpenVINO IR Model -############################################################################################################################### +Do Inference on OpenVINO IR Model +--------------------------------------------------------------------------- Load the OpenVINO IR model to OpenVINO Runtime and do inference. @@ -303,11 +306,11 @@ Load the OpenVINO IR model to OpenVINO Runtime and do inference. .. parsed-literal:: - Inference finished. Inference time: 0.117 seconds, FPS: 8.56. + Inference finished. Inference time: 0.116 seconds, FPS: 8.62. -Visualize Results -############################################################################################################################### +Visualize Results +----------------------------------------------------------- Show the original image, the segmentation result, and the original image with the background removed. @@ -337,8 +340,8 @@ with the background removed. .. 
image:: 205-vision-background-removal-with-output_files/205-vision-background-removal-with-output_22_0.png -Add a Background Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Add a Background Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the segmentation result, all foreground pixels have a value of 1, all background pixels a value of 0. Replace the background image as follows: @@ -402,8 +405,8 @@ background pixels a value of 0. Replace the background image as follows: The generated image coco_hollywood-wall.jpg is saved in the directory output. You can also download the image by clicking on this link: output/coco_hollywood-wall.jpg
-References -############################################################################################################################### +References +---------------------------------------------------- - `PIP install openvino-dev `__ diff --git a/docs/notebooks/205-vision-background-removal-with-output_files/index.html b/docs/notebooks/205-vision-background-removal-with-output_files/index.html index c264929e7deeb8..30853a2bc86a4d 100644 --- a/docs/notebooks/205-vision-background-removal-with-output_files/index.html +++ b/docs/notebooks/205-vision-background-removal-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/205-vision-background-removal-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/205-vision-background-removal-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/205-vision-background-removal-with-output_files/


../
-205-vision-background-removal-with-output_22_0.png 16-Aug-2023 01:31              279567
-205-vision-background-removal-with-output_24_0.png 16-Aug-2023 01:31              927148
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/205-vision-background-removal-with-output_files/


../
+205-vision-background-removal-with-output_22_0.png 31-Oct-2023 00:35              279572
+205-vision-background-removal-with-output_24_0.png 31-Oct-2023 00:35              927043
 

diff --git a/docs/notebooks/206-vision-paddlegan-anime-with-output.rst b/docs/notebooks/206-vision-paddlegan-anime-with-output.rst index 59b8ea9d1d3e91..fe600a809f1f09 100644 --- a/docs/notebooks/206-vision-paddlegan-anime-with-output.rst +++ b/docs/notebooks/206-vision-paddlegan-anime-with-output.rst @@ -16,61 +16,73 @@ documentation `__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ - - `Functions <#functions>`__ +- `Preparation <#preparation>`__ -- `Inference on PaddleGAN Model <#inference-on-paddlegan-model>`__ + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ + - `Functions <#functions>`__ - - `Show Inference Results on PaddleGAN model <#show-inference-results-on-paddlegan-model>`__ +- `Inference on PaddleGAN + Model <#inference-on-paddlegan-model>`__ -- `Model Conversion to ONNX and OpenVINO IR <#model-conversion-to-onnx-and-openvino-ir>`__ + - `Show Inference Results on PaddleGAN + model <#show-inference-results-on-paddlegan-model>`__ - - `Convert to ONNX <#convert-to-onnx>`__ - - `Convert to OpenVINO IR <#convert-to-openvino-ir>`__ +- `Model Conversion to ONNX and OpenVINO + IR <#model-conversion-to-onnx-and-openvino-ir>`__ -- `Show Inference Results on OpenVINO IR and PaddleGAN Models <#show-inference-results-on-openvino-ir-and-paddlegan-models>`__ + - `Convert to ONNX <#convert-to-onnx>`__ + - `Convert to OpenVINO IR <#convert-to-openvino-ir>`__ - - `Create Postprocessing Functions <#create-postprocessing-functions>`__ - - `Do Inference on OpenVINO IR Model <#do-inference-on-openvino-ir-model>`__ +- `Show Inference Results on OpenVINO IR and PaddleGAN + Models <#show-inference-results-on-openvino-ir-and-paddlegan-models>`__ - - `Select inference device <#select-inference-device>`__ + - `Create Postprocessing + Functions <#create-postprocessing-functions>`__ + - `Do Inference on OpenVINO IR + Model <#do-inference-on-openvino-ir-model>`__ 
-- `Performance Comparison <#performance-comparison>`__ -- `References <#references>`__ + - `Select inference device <#select-inference-device>`__ -Preparation -############################################################################################################################### +- `Performance Comparison <#performance-comparison>`__ +- `References <#references>`__ -Install requirements -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" - !pip install -q "paddlepaddle==2.5.0" "paddle2onnx>=0.6" - !pip install -q "git+https://github.com/PaddlePaddle/PaddleGAN.git" --no-deps + %pip install -q "paddlepaddle>=2.5.1" "paddle2onnx>=0.6" + %pip install -q "git+https://github.com/PaddlePaddle/PaddleGAN.git" --no-deps - !pip install -q opencv-python matplotlib scikit-learn scikit-image - !pip install -q "imageio==2.9.0" "imageio-ffmpeg" "numba>=0.53.1" easydict munch natsort + %pip install -q opencv-python matplotlib scikit-learn scikit-image + %pip install -q "imageio==2.9.0" "imageio-ffmpeg" "numba>=0.53.1" easydict munch natsort .. parsed-literal:: + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. paddleclas 2.5.1 requires faiss-cpu==1.7.1.post2, but you have faiss-cpu 1.7.4 which is incompatible. 
paddleclas 2.5.1 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.10.1 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.0.76 which is incompatible. + ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.1.78 which is incompatible. scikit-image 0.21.0 requires imageio>=2.27, but you have imageio 2.9.0 which is incompatible. - + Note: you may need to restart the kernel to use updated packages. + -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -113,8 +125,8 @@ Imports ) raise -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -128,8 +140,8 @@ Settings ir_path = model_path.with_suffix(".xml") onnx_path = model_path.with_suffix(".onnx") -Functions -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -143,8 +155,8 @@ Functions image = cv2.resize(image, (max_width, new_height)) return image -Inference on PaddleGAN Model -############################################################################################################################### +Inference on PaddleGAN Model +---------------------------------------------------------------------- The PaddleGAN `documentation `__ @@ -161,7 +173,7 @@ source of the function. .. 
parsed-literal:: - [09/08 23:30:31] ppgan INFO: Found /opt/home/k8sworker/.cache/ppgan/animeganv2_hayao.pdparams + [10/30 23:17:56] ppgan INFO: Found /opt/home/k8sworker/.cache/ppgan/animeganv2_hayao.pdparams .. code:: ipython3 @@ -239,8 +251,8 @@ cell. The anime image was saved to output/coco_bricks_anime_pg.jpg -Show Inference Results on PaddleGAN model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show Inference Results on PaddleGAN model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -256,15 +268,15 @@ Show Inference Results on PaddleGAN model .. image:: 206-vision-paddlegan-anime-with-output_files/206-vision-paddlegan-anime-with-output_15_0.png -Model Conversion to ONNX and OpenVINO IR -############################################################################################################################### +Model Conversion to ONNX and OpenVINO IR +---------------------------------------------------------------------------------- Convert the PaddleGAN model to OpenVINO IR by first converting PaddleGAN to ONNX with ``paddle2onnx`` and then converting the ONNX model to OpenVINO IR with model conversion API. -Convert to ONNX -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert to ONNX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Exporting to ONNX requires specifying an input shape with PaddlePaddle ``InputSpec`` and calling ``paddle.onnx.export``. Then, check the input @@ -296,23 +308,23 @@ succeeds, the output of the next cell will include .. parsed-literal:: - 2023-09-08 23:30:39 [INFO] Static PaddlePaddle model saved in model/paddle_model_static_onnx_temp_dir. + 2023-10-30 23:18:04 [INFO] Static PaddlePaddle model saved in model/paddle_model_static_onnx_temp_dir. [Paddle2ONNX] Start to parse PaddlePaddle model... 
[Paddle2ONNX] Model file path: model/paddle_model_static_onnx_temp_dir/model.pdmodel [Paddle2ONNX] Paramters file path: model/paddle_model_static_onnx_temp_dir/model.pdiparams [Paddle2ONNX] Start to parsing Paddle model... + 2023-10-30 23:18:04 [INFO] ONNX model saved in model/paddlegan_anime.onnx. [Paddle2ONNX] Use opset_version = 11 for ONNX export. [Paddle2ONNX] PaddlePaddle model is exported as ONNX format now. - 2023-09-08 23:30:39 [INFO] ONNX model saved in model/paddlegan_anime.onnx. .. parsed-literal:: - I0908 23:30:39.290753 670433 interpretercore.cc:237] New Executor is Running. + I1030 23:18:04.775377 1175742 interpretercore.cc:237] New Executor is Running. -Convert to OpenVINO IR -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The OpenVINO IR format enables storing the preprocessing normalization in the model file. 
It is then no longer necessary to normalize input @@ -374,8 +386,8 @@ API `__ - `Paddle2ONNX `__ diff --git a/docs/notebooks/206-vision-paddlegan-anime-with-output_files/206-vision-paddlegan-anime-with-output_37_0.png b/docs/notebooks/206-vision-paddlegan-anime-with-output_files/206-vision-paddlegan-anime-with-output_37_0.png index be078e36f1b340..25ac5ed16fd616 100644 --- a/docs/notebooks/206-vision-paddlegan-anime-with-output_files/206-vision-paddlegan-anime-with-output_37_0.png +++ b/docs/notebooks/206-vision-paddlegan-anime-with-output_files/206-vision-paddlegan-anime-with-output_37_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcdc8be2139b3dc2632e54917fca3d650ed325ed0cbc018f564e5a5fe69675e3 -size 1931633 +oid sha256:7bb732dc0d04eaa6c89b6832d1a2b86bc4896aecee27193ee9fcaf123d0a28d8 +size 1931637 diff --git a/docs/notebooks/206-vision-paddlegan-anime-with-output_files/index.html b/docs/notebooks/206-vision-paddlegan-anime-with-output_files/index.html index c060ec41c78c94..52d4339a4a4478 100644 --- a/docs/notebooks/206-vision-paddlegan-anime-with-output_files/index.html +++ b/docs/notebooks/206-vision-paddlegan-anime-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/206-vision-paddlegan-anime-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/206-vision-paddlegan-anime-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/206-vision-paddlegan-anime-with-output_files/


../
-206-vision-paddlegan-anime-with-output_15_0.png    16-Aug-2023 01:31             1810982
-206-vision-paddlegan-anime-with-output_37_0.png    16-Aug-2023 01:31             1931653
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/206-vision-paddlegan-anime-with-output_files/


../
+206-vision-paddlegan-anime-with-output_15_0.png    31-Oct-2023 00:35             1810982
+206-vision-paddlegan-anime-with-output_37_0.png    31-Oct-2023 00:35             1931637
 

diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output.rst b/docs/notebooks/207-vision-paddlegan-superresolution-with-output.rst index d91bd4632495b5..ee4b4ed06b07cf 100644 --- a/docs/notebooks/207-vision-paddlegan-superresolution-with-output.rst +++ b/docs/notebooks/207-vision-paddlegan-superresolution-with-output.rst @@ -18,49 +18,62 @@ This notebook works best with small images (up to 800x600 resolution). **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Inference on PaddlePaddle Model <#inference-on-paddlepaddle-model>`__ - - `Investigate PaddleGAN Model <#investigate-paddlegan-model>`__ - - `Do Inference <#do-inference>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Inference on PaddlePaddle + Model <#inference-on-paddlepaddle-model>`__ -- `Convert PaddleGAN Model to ONNX and OpenVINO IR <#convert-paddlegan-model-to-onnx-and-openvino-ir>`__ + - `Investigate PaddleGAN + Model <#investigate-paddlegan-model>`__ + - `Do Inference <#do-inference>`__ - - `Convert PaddlePaddle Model to ONNX <#convert-paddlepaddle-model-to-onnx>`__ - - `Convert ONNX Model to OpenVINO IR with Model Conversion Python API <#convert-onnx-model-to-openvino-ir-with-model-conversion-python-api>`__ +- `Convert PaddleGAN Model to ONNX and OpenVINO + IR <#convert-paddlegan-model-to-onnx-and-openvino-ir>`__ -- `Do Inference on OpenVINO IR Model <#do-inference-on-openvino-ir-model>`__ + - `Convert PaddlePaddle Model to + ONNX <#convert-paddlepaddle-model-to-onnx>`__ + - `Convert ONNX Model to OpenVINO IR with Model Conversion Python + API <#convert-onnx-model-to-openvino-ir-with-model-conversion-python-api>`__ - - `Select inference device <#select-inference-device>`__ - - `Show an Animated GIF <#show-an-animated-gif>`__ - - `Create a Comparison Video <#create-a-comparison-video>`__ +- `Do Inference on OpenVINO IR + Model <#do-inference-on-openvino-ir-model>`__ -Imports 
-############################################################################################################################### + - `Select inference device <#select-inference-device>`__ + - `Show an Animated GIF <#show-an-animated-gif>`__ + - `Create a Comparison Video <#create-a-comparison-video>`__ + + - `Download the video <#download-the-video>`__ + +Imports +------------------------------------------------- .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" - !pip install -q "paddlepaddle==2.5.0rc0" "paddle2onnx>=0.6" + %pip install -q "paddlepaddle>=2.5.1" "paddle2onnx>=0.6" - !pip install -q "imageio==2.9.0" "imageio-ffmpeg" "numba>=0.53.1" "easydict" "munch" "natsort" - !pip install -q "git+https://github.com/PaddlePaddle/PaddleGAN.git" --no-deps - !pip install -q scikit-image + %pip install -q "imageio==2.9.0" "imageio-ffmpeg" "numba>=0.53.1" "easydict" "munch" "natsort" + %pip install -q "git+https://github.com/PaddlePaddle/PaddleGAN.git" --no-deps + %pip install -q scikit-image .. parsed-literal:: + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.3 which is incompatible. + ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.6 which is incompatible. ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.10.1 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.0.76 which is incompatible. 
- + ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.1.78 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 - import sys import time import warnings from pathlib import Path @@ -76,11 +89,16 @@ Imports from paddle.static import InputSpec from ppgan.apps import RealSRPredictor - sys.path.append("../utils") - from notebook_utils import NotebookAlert + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import NotebookAlert, download_file -Settings -############################################################################################################################### +Settings +-------------------------------------------------- .. code:: ipython3 @@ -94,11 +112,11 @@ Settings ir_path = model_path.with_suffix(".xml") onnx_path = model_path.with_suffix(".onnx") -Inference on PaddlePaddle Model -############################################################################################################################### +Inference on PaddlePaddle Model +------------------------------------------------------------------------- -Investigate PaddleGAN Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Investigate PaddleGAN Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `PaddleGAN documentation `__ explains @@ -116,7 +134,7 @@ source code. .. parsed-literal:: - [09/08 23:31:15] ppgan INFO: Found /opt/home/k8sworker/.cache/ppgan/DF2K_JPEG.pdparams + [10/30 23:18:37] ppgan INFO: Found /opt/home/k8sworker/.cache/ppgan/DF2K_JPEG.pdparams .. code:: ipython3 @@ -154,13 +172,28 @@ To get more information about how the model looks like, use the # sr.model?? 
-Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To show inference on the PaddlePaddle model, set ``PADDLEGAN_INFERENCE`` to ``True`` in the cell below. Keep in mind that performing inference may take some time. +.. code:: ipython3 + + # Load the image from openvino storage + IMAGE_PATH = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_tulips.jpg", + directory="data" + ) + + + +.. parsed-literal:: + + data/coco_tulips.jpg: 0%| | 0.00/63.6k [00:00`__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Convert ONNX Model to OpenVINO IR with `Model Conversion Python API `__ -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. code:: ipython3 print("Exporting ONNX model to OpenVINO IR... This may take a few minutes.") @@ -263,8 +296,8 @@ Convert ONNX Model to OpenVINO IR with `Model Conversion Python API 800: NotebookAlert( @@ -320,12 +351,12 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - + -.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_25_1.png +.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_26_1.png .. code:: ipython3 @@ -349,7 +380,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Inference duration: 3.26 seconds + Inference duration: 3.27 seconds .. 
code:: ipython3 @@ -372,16 +403,16 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - + -.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_29_1.png +.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_30_1.png -Show an Animated GIF -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show an Animated GIF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To visualize the difference between the bicubic image and the superresolution image, create an animated GIF image that switches @@ -409,13 +440,13 @@ between both versions. -.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_31_0.png +.. image:: 207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_32_0.png :width: 960px -Create a Comparison Video -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create a Comparison Video +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create a video with a “slider”, showing the bicubic image to the right and the superresolution image on the left. @@ -429,7 +460,6 @@ open it directly from the images directory, and play it locally. .. code:: ipython3 FOURCC = cv2.VideoWriter_fourcc(*"MJPG") - IMAGE_PATH = Path(IMAGE_PATH) result_video_path = OUTPUT_DIR / Path(f"{IMAGE_PATH.stem}_comparison_paddlegan.avi") video_target_height, video_target_width = ( image_super.shape[0] // 2, @@ -472,10 +502,26 @@ open it directly from the images directory, and play it locally. 
progress_bar.update() out_video.release() clear_output() + +Download the video +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Click the link below to download the video, or just run the cell if +you are using Google Colab. + +.. code:: ipython3 + + if 'google.colab' in str(get_ipython()): + # Save a file + from google.colab import files - video_link = FileLink(result_video_path) - video_link.html_link_str = "%s" - display(HTML(f"The video has been saved to {video_link._repr_html_()}")) + # Save the file to the local file system + with open(result_video_path, 'r') as f: + files.download(result_video_path) + else: + video_link = FileLink(result_video_path) + video_link.html_link_str = "%s" + display(HTML(f"The video has been saved to {video_link._repr_html_()}")) diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_25_1.png b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_26_1.png similarity index 100% rename from docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_25_1.png rename to docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_26_1.png diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_29_1.png b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_29_1.png deleted file mode 100644 index 44a761a1162526..00000000000000 --- a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_29_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f48ad1228e9709e05ef8ecea327b59a021d0a9e94eed951aa1fe481188b9a2ae -size 476190 diff
--git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_30_1.png b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_30_1.png new file mode 100644 index 00000000000000..55c66d3708dab6 --- /dev/null +++ b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_30_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37b3df35c381bd12ba64534d20be93e3eb4bdfdf793b07a6de4c32972e9c530 +size 476190 diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_31_0.png b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_31_0.png deleted file mode 100644 index bcf9970059c0bb..00000000000000 --- a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_31_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e07b52d5b664f1e00daebc13d9d7bbbecc479445b43fd04b5f5488cdfa8ab739 -size 2835354 diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_32_0.png b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_32_0.png new file mode 100644 index 00000000000000..062dba32f88d54 --- /dev/null +++ b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/207-vision-paddlegan-superresolution-with-output_32_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8755f958d2167fe3f50bd0fd5a306613a2a3800cc15b6b2b3ebcb30cb7b60bae +size 2835305 diff --git a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/index.html 
b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/index.html index 27b987aa18cbb3..88abb21de2e406 100644 --- a/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/index.html +++ b/docs/notebooks/207-vision-paddlegan-superresolution-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/207-vision-paddlegan-superresolution-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/207-vision-paddlegan-superresolution-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/207-vision-paddlegan-superresolution-with-output_files/


../
-207-vision-paddlegan-superresolution-with-outpu..> 16-Aug-2023 01:31              436999
-207-vision-paddlegan-superresolution-with-outpu..> 16-Aug-2023 01:31              476190
-207-vision-paddlegan-superresolution-with-outpu..> 16-Aug-2023 01:31             2835354
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/207-vision-paddlegan-superresolution-with-output_files/


../
+207-vision-paddlegan-superresolution-with-outpu..> 31-Oct-2023 00:35              436999
+207-vision-paddlegan-superresolution-with-outpu..> 31-Oct-2023 00:35              476190
+207-vision-paddlegan-superresolution-with-outpu..> 31-Oct-2023 00:35             2835305
 

diff --git a/docs/notebooks/208-optical-character-recognition-with-output.rst b/docs/notebooks/208-optical-character-recognition-with-output.rst index eaa1776aaae672..15e9bad1737d39 100644 --- a/docs/notebooks/208-optical-character-recognition-with-output.rst +++ b/docs/notebooks/208-optical-character-recognition-with-output.rst @@ -21,40 +21,50 @@ information, refer to the **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Download Models <#download-models>`__ -- `Convert Models <#convert-models>`__ -- `Select inference device <#select-inference-device>`__ -- `Object Detection <#object-detection>`__ - - `Load a Detection Model <#load-a-detection-model>`__ - - `Load an Image <#load-an-image>`__ - - `Do Inference <#do-inference>`__ - - `Get Detection Results <#get-detection-results>`__ +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Download Models <#download-models>`__ +- `Convert Models <#convert-models>`__ +- `Select inference device <#select-inference-device>`__ +- `Object Detection <#object-detection>`__ -- `Text Recognition <#text-recognition>`__ + - `Load a Detection Model <#load-a-detection-model>`__ + - `Load an Image <#load-an-image>`__ + - `Do Inference <#do-inference>`__ + - `Get Detection Results <#get-detection-results>`__ - - `Load Text Recognition Model <#load-text-recognition-model>`__ - - `Do Inference <#do-the-inference>`__ +- `Text Recognition <#text-recognition>`__ -- `Show Results <#show-results>`__ + - `Load Text Recognition + Model <#load-text-recognition-model>`__ + - `Do Inference <#do-inference>`__ - - `Show Detected Text Boxes and OCR Results for the Image <#show-detected-text-boxes-and-ocr-results-for-the-image>`__ - - `Show the OCR Result per Bounding Box <#show-the-ocr-result-per-bounding-box>`__ - - `Print Annotations in Plain Text Format <#print-annotations-in-plain-text-format>`__ +- `Show Results <#show-results>`__ + + - `Show Detected Text Boxes and OCR Results for the + Image 
<#show-detected-text-boxes-and-ocr-results-for-the-image>`__ + - `Show the OCR Result per Bounding + Box <#show-the-ocr-result-per-bounding-box>`__ + - `Print Annotations in Plain Text + Format <#print-annotations-in-plain-text-format>`__ .. code:: ipython3 # Install openvino-dev package - !pip install -q "openvino-dev==2023.1.0.dev20230811" + %pip install -q "openvino-dev>=2023.1.0" onnx + + +.. parsed-literal:: -Imports -############################################################################################################################### + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 - import sys from pathlib import Path import cv2 @@ -64,11 +74,16 @@ Imports from IPython.display import Markdown, display from PIL import Image - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) from notebook_utils import load_image -Settings -############################################################################################################################### +Settings +-------------------------------------------------- .. code:: ipython3 @@ -81,8 +96,8 @@ Settings model_dir.mkdir(exist_ok=True) -Download Models -############################################################################################################################### +Download Models +--------------------------------------------------------- The next cells will run Model Downloader to download the detection and recognition models. If the models have been downloaded before, they will @@ -299,8 +314,8 @@ text-recognition-resnet-fc. 
# for line in download_result: # print(line) -Convert Models -############################################################################################################################### +Convert Models +-------------------------------------------------------- The downloaded detection model is an Intel model, which is already in OpenVINO Intermediate Representation (OpenVINO IR) format. The text @@ -332,27 +347,27 @@ Converting text-recognition-resnet-fc… .. parsed-literal:: ========== Converting text-recognition-resnet-fc to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-path=/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py 
--model-path=/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/models/public/text-recognition-resnet-fc --model-path=model/public/text-recognition-resnet-fc --model-name=get_model --import-module=model '--model-param=file_config=r"model/public/text-recognition-resnet-fc/vedastr/configs/resnet_fc.py"' '--model-param=weights=r"model/public/text-recognition-resnet-fc/vedastr/ckpt/resnet_fc.pth"' --input-shape=1,1,32,100 --input-names=input --output-names=output --output-file=model/public/text-recognition-resnet-fc/resnet_fc.onnx ONNX check passed successfully. ========== Converting text-recognition-resnet-fc to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/text-recognition-resnet-fc/FP16 --model_name=text-recognition-resnet-fc --input=input '--mean_values=input[127.5]' '--scale_values=input[127.5]' --output=output --input_model=model/public/text-recognition-resnet-fc/resnet_fc.onnx '--layout=input(NCHW)' '--input_shape=[1, 1, 32, 100]' --compress_to_fp16=True [ INFO ] Generated IR will be compressed to FP16. 
If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/208-optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/208-optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/208-optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/208-optical-character-recognition/model/public/text-recognition-resnet-fc/FP16/text-recognition-resnet-fc.bin -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown 
list for running inference using OpenVINO .. code:: ipython3 @@ -376,14 +391,14 @@ Select device from dropdown list for running inference using OpenVINO: -Object Detection -############################################################################################################################### +Object Detection +---------------------------------------------------------- Load a detection model, load an image, do inference and get the detection inference result. -Load a Detection Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load a Detection Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -394,13 +409,13 @@ Load a Detection Model detection_input_layer = detection_compiled_model.input(0) -Load an Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load an Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 # The `image_file` variable can point to a URL or a local image. - image_file = "../data/image/intel_rnb.jpg" + image_file = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg" image = load_image(image_file) @@ -420,8 +435,8 @@ Load an Image .. image:: 208-optical-character-recognition-with-output_files/208-optical-character-recognition-with-output_16_0.png -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Text boxes are detected in the images and returned as blobs of data in the shape of ``[100, 5]``. Each description of detection has the @@ -435,8 +450,8 @@ the shape of ``[100, 5]``. Each description of detection has the # Remove zero only boxes. 
boxes = boxes[~np.all(boxes == 0, axis=1)] -Get Detection Results -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get Detection Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -504,14 +519,14 @@ Get Detection Results return rgb_image -Text Recognition -############################################################################################################################### +Text Recognition +---------------------------------------------------------- Load the text recognition model and do inference on the detected boxes from the detection model. -Load Text Recognition Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load Text Recognition Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -527,8 +542,8 @@ Load Text Recognition Model # Get the height and width of the input layer. _, _, H, W = recognition_input_layer.shape -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -573,11 +588,11 @@ Do Inference boxes_with_annotations = list(zip(boxes, annotations)) -Show Results -############################################################################################################################### +Show Results +------------------------------------------------------ -Show Detected Text Boxes and OCR Results for the Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show Detected Text Boxes and OCR Results for the Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Visualize the result by drawing boxes around recognized text and showing the OCR result from the text recognition model. @@ -592,8 +607,8 @@ the OCR result from the text recognition model. .. image:: 208-optical-character-recognition-with-output_files/208-optical-character-recognition-with-output_26_0.png -Show the OCR Result per Bounding Box -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Show the OCR Result per Bounding Box +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Depending on the image, the OCR result may not be readable in the image with boxes, as displayed in the cell above. Use the code below to @@ -653,8 +668,8 @@ center robert -Print Annotations in Plain Text Format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Print Annotations in Plain Text Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Print annotations for detected text based on their position in the input image, starting from the upper left corner. 
diff --git a/docs/notebooks/208-optical-character-recognition-with-output_files/index.html b/docs/notebooks/208-optical-character-recognition-with-output_files/index.html index 09bf331cf607a2..f1976061cd720e 100644 --- a/docs/notebooks/208-optical-character-recognition-with-output_files/index.html +++ b/docs/notebooks/208-optical-character-recognition-with-output_files/index.html @@ -1,20 +1,20 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/208-optical-character-recognition-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/208-optical-character-recognition-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/208-optical-character-recognition-with-output_files/


../
-208-optical-character-recognition-with-output_1..> 16-Aug-2023 01:31              305482
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31              923631
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                1996
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31               11367
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                1990
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31               11142
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                1630
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                8428
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                 949
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                2274
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                 817
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                1559
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                 838
-208-optical-character-recognition-with-output_2..> 16-Aug-2023 01:31                1487
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/208-optical-character-recognition-with-output_files/


../
+208-optical-character-recognition-with-output_1..> 31-Oct-2023 00:35              305482
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35              923631
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                1996
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35               11367
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                1990
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35               11142
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                1630
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                8428
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                 949
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                2274
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                 817
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                1559
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                 838
+208-optical-character-recognition-with-output_2..> 31-Oct-2023 00:35                1487
 

diff --git a/docs/notebooks/209-handwritten-ocr-with-output.rst b/docs/notebooks/209-handwritten-ocr-with-output.rst index 56413111683090..64e1668efdfb7f 100644 --- a/docs/notebooks/209-handwritten-ocr-with-output.rst +++ b/docs/notebooks/209-handwritten-ocr-with-output.rst @@ -20,49 +20,66 @@ Zoo `__. **Table of contents:** -- `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Select a Language <#select-a-language>`__ -- `Download the Model <#download-the-model>`__ -- `Load the Model and Execute <#load-the-model-and-execute>`__ -- `Select inference device <#select-inference-device>`__ -- `Fetch Information About Input and Output Layers <#fetch-information-about-input-and-output-layers>`__ -- `Load an Image <#load-an-image>`__ -- `Visualize Input Image <#visualize-input-image>`__ -- `Prepare Charlist <#prepare-charlist>`__ -- `Run Inference <#run-inference>`__ -- `Process the Output Data <#process-the-output-data>`__ -- `Print the Output <#print-the-output>`__ + +- `Imports <#imports>`__ +- `Settings <#settings>`__ +- `Select a Language <#select-a-language>`__ +- `Download the Model <#download-the-model>`__ +- `Load the Model and Execute <#load-the-model-and-execute>`__ +- `Select inference device <#select-inference-device>`__ +- `Fetch Information About Input and Output + Layers <#fetch-information-about-input-and-output-layers>`__ +- `Load an Image <#load-an-image>`__ +- `Visualize Input Image <#visualize-input-image>`__ +- `Prepare Charlist <#prepare-charlist>`__ +- `Run Inference <#run-inference>`__ +- `Process the Output Data <#process-the-output-data>`__ +- `Print the Output <#print-the-output>`__ .. code:: ipython3 # Install openvino-dev package - !pip install -q "openvino-dev==2023.1.0.dev20230811" + %pip install -q "openvino>=2023.1.0" + %pip install -q matplotlib numpy + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. 
-Imports -############################################################################################################################### + +Imports +------------------------------------------------- .. code:: ipython3 from collections import namedtuple from itertools import groupby - from pathlib import Path import cv2 import matplotlib.pyplot as plt import numpy as np import openvino as ov + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file Settings -############################################################################################################################### +-------------------------------------------------- -Set up all constants and folders used in this notebook: +Set up all constants and folders used in this notebook .. code:: ipython3 # Directories where data will be placed. - model_folder = "model" - data_folder = "../data" + base_models_dir = "models" + data_folder = "data" charlist_folder = f"{data_folder}/text" # Precision used by the model. @@ -88,7 +105,7 @@ To group files, you have to define the collection. In this case, use ) Select a Language -############################################################################################################################### +----------------------------------------------------------- Depending on your choice you will need to change a line of code in the cell below. @@ -106,7 +123,7 @@ If you want to perform OCR on a text in Japanese, set selected_language = languages.get(language) Download the Model -############################################################################################################################### +------------------------------------------------------------ In addition to images and charlists, you need to download the model file. 
In the sections below, there are cells for downloading either the @@ -115,34 +132,36 @@ Chinese or Japanese model. If it is your first time running the notebook, the model will be downloaded. It may take a few minutes. -Use ``omz_downloader``, which is a command-line tool from the -``openvino-dev`` package. It automatically creates a directory structure -and downloads the selected model. +Use ``download_file`` function from the utils package, which +automatically creates a directory structure and downloads the selected +model file. .. code:: ipython3 - path_to_model_weights = Path(f'{model_folder}/intel/{selected_language.model_name}/{precision}/{selected_language.model_name}.bin') - if not path_to_model_weights.is_file(): - download_command = f'omz_downloader --name {selected_language.model_name} --output_dir {model_folder} --precision {precision}' - print(download_command) - ! $download_command + path_to_model = download_file( + url=f'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{selected_language.model_name}/{precision}/{selected_language.model_name}.xml', + directory=base_models_dir + ) + _ = download_file( + url=f'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{selected_language.model_name}/{precision}/{selected_language.model_name}.bin', + directory=base_models_dir + ) + .. 
parsed-literal:: - omz_downloader --name handwritten-simplified-chinese-recognition-0001 --output_dir model --precision FP16 - ################|| Downloading handwritten-simplified-chinese-recognition-0001 ||################ - - ========== Downloading model/intel/handwritten-simplified-chinese-recognition-0001/FP16/handwritten-simplified-chinese-recognition-0001.xml - - - ========== Downloading model/intel/handwritten-simplified-chinese-recognition-0001/FP16/handwritten-simplified-chinese-recognition-0001.bin - - + models/handwritten-simplified-chinese-recognition-0001.xml: 0%| | 0.00/108k [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/209-handwritten-ocr-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/209-handwritten-ocr-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/209-handwritten-ocr-with-output_files/


../
-209-handwritten-ocr-with-output_21_0.png           16-Aug-2023 01:31               53571
-209-handwritten-ocr-with-output_30_1.png           16-Aug-2023 01:31               53571
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/209-handwritten-ocr-with-output_files/


../
+209-handwritten-ocr-with-output_22_0.png           31-Oct-2023 00:35               53571
+209-handwritten-ocr-with-output_32_1.png           31-Oct-2023 00:35               53571
 

diff --git a/docs/notebooks/210-slowfast-video-recognition-with-output.rst b/docs/notebooks/210-slowfast-video-recognition-with-output.rst index b7f3fdf9ae7e83..db2faf795143ea 100644 --- a/docs/notebooks/210-slowfast-video-recognition-with-output.rst +++ b/docs/notebooks/210-slowfast-video-recognition-with-output.rst @@ -40,29 +40,32 @@ This tutorial consists of the following steps **Table of contents:** -- `Prepare PyTorch Model <#prepare-pytorch-model>`__ - - `Install necessary packages <#install-necessary-packages>`__ - - `Imports and Settings <#imports-and-settings>`__ +- `Prepare PyTorch Model <#prepare-pytorch-model>`__ -- `Export to ONNX <#export-to-onnx>`__ -- `Convert ONNX to OpenVINO™ Intermediate Representation <#convert-onnx-to-openvino-intermediate-representation>`__ -- `Select inference device <#select-inference-device>`__ -- `Verify Model Inference <#verify-model-inference>`__ + - `Install necessary + packages <#install-necessary-packages>`__ + - `Imports and Settings <#imports-and-settings>`__ -Prepare PyTorch Model -############################################################################################################################### +- `Export to ONNX <#export-to-onnx>`__ +- `Convert ONNX to OpenVINO™ Intermediate + Representation <#convert-onnx-to-openvino-intermediate-representation>`__ +- `Select inference device <#select-inference-device>`__ +- `Verify Model Inference <#verify-model-inference>`__ -Install necessary packages -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prepare PyTorch Model +--------------------------------------------------------------- + +Install necessary packages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" + !pip install -q "openvino>=2023.1.0" !pip install -q fvcore -Imports and Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports and Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -164,20 +167,7 @@ each action. Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(256, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(64, 64, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( + (1-2): 2 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(256, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -209,20 +199,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(32, 8, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(8, 8, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(8, 32, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( + (1-2): 2 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(32, 8, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -263,33 +240,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(512, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_a): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(128, 128, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(128, 512, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(512, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_a): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(128, 128, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(128, 512, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (3): ResBlock( + (1-3): 3 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(512, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) (norm_a): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -321,33 +272,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(64, 16, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(16, 16, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(16, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(64, 16, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(16, 16, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(16, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (3): ResBlock( + (1-3): 3 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(64, 16, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -388,59 +313,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(1024, 256, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(256, 256, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(256, 1024, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(1024, 256, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(256, 256, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(256, 1024, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (3): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(1024, 256, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(256, 256, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(256, 1024, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): 
BatchNorm3d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (4): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(1024, 256, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(256, 256, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(256, 1024, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (5): ResBlock( + (1-5): 5 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(1024, 256, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -472,59 +345,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(128, 32, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(32, 32, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(32, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(128, 32, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(32, 32, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(32, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (3): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(128, 32, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(32, 32, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(32, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(128, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (4): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(128, 32, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(32, 32, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(32, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (5): ResBlock( + (1-5): 5 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(128, 32, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -565,20 +386,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(2048, 512, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(512, 512, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(512, 2048, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( + (1-2): 2 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(2048, 512, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -610,20 +418,7 @@ each action. 
Read more about the dataset and the paper ) (activation): ReLU() ) - (1): ResBlock( - (branch2): BottleneckBlock( - (conv_a): Conv3d(256, 64, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) - (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_a): ReLU() - (conv_b): Conv3d(64, 64, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False) - (norm_b): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (act_b): ReLU() - (conv_c): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False) - (norm_c): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - ) - (activation): ReLU() - ) - (2): ResBlock( + (1-2): 2 x ResBlock( (branch2): BottleneckBlock( (conv_a): Conv3d(256, 64, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False) (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) @@ -914,8 +709,8 @@ inference using the same. The top 5 predictions can be seen below. Predicted labels: archery, throwing axe, playing paintball, golf driving, riding or walking with horse -Export to ONNX -############################################################################################################################### +Export to ONNX +-------------------------------------------------------- Now that we have obtained our trained model and checked inference with it, we export the PyTorch model to Open Neural Network Exchange(ONNX) @@ -938,8 +733,8 @@ quantization. export_params=True, ) -Convert ONNX to OpenVINO Intermediate Representation -############################################################################################################################### +Convert ONNX to OpenVINO Intermediate Representation +---------------------------------------------------------------------------------------------- Now that our ONNX model is ready, we can convert it to IR format. 
In this format, the network is represented using two files: an ``xml`` file @@ -970,10 +765,10 @@ using the ``weights`` parameter. # read converted model conv_model = core.read_model(str(IR_PATH)) -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -1002,8 +797,8 @@ Select device from dropdown list for running inference using OpenVINO: # load model on device compiled_model = core.compile_model(model=conv_model, device_name=device.value) -Verify Model Inference -############################################################################################################################### +Verify Model Inference +---------------------------------------------------------------- Using the compiled model, we run inference on the same sample video and print the top 5 predictions again. diff --git a/docs/notebooks/211-speech-to-text-with-output.rst b/docs/notebooks/211-speech-to-text-with-output.rst index 424c1983689e0d..6c2725f74baa62 100644 --- a/docs/notebooks/211-speech-to-text-with-output.rst +++ b/docs/notebooks/211-speech-to-text-with-output.rst @@ -12,39 +12,46 @@ available from `Open Model Zoo `__. 
**Table of contents:** +--- - `Imports <#imports>`__ -- `Settings <#settings>`__ -- `Download and Convert Public Model <#download-and-convert-public-model>`__ - - `Download Model <#download-model>`__ - - `Convert Model <#convert-model>`__ +- `Settings <#settings>`__ +- `Download and Convert Public + Model <#download-and-convert-public-model>`__ -- `Audio Processing <#audio-processing>`__ + - `Download Model <#download-model>`__ + - `Convert Model <#convert-model>`__ - - `Define constants <#define-constants>`__ - - `Available Audio Formats <#available-audio-formats>`__ - - `Load Audio File <#load-audio-file>`__ - - `Visualize Audio File <#visualize-audio-file>`__ - - `Change Type of Data <#change-type-of-data>`__ - - `Convert Audio to Mel Spectrum <#convert-audio-to-mel-spectrum>`__ - - `Run Conversion from Audio to Mel Format <#run-conversion-from-audio-to-mel-format>`__ - - `Visualize Mel Spectrogram <#visualize-mel-spectrogram>`__ - - `Adjust Mel scale to Input <#adjust-mel-scale-to-input>`__ +- `Audio Processing <#audio-processing>`__ -- `Load the Model <#load-the-model>`__ + - `Define constants <#define-constants>`__ + - `Available Audio Formats <#available-audio-formats>`__ + - `Load Audio File <#load-audio-file>`__ + - `Visualize Audio File <#visualize-audio-file>`__ + - `Change Type of Data <#change-type-of-data>`__ + - `Convert Audio to Mel + Spectrum <#convert-audio-to-mel-spectrum>`__ + - `Run Conversion from Audio to Mel + Format <#run-conversion-from-audio-to-mel-format>`__ + - `Visualize Mel Spectrogram <#visualize-mel-spectrogram>`__ + - `Adjust Mel scale to Input <#adjust-mel-scale-to-input>`__ - - `Do Inference <#do-inference>`__ - - `Read Output <#read-output>`__ - - `Implementation of Decoding <#implementation-of-decoding>`__ - - `Run Decoding and Print Output <#run-decoding-and-print-output>`__ +- `Load the Model <#load-the-model>`__ -Imports 
-############################################################################################################################### + - `Do Inference <#do-inference>`__ + - `Read Output <#read-output>`__ + - `Implementation of + Decoding <#implementation-of-decoding>`__ + - `Run Decoding and Print + Output <#run-decoding-and-print-output>`__ + +Imports +------------------------------------------------- .. code:: ipython3 - !pip install -q "librosa>=0.8.1" "openvino-dev==2023.1.0.dev20230811" "onnx" + %pip install -q "librosa>=0.8.1" "openvino-dev>=2023.1.0" "numpy<1.24" .. code:: ipython3 @@ -60,9 +67,16 @@ Imports import numpy as np import scipy import openvino as ov + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file -Settings -############################################################################################################################### +Settings +-------------------------------------------------- In this part, all variables used in the notebook are set. @@ -70,20 +84,20 @@ In this part, all variables used in the notebook are set. model_folder = "model" download_folder = "output" - data_folder = "../data" + data_folder = "data" precision = "FP16" model_name = "quartznet-15x5-en" -Download and Convert Public Model -############################################################################################################################### +Download and Convert Public Model +--------------------------------------------------------------------------- If it is your first run, models will be downloaded and converted here. It my take a few minutes. Use ``omz_downloader`` and ``omz_converter``, which are command-line tools from the ``openvino-dev`` package. 
-Download Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``omz_downloader`` tool automatically creates a directory structure and downloads the selected model. This step is skipped if the model is @@ -100,15 +114,8 @@ Representation (OpenVINO IR). if not path_to_model_weights.is_dir() or len(downloaded_model_file) == 0: download_command = f"omz_downloader --name {model_name} --output_dir {download_folder} --precision {precision}" ! $download_command - -Convert Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -In previous step, model was downloaded in PyTorch format. Currently, -PyTorch models supported in OpenVINO via ONNX exporting, -``torch.onnx.export`` function helps to trace PyTorch model to ONNX and -save it on disk. It is also recommended to convert model to OpenVINO -Intermediate Representation format for applying optimizations. + + sys.path.insert(0, str(path_to_model_weights)) .. code:: ipython3 @@ -116,7 +123,7 @@ Intermediate Representation format for applying optimizations. """ helper function for converting QuartzNet model to IR The function accepts path to directory with dowloaded packages, weights and configs using OMZ downloader, - initialize model, export it to ONNX and then convert to OpenVINO model and serialize it to IR. + initialize model and convert to OpenVINO model and serialize it to IR. Params: model_path: path to model modules, weights and configs downloaded via omz_downloader converted_model_path: path for saving converted model @@ -192,21 +199,10 @@ Intermediate Representation format for applying optimizations. 
model = QuartzNet(model_config, encoder_weights, decoder_weights) # turn model to inference mode model.eval() - # export model to ONNX with preserving dynamic shapes - onnx_model_path = model_path / "quartznet.onnx" - torch.onnx.export( - model, - torch.zeros([1, 64, 128]), - onnx_model_path, - opset_version=11, - input_names=["audio_signal"], - output_names=['output'], - dynamic_axes={"audio_signal": {0: "batch_size", 2: "wave_len"}, "output": {0: "batch_size", 2: "wave_len"}} - ) # convert model to OpenVINO Model using model conversion API - ov_model = ov.convert_model(str(onnx_model_path)) + ov_model = ov.convert_model(model, example_input=torch.zeros([1, 64, 128])) # save model in IR format for next usage - ov.save_model(ov_model, str(converted_model_path)) + ov.save_model(ov_model, converted_model_path) .. code:: ipython3 @@ -218,13 +214,29 @@ Intermediate Representation format for applying optimizations. downloaded_model_path = Path("output/public/quartznet-15x5-en/models") convert_model(downloaded_model_path, path_to_converted_model) -Audio Processing -############################################################################################################################### + +.. parsed-literal:: + + [NeMo W 2023-09-11 15:01:17 jasper:148] Turned off 170 masked convolutions + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + [NeMo W 2023-09-11 15:01:18 deprecated:66] Function ``local_parameters`` is deprecated. It is going to be removed in the 0.11 version. + + +Audio Processing +---------------------------------------------------------- Now that the model is converted, load an audio file. 
-Define constants -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Define constants +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First, locate an audio file and define the alphabet used by the model. This tutorial uses the Latin alphabet beginning with a space symbol and @@ -236,8 +248,8 @@ could be any other character. audio_file_name = "edge_to_cloud.ogg" alphabet = " abcdefghijklmnopqrstuvwxyz'~" -Available Audio Formats -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Available Audio Formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are multiple supported audio formats that can be used with the model: @@ -247,8 +259,8 @@ model: ``RF64``, ``SD2``, ``SDS``, ``IRCAM``, ``VOC``, ``W64``, ``WAV``, ``NIST``, ``WAVEX``, ``WVE``, ``XI`` -Load Audio File -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load Audio File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Load the file after checking a file extension. Pass ``sr`` (stands for a ``sampling rate``) as an additional parameter. The model supports files @@ -256,7 +268,13 @@ with a ``sampling rate`` of 16 kHz. .. code:: ipython3 - audio, sampling_rate = librosa.load(path=f'{data_folder}/audio/{audio_file_name}', sr=16000) + # Download the audio from the openvino_notebooks storage + file_name = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/audio/" + audio_file_name, + directory=data_folder + ) + + audio, sampling_rate = librosa.load(path=str(file_name), sr=16000) Now, you can play your audio file. @@ -271,15 +289,15 @@ Now, you can play your audio file. 
-Visualize Audio File -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Visualize Audio File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can visualize how your audio file presents on a wave plot and spectrogram. @@ -295,15 +313,8 @@ spectrogram. librosa.display.specshow(specto_audio, sr=sampling_rate, x_axis='time', y_axis='hz'); -.. parsed-literal:: - - /tmp/ipykernel_2115037/2518307745.py:2: FutureWarning: waveshow() keyword argument 'x_axis' has been renamed to 'axis' in version 0.10.0. - This alias will be removed in version 1.0. - librosa.display.waveshow(y=audio, sr=sampling_rate, max_points=50000, x_axis='time', offset=0.0); - - -.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_1.png +.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_0.png .. parsed-literal:: @@ -312,11 +323,11 @@ spectrogram. -.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_3.png +.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_2.png -Change Type of Data -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Change Type of Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The file loaded in the previous step may contain data in ``float`` type with a range of values between -1 and 1. 
To generate a viable input, @@ -329,8 +340,8 @@ multiply each value by the max value of ``int16`` and convert it to audio = (audio * (2**15 - 1)) audio = audio.astype(np.int16) -Convert Audio to Mel Spectrum -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert Audio to Mel Spectrum +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Next, convert the pre-pre-processed audio to `Mel Spectrum `__. @@ -369,8 +380,8 @@ article `__. @@ -379,8 +390,8 @@ scale `__. mel_basis, spec = audio_to_mel(audio=audio.flatten(), sampling_rate=sampling_rate) -Visualize Mel Spectrogram -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Visualize Mel Spectrogram +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For more information about Mel spectrogram, refer to this `article `__. @@ -396,15 +407,15 @@ presents filter bank for converting Hz to Mels. -.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_0.png +.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_0.png -.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_1.png +.. image:: 211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_1.png -Adjust Mel scale to Input -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Adjust Mel scale to Input +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Before reading the network, make sure that the input is ready. @@ -412,8 +423,8 @@ Before reading the network, make sure that the input is ready. 
audio = mel_to_input(mel_basis=mel_basis, spec=spec) -Load the Model -############################################################################################################################### +Load the Model +-------------------------------------------------------- Now, you can read and load the network. @@ -453,6 +464,15 @@ Select device from dropdown list device + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + .. code:: ipython3 model = core.read_model( @@ -464,20 +484,18 @@ Select device from dropdown list model.reshape({model_input_layer: shape}) compiled_model = core.compile_model(model=model, device_name=device.value) -Do Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Everything is set up. Now, the only thing that remains is passing input to the previously loaded network and running inference. .. code:: ipython3 - output_layer_ir = compiled_model.output(0) - - character_probabilities = compiled_model([ov.Tensor(audio)])[output_layer_ir] + character_probabilities = compiled_model([ov.Tensor(audio)])[0] -Read Output -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Read Output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After inference, you need to reach out the output. The default output format for ``QuartzNet 15x5`` are per-frame probabilities (after @@ -506,8 +524,8 @@ The last step is getting symbols from corresponding indexes in charlist. 
# Run argmax to pick most possible symbols character_probabilities = np.argmax(character_probabilities, axis=1) -Implementation of Decoding -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Implementation of Decoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To decode previously explained output, you need the `Connectionist Temporal Classification (CTC) @@ -525,8 +543,8 @@ function. This solution will remove consecutive letters from the output. previous_letter_id = letter_index return ''.join(transcription) -Run Decoding and Print Output -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run Decoding and Print Output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_0.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_0.png new file mode 100644 index 00000000000000..4f676d7b16d948 --- /dev/null +++ b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657c9b26560f22401566023e5eac946ee9f3c6dbc32519f4bb0f3b61a75f21fb +size 21796 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_2.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_2.png new file mode 100644 index 00000000000000..6c8b8f4193c9b9 --- /dev/null +++ b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_20_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27930a9a6c4312452039729fe8664861399e2b785455d3ca92c6465ad0a0676b +size 85260 diff --git 
a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_1.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_1.png deleted file mode 100644 index f9616580554a87..00000000000000 --- a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e27e4c367faece74f94169c8b40f3c54cb16ff9065dd30df00eefd540de9eaa -size 21971 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_3.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_3.png deleted file mode 100644 index d04fc3dcdc2cca..00000000000000 --- a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_21_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26bf520a2a210265b9b362dfb8eb4c98613a1d3e202a17dd8e6af07a99d785eb -size 87067 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_0.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_0.png new file mode 100644 index 00000000000000..168d3c8ebc15c4 --- /dev/null +++ b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e60e6c532cc6f5207444526b886985020c7593d4394a1fd590bf1b1b34a3cba +size 50178 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_1.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_1.png new file mode 100644 index 00000000000000..363c29cf77b5fc --- /dev/null +++ b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_28_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:78aafab4432d290ad542ffeb57ed0669c94999753e265c02429d8f705f381947 +size 10083 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_0.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_0.png deleted file mode 100644 index 35098d0ac80a99..00000000000000 --- a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e659ba9a820760d48109eeb03a2692a1db921aa2f4b1081903159f8f9710bfb4 -size 50625 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_1.png b/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_1.png deleted file mode 100644 index 1442b74fe9b132..00000000000000 --- a/docs/notebooks/211-speech-to-text-with-output_files/211-speech-to-text-with-output_29_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a3869a2b00c3d2b86885bea76d84243834cfa4cbbf796e71c830323ceb8248d4 -size 10083 diff --git a/docs/notebooks/211-speech-to-text-with-output_files/index.html b/docs/notebooks/211-speech-to-text-with-output_files/index.html index 19fc8b722bf994..d87f4ec902390b 100644 --- a/docs/notebooks/211-speech-to-text-with-output_files/index.html +++ b/docs/notebooks/211-speech-to-text-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/211-speech-to-text-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/211-speech-to-text-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/211-speech-to-text-with-output_files/


../
-211-speech-to-text-with-output_21_1.png            16-Aug-2023 01:31               21971
-211-speech-to-text-with-output_21_3.png            16-Aug-2023 01:31               87067
-211-speech-to-text-with-output_29_0.png            16-Aug-2023 01:31               50625
-211-speech-to-text-with-output_29_1.png            16-Aug-2023 01:31               10083
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/211-speech-to-text-with-output_files/


../
+211-speech-to-text-with-output_20_0.png            31-Oct-2023 00:35               21796
+211-speech-to-text-with-output_20_2.png            31-Oct-2023 00:35               85260
+211-speech-to-text-with-output_28_0.png            31-Oct-2023 00:35               50178
+211-speech-to-text-with-output_28_1.png            31-Oct-2023 00:35               10083
 

diff --git a/docs/notebooks/212-pyannote-speaker-diarization-with-output.rst b/docs/notebooks/212-pyannote-speaker-diarization-with-output.rst index 78ea66c42e7ee4..c57a004003e4ad 100644 --- a/docs/notebooks/212-pyannote-speaker-diarization-with-output.rst +++ b/docs/notebooks/212-pyannote-speaker-diarization-with-output.rst @@ -39,39 +39,44 @@ card `__, **Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Prepare pipeline <#prepare-pipeline>`__ -- `Load test audio file <#load-test-audio-file>`__ -- `Run inference pipeline <#run-inference-pipeline>`__ -- `Convert model to OpenVINO Intermediate Representation format <#convert-model-to-openvino-intermediate-representation-format>`__ -- `Select inference device <#select-inference-device>`__ -- `Replace segmentation model with OpenVINO <#replace-segmentation-model-with-openvino>`__ -- `Run speaker diarization with OpenVINO <#run-speaker-diarization-with-openvino>`__ - -Prerequisites -############################################################################################################################### + +- `Prerequisites <#prerequisites>`__ +- `Prepare pipeline <#prepare-pipeline>`__ +- `Load test audio file <#load-test-audio-file>`__ +- `Run inference pipeline <#run-inference-pipeline>`__ +- `Convert model to OpenVINO Intermediate Representation + format <#convert-model-to-openvino-intermediate-representation-format>`__ +- `Select inference device <#select-inference-device>`__ +- `Replace segmentation model with + OpenVINO <#replace-segmentation-model-with-openvino>`__ +- `Run speaker diarization with + OpenVINO <#run-speaker-diarization-with-openvino>`__ + +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q -r requirements.txt + %pip install -q -r requirements.txt .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - onnx 1.14.1 requires protobuf>=3.20.2, but you have protobuf 3.20.1 which is incompatible. + onnx 1.15.0 requires protobuf>=3.20.2, but you have protobuf 3.20.1 which is incompatible. onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 3.20.1 which is incompatible. - paddlepaddle 2.5.0rc0 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.20.1 which is incompatible. - ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.3 which is incompatible. + paddlepaddle 2.5.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.20.1 which is incompatible. + ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.6 which is incompatible. ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.9.2 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.0.76 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.1 which is incompatible. 
+ ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.1.78 which is incompatible. + tensorflow 2.13.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.1 which is incompatible. tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 3.20.1 which is incompatible. - + Note: you may need to restart the kernel to use updated packages. -Prepare pipeline -############################################################################################################################### + +Prepare pipeline +---------------------------------------------------------- Traditional Speaker Diarization systems can be generalized into a five-step process: @@ -113,9 +118,7 @@ method by providing a path to the directory with pipeline configuration or identification from `HuggingFace hub `__. -.. note:: - - This tutorial uses a non-official version of model + **Note**: This tutorial uses a non-official version of model ``philschmid/pyannote-speaker-diarization-endpoint``, provided only for demo purposes. The original model (``pyannote/speaker-diarization``) requires you to accept the model @@ -147,17 +150,8 @@ hub `__. pipeline = Pipeline.from_pretrained("philschmid/pyannote-speaker-diarization-endpoint") - -.. parsed-literal:: - - 2023-09-08 23:36:40.468953: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:36:40.503440: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-09-08 23:36:41.110289: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Load test audio file -############################################################################################################################### +Load test audio file +-------------------------------------------------------------- .. code:: ipython3 @@ -212,8 +206,8 @@ Load test audio file .. image:: 212-pyannote-speaker-diarization-with-output_files/212-pyannote-speaker-diarization-with-output_9_1.png -Run inference pipeline -############################################################################################################################### +Run inference pipeline +---------------------------------------------------------------- For running inference, we should provide a path to input audio to the pipeline @@ -234,7 +228,7 @@ pipeline .. parsed-literal:: - Diarization pipeline took 15.75 s + Diarization pipeline took 15.21 s The result of running the pipeline can be represented as a diagram @@ -273,8 +267,8 @@ We can also print each time frame and corresponding speaker: start=27.8s stop=29.5s speaker_SPEAKER_02 -Convert model to OpenVINO Intermediate Representation format -############################################################################################################################### +Convert model to OpenVINO Intermediate Representation format +------------------------------------------------------------------------------------------------------ For best results with OpenVINO, it is recommended to convert the model to OpenVINO IR format. OpenVINO supports PyTorch via ONNX conversion. We @@ -290,18 +284,17 @@ with ``openvino.runtime.serialize``. 
from pathlib import Path import torch - from openvino.tools import mo - from openvino.runtime import serialize, Core + import openvino as ov - core = Core() + core = ov.Core() ov_speaker_segmentation_path = Path("pyannote-segmentation.xml") if not ov_speaker_segmentation_path.exists(): onnx_path = ov_speaker_segmentation_path.with_suffix(".onnx") torch.onnx.export(pipeline._segmentation.model, torch.zeros((1, 1, 80000)), onnx_path, input_names=["chunks"], output_names=["outputs"], dynamic_axes={"chunks": {0: "batch_size", 2: "wave_len"}}) - ov_speaker_segmentation = mo.convert_model(onnx_path, compress_to_fp16=True) - serialize(ov_speaker_segmentation, str(ov_speaker_segmentation_path)) + ov_speaker_segmentation = ov.convert_model(onnx_path) + ov.save_model(ov_speaker_segmentation, str(ov_speaker_segmentation_path)) print(f"Model successfully converted to IR and saved to {ov_speaker_segmentation_path}") else: ov_speaker_segmentation = core.read_model(ov_speaker_segmentation_path) @@ -313,10 +306,10 @@ with ``openvino.runtime.serialize``. Model successfully converted to IR and saved to pyannote-segmentation.xml -Select inference device -############################################################################################################################### +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -340,14 +333,12 @@ Select device from dropdown list for running inference using OpenVINO: -Replace segmentation model with OpenVINO -############################################################################################################################### +Replace segmentation model with OpenVINO +---------------------------------------------------------------------------------- .. 
code:: ipython3 - from openvino.runtime import Core - - core = Core() + core = ov.Core() ov_seg_model = core.compile_model(ov_speaker_segmentation, device.value) infer_request = ov_seg_model.create_infer_request() @@ -372,8 +363,8 @@ Replace segmentation model with OpenVINO pipeline._segmentation.infer = infer_segm -Run speaker diarization with OpenVINO -############################################################################################################################### +Run speaker diarization with OpenVINO +------------------------------------------------------------------------------- .. code:: ipython3 @@ -388,7 +379,7 @@ Run speaker diarization with OpenVINO .. parsed-literal:: - Diarization pipeline took 15.15 s + Diarization pipeline took 14.49 s .. code:: ipython3 @@ -410,9 +401,9 @@ Run speaker diarization with OpenVINO .. parsed-literal:: - start=6.7s stop=7.1s speaker_SPEAKER_02 - start=7.6s stop=8.3s speaker_SPEAKER_00 - start=8.3s stop=10.0s speaker_SPEAKER_02 + start=6.7s stop=7.1s speaker_SPEAKER_00 + start=7.6s stop=8.6s speaker_SPEAKER_00 + start=8.6s stop=10.0s speaker_SPEAKER_02 start=9.8s stop=11.0s speaker_SPEAKER_00 start=10.6s stop=14.7s speaker_SPEAKER_02 start=14.3s stop=17.9s speaker_SPEAKER_01 diff --git a/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/212-pyannote-speaker-diarization-with-output_27_0.png b/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/212-pyannote-speaker-diarization-with-output_27_0.png index 393e9231034ff7..785fa693460f1b 100644 --- a/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/212-pyannote-speaker-diarization-with-output_27_0.png +++ b/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/212-pyannote-speaker-diarization-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e871c37b621a03d98fd4a99ffa6da32095fb3841234189f196be5cefcdbec98 -size 7960 +oid 
sha256:35c3eadc0eece1105b80f1041f7207593787e04fbd8b14125ac9ccd91aac0211 +size 7969 diff --git a/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/index.html b/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/index.html index 620ab266219afb..be3ff93140e94b 100644 --- a/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/index.html +++ b/docs/notebooks/212-pyannote-speaker-diarization-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/212-pyannote-speaker-diarization-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/212-pyannote-speaker-diarization-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/212-pyannote-speaker-diarization-with-output_files/


../
-212-pyannote-speaker-diarization-with-output_14..> 16-Aug-2023 01:31                7969
-212-pyannote-speaker-diarization-with-output_27..> 16-Aug-2023 01:31                7960
-212-pyannote-speaker-diarization-with-output_9_..> 16-Aug-2023 01:31               43095
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/212-pyannote-speaker-diarization-with-output_files/


../
+212-pyannote-speaker-diarization-with-output_14..> 31-Oct-2023 00:35                7969
+212-pyannote-speaker-diarization-with-output_27..> 31-Oct-2023 00:35                7969
+212-pyannote-speaker-diarization-with-output_9_..> 31-Oct-2023 00:35               43095
 

diff --git a/docs/notebooks/213-question-answering-with-output.rst b/docs/notebooks/213-question-answering-with-output.rst index 18951ef96c1162..08a51c84a0b11a 100644 --- a/docs/notebooks/213-question-answering-with-output.rst +++ b/docs/notebooks/213-question-answering-with-output.rst @@ -7,53 +7,93 @@ model `__. Final part -of this notebook provides live inference results from your inputs. +of this notebook provides live inference results from your inputs. **Table of contents:** +--- + +- `Imports <#imports>`__ +- `The model <#the-model>`__ +- `Download the model <#download-the-model>`__ +- `Load the model <#load-the-model>`__ +- `Select inference device <#select-inference-device>`__ +- `Processing <#processing>`__ +- `Preprocessing <#preprocessing>`__ +- `Postprocessing <#postprocessing>`__ +- `Main Processing Function <#main-processing-function>`__ +- `Run <#run>`__ +- `Run on local paragraphs <#run-on-local-paragraphs>`__ +- `Run on websites <#run-on-websites>`__ -- `Imports <#imports>`__ -- `The model <#the-model>`__ +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" - - `Download the model <#download-the-model>`__ - - `Load the model <#load-the-model>`__ - - - `Select inference device <#select-inference-device>`__ -- `Processing <#processing>`__ +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
- - `Preprocessing <#preprocessing>`__ - - `Postprocessing <#postprocessing>`__ - - `Main Processing Function <#main-processing-function>`__ - -- `Run <#Run>`__ - - `Run on local paragraphs <#run-on-local-paragraphs>`__ - - `Run on websites <#run-on-websites>`__ +Imports +------------------------------------------------- -Imports -############################################################################################################################### +.. code:: ipython3 + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + + from notebook_utils import download_file .. code:: ipython3 import operator import time from urllib import parse + from pathlib import Path import numpy as np - from openvino.runtime import Core + import openvino as ov + + download_file( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/213-question-answering/html_reader.py', + filename='html_reader.py' + ) import html_reader as reader + + download_file( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/213-question-answering/tokens_bert.py', + filename='tokens_bert.py' + ) import tokens_bert as tokens -The model -############################################################################################################################### -Download the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -Use ``omz_downloader``, which is a command-line tool from the -``openvino-dev`` package. The ``omz_downloader`` tool automatically -creates a directory structure and downloads the selected model. If the +.. parsed-literal:: + + html_reader.py: 0%| | 0.00/635 [00:00`__. 
- Create an inference pipeline for grammatical error checking +- Optimize grammar correction pipeline with + `NNCF `__ quantization +- Compare original and optimized pipelines from performance and + accuracy standpoints **Table of contents:** -- `How does it work? <#how-does-it-work>`__ -- `Prerequisites <#prerequisites>`__ -- `Download and Convert Models <#download-and-convert-models>`__ - - `Select inference device <#select-inference-device>`__ - - `Grammar Checker <#grammar-checker>`__ - - `Grammar Corrector <#grammar-corrector>`__ +- `How does it work? <#how-does-it-work>`__ +- `Prerequisites <#prerequisites>`__ +- `Download and Convert + Models <#download-and-convert-models>`__ -- `Prepare Demo Pipeline <#prepare-demo-pipeline>`__ + - `Select inference device <#select-inference-device>`__ + - `Grammar Checker <#grammar-checker>`__ + - `Grammar Corrector <#grammar-corrector>`__ -How does it work? -############################################################################################################################### +- `Prepare Demo Pipeline <#prepare-demo-pipeline>`__ +- `Quantization <#quantization>`__ + + - `Run Quantization <#run-quantization>`__ + - `Compare model size, performance and + accuracy <#compare-model-size-performance-and-accuracy>`__ + +- `Interactive demo <#interactive-demo>`__ + +How does it work? +------------------------------------------------------------ A Grammatical Error Correction task can be thought of as a sequence-to-sequence task where a model is trained to take a @@ -103,8 +116,8 @@ documentation `__ Now that we know more about FLAN-T5 and RoBERTa, let us get started. 🚀 -Prerequisites -############################################################################################################################### +Prerequisites +-------------------------------------------------------- First, we need to install the `Hugging Face Optimum `__ library @@ -116,18 +129,18 @@ documentation `__. .. 
code:: ipython3 - !pip install -q "git+https://github.com/huggingface/optimum-intel.git" "openvino>=2023.0.0" onnx onnxruntime gradio + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" "openvino>=2023.1.0" onnx onnxruntime gradio + %pip install -q "git+https://github.com/openvinotoolkit/nncf.git@9c671f0ae0a118e4bc2de8b09e66425931c0bfa4" datasets jiwer .. parsed-literal:: - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. -Download and Convert Models -############################################################################################################################### +Download and Convert Models +---------------------------------------------------------------------- Optimum Intel can be used to load optimized models from the `Hugging Face Hub `__ and @@ -164,10 +177,10 @@ Tokenizer class and pipelines API are compatible with Optimum models. .. parsed-literal:: - 2023-07-17 14:43:08.812267: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-17 14:43:08.850959: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-09-27 14:53:36.462575: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-09-27 14:53:36.496914: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-17 14:43:09.468643: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-09-27 14:53:37.063292: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -177,21 +190,22 @@ Tokenizer class and pipelines API are compatible with Optimum models. .. parsed-literal:: - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - comet_ml is installed but `COMET_API_KEY` is not set. + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda-11.7' + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -207,12 +221,12 @@ Select device from dropdown list for running inference using OpenVINO: .. 
parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') -Grammar Checker -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grammar Checker +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -229,8 +243,25 @@ Grammar Checker .. parsed-literal:: - Compiling the model... - Set CACHE_DIR to roberta-base-cola/model_cache + Framework not specified. Using pt to export to ONNX. + Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias'] + - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). + - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + Using framework PyTorch: 1.13.1+cpu + Overriding 1 configuration item(s) + - use_cache -> False + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + Compiling the model to CPU ... 
+ Set CACHE_DIR to /tmp/tmpcqv99eqb/model_cache Let us check model work, using inference pipeline for @@ -248,12 +279,6 @@ Hugging Face inference pipelines in this print(f'predicted score: {result["score"] :.2}') -.. parsed-literal:: - - Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers - pip install xformers. - - .. parsed-literal:: input text: They are moved by salar energy @@ -263,8 +288,8 @@ Hugging Face inference pipelines in this Great! Looks like the model can detect errors in the sample. -Grammar Corrector -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grammar Corrector +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The steps for loading the Grammar Corrector model are very similar, except for the model class that is used. Because FLAN-T5 is a @@ -287,7 +312,6 @@ to run it. .. parsed-literal:: - The argument `from_transformers` is deprecated, and will be removed in optimum 2.0. Use `export` instead Framework not specified. Using pt to export to ONNX. Using framework PyTorch: 1.13.1+cpu Overriding 1 configuration item(s) @@ -295,18 +319,16 @@ to run it. Using framework PyTorch: 1.13.1+cpu Overriding 1 configuration item(s) - use_cache -> True - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/modeling_utils.py:850: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/modeling_utils.py:875: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if causal_mask.shape[1] < attention_mask.shape[1]: Using framework PyTorch: 1.13.1+cpu Overriding 1 configuration item(s) - use_cache -> True - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/t5/modeling_t5.py:507: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/t5/modeling_t5.py:509: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif past_key_value.shape[2] != key_value_states.shape[1]: - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - Compiling the encoder... - Compiling the decoder... - Compiling the decoder... + Compiling the encoder to AUTO ... + Compiling the decoder to AUTO ... + Compiling the decoder to AUTO ... .. code:: ipython3 @@ -320,6 +342,14 @@ to run it. print(f'generated text: {result["generated_text"]}') +.. parsed-literal:: + + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/optimum/intel/openvino/modeling_seq2seq.py:339: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. 
Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + last_hidden_state = torch.from_numpy(self.request(inputs, shared_memory=True)["last_hidden_state"]).to( + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/optimum/intel/openvino/modeling_seq2seq.py:416: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + self.request.start_async(inputs, shared_memory=True) + + .. parsed-literal:: input text: They are moved by salar energy @@ -328,8 +358,8 @@ to run it. Nice! The result looks pretty good! -Prepare Demo Pipeline -############################################################################################################################### +Prepare Demo Pipeline +---------------------------------------------------------------- Now let us put everything together and create the pipeline for grammar correction. The pipeline accepts input text, verifies its correctness, @@ -455,7 +485,7 @@ Let us see it in action. .. code:: ipython3 print(f"input text: {default_text}\n") - print(f'generated text: {corrected_text}') + print(f'generated text: {corrected_text}') .. parsed-literal:: @@ -465,29 +495,260 @@ Let us see it in action. generated text: Most of the course is about the semantic content of language but there are also interesting topics to be learned from the service features except statistics in characters in documents. At this point, she introduces herself as a native English speaker and goes on to say that if you continue to work on social science, you will continue to be successful. 
-Interactive demo -############################################################################################################################### +Quantization +------------------------------------------------------- + +`NNCF `__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +Grammar checker model takes up a tiny portion of the whole text +correction pipeline so we optimize only the grammar corrector model. +Grammar corrector itself consists of three models: encoder, first call +decoder and decoder with past. The last model’s share of inference +dominates the other ones. Because of this we quantize only it. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize()`` to obtain quantized models. +3. Serialize the ``INT8`` model using ``openvino.save_model()`` + function. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + to_quantize = widgets.Checkbox( + value=True, + description='Quantization', + disabled=False, + ) + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Run Quantization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Below we retrieve the quantized model. Please see ``utils.py`` for +source code. Quantization is relatively time-consuming and will take +some time to complete. + +.. 
code:: ipython3 + + from utils import get_quantized_pipeline + + grammar_corrector_pipe_fp32 = grammar_corrector_pipe + grammar_corrector_pipe_int8 = None + if to_quantize.value: + quantized_model_path = Path("quantized_decoder_with_past") / "openvino_model.xml" + grammar_corrector_pipe_int8 = get_quantized_pipeline(grammar_corrector_pipe_fp32, grammar_corrector_tokenizer, core, grammar_corrector_dir, + quantized_model_path, device.value) + + + +.. parsed-literal:: + + Collecting calibration data: 0%| | 0/10 [00:00`__\ +dataset is used for testing. One dataset sample consists of a text with +errors as input and several corrected versions as labels. When measuring +accuracy we use mean ``(1 - WER)`` against corrected text versions, +where WER is Word Error Rate metric. + +.. code:: ipython3 + + from utils import calculate_inference_time_and_accuracy + + TEST_SUBSET_SIZE = 50 + + if to_quantize.value: + inference_time_fp32, accuracy_fp32 = calculate_inference_time_and_accuracy(grammar_corrector_pipe_fp32, TEST_SUBSET_SIZE) + print(f"Evaluation results of FP32 grammar correction pipeline. Accuracy: {accuracy_fp32:.2f}%. Time: {inference_time_fp32:.2f} sec.") + inference_time_int8, accuracy_int8 = calculate_inference_time_and_accuracy(grammar_corrector_pipe_int8, TEST_SUBSET_SIZE) + print(f"Evaluation results of INT8 grammar correction pipeline. Accuracy: {accuracy_int8:.2f}%. Time: {inference_time_int8:.2f} sec.") + print(f"Performance speedup: {inference_time_fp32 / inference_time_int8:.3f}") + print(f"Accuracy drop :{accuracy_fp32 - accuracy_int8:.2f}%.") + print(f"Model footprint reduction: {model_size_fp32 / model_size_int8:.3f}") + + + +.. 
parsed-literal:: + + Evaluation: 0%| | 0/50 [00:00 + diff --git a/docs/notebooks/215-image-inpainting-with-output.rst b/docs/notebooks/215-image-inpainting-with-output.rst index 5c1a0da0682c49..c9f63034534f66 100644 --- a/docs/notebooks/215-image-inpainting-with-output.rst +++ b/docs/notebooks/215-image-inpainting-with-output.rst @@ -1,5 +1,5 @@ Image In-painting with OpenVINO™ --------------------------------- +================================ This notebook demonstrates how to use an image in-painting model with OpenVINO, using `GMCNN @@ -7,24 +7,31 @@ model `__ from `Open Model Zoo `__. This model, given a tampered image, is able to create something very similar to the original image. The Following pipeline will be used in this notebook. - |pipeline| **Table of contents:** -- `Download the Model <#download-the-model>`__ -- `Convert Tensorflow model to OpenVINO IR format <#convert-tensorflow-model-to-openvino-ir-format>`__ -- `Load the model <#load-the-model>`__ -- `Determine the input shapes of the model <#determine-the-input-shapes-of-the-model>`__ -- `Create a square mask <#create-a-square-mask>`__ -- `Load and Resize the Image <#load-and-resize-the-image>`__ -- `Generating the Masked Image <#generating-the-masked-image>`__ -- `Preprocessing <#preprocessing>`__ -- `Inference <#inference>`__ -- `Save the Restored Image <#save-the-restored-image>`__ + +- `Download the Model <#download-the-model>`__ +- `Convert Tensorflow model to OpenVINO IR + format <#convert-tensorflow-model-to-openvino-ir-format>`__ +- `Load the model <#load-the-model>`__ +- `Determine the input shapes of the + model <#determine-the-input-shapes-of-the-model>`__ +- `Create a square mask <#create-a-square-mask>`__ +- `Load and Resize the Image <#load-and-resize-the-image>`__ +- `Generating the Masked + Image <#generating-the-masked-image>`__ +- `Preprocessing <#preprocessing>`__ +- `Inference <#inference>`__ +- `Save the Restored Image <#save-the-restored-image>`__ .. 
|pipeline| image:: https://user-images.githubusercontent.com/4547501/165792473-ba784c0d-0a37-409f-a5f6-bb1849c1d140.png +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" "opencv-python" "matplotlib" + .. code:: ipython3 import sys @@ -39,8 +46,8 @@ original image. The Following pipeline will be used in this notebook. sys.path.append("../utils") import notebook_utils as utils -Download the Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Download ``gmcnn-places2-tf``\ model (this step will be skipped if the model is already downloaded) and then unzip it. Downloaded model stored @@ -71,8 +78,8 @@ be obtained from original model checkpoint can be found in this Already downloaded -Convert Tensorflow model to OpenVINO IR format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert Tensorflow model to OpenVINO IR format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The pre-trained model is in TensorFlow format. To use it with OpenVINO, convert it to OpenVINO IR format with model conversion API. For more @@ -98,8 +105,8 @@ This step is also skipped if the model is already converted. model/public/ir/frozen_model.xml already exists. 
-Load the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, load the OpenVINO IR model and perform as follows: @@ -148,8 +155,8 @@ Only a few lines of code are required to run the model: input_layer = compiled_model.input(0) output_layer = compiled_model.output(0) -Determine the input shapes of the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Determine the input shapes of the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Note that both input shapes are the same. However, the second input has 1 channel (monotone). @@ -158,8 +165,8 @@ Note that both input shapes are the same. However, the second input has N, H, W, C = input_layer.shape -Create a square mask -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Create a square mask +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Next, create a single channeled mask that will be laid on top of the original image. @@ -198,11 +205,11 @@ original image. -.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_14_0.png +.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_15_0.png -Load and Resize the Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load and Resize the Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This image will be altered by using the mask. You can process any image you like. Just change the URL below. @@ -227,11 +234,11 @@ you like. Just change the URL below. -.. 
image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_16_0.png +.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_17_0.png -Generating the Masked Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Generating the Masked Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This multiplication of the image and the mask gives the result of the masked image layered on top of the original image. The ``masked_image`` @@ -246,11 +253,11 @@ will be the first input to the GMCNN model. -.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_18_0.png +.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_19_0.png -Preprocessing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model expects the input dimensions to be ``NHWC``. @@ -262,8 +269,8 @@ The model expects the input dimensions to be ``NHWC``. masked_image = masked_image[None, ...] mask = mask[None, ...] -Inference -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Do inference with the given masked image and the mask. Then, show the restored image. @@ -277,11 +284,11 @@ restored image. -.. image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_22_0.png +.. 
image:: 215-image-inpainting-with-output_files/215-image-inpainting-with-output_23_0.png -Save the Restored Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Save the Restored Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Save the restored image to the data directory to download it. diff --git a/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_14_0.png b/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_15_0.png similarity index 100% rename from docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_14_0.png rename to docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_15_0.png diff --git a/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_16_0.png b/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_17_0.png similarity index 100% rename from docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_16_0.png rename to docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_17_0.png diff --git a/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_18_0.png b/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_19_0.png similarity index 100% rename from docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_18_0.png rename to docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_19_0.png diff --git a/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_22_0.png b/docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_23_0.png similarity index 100% rename from 
docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_22_0.png rename to docs/notebooks/215-image-inpainting-with-output_files/215-image-inpainting-with-output_23_0.png diff --git a/docs/notebooks/215-image-inpainting-with-output_files/index.html b/docs/notebooks/215-image-inpainting-with-output_files/index.html index 1afb401da23091..ea839b329370ad 100644 --- a/docs/notebooks/215-image-inpainting-with-output_files/index.html +++ b/docs/notebooks/215-image-inpainting-with-output_files/index.html @@ -1,10 +1,10 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/215-image-inpainting-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/215-image-inpainting-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/215-image-inpainting-with-output_files/


../
-215-image-inpainting-with-output_14_0.png          16-Aug-2023 01:31               16155
-215-image-inpainting-with-output_16_0.png          16-Aug-2023 01:31              544222
-215-image-inpainting-with-output_18_0.png          16-Aug-2023 01:31              493354
-215-image-inpainting-with-output_22_0.png          16-Aug-2023 01:31              586544
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/215-image-inpainting-with-output_files/


../
+215-image-inpainting-with-output_15_0.png          31-Oct-2023 00:35               16155
+215-image-inpainting-with-output_17_0.png          31-Oct-2023 00:35              544222
+215-image-inpainting-with-output_19_0.png          31-Oct-2023 00:35              493354
+215-image-inpainting-with-output_23_0.png          31-Oct-2023 00:35              586544
 

diff --git a/docs/notebooks/216-attention-center-with-output.rst b/docs/notebooks/216-attention-center-with-output.rst deleted file mode 100644 index de3d74679a708a..00000000000000 --- a/docs/notebooks/216-attention-center-with-output.rst +++ /dev/null @@ -1,309 +0,0 @@ -The attention center model with OpenVINO™ -========================================= - - - -This notebook demonstrates how to use the `attention center -model `__ with -OpenVINO. This model is in the `TensorFlow Lite -format `__, which is supported in -OpenVINO now by TFLite frontend. - -Eye tracking is commonly used in visual neuroscience and cognitive -science to answer related questions such as visual attention and -decision making. Computational models that predict where to look have -direct applications to a variety of computer vision tasks. The attention -center model takes an RGB image as input and return a 2D point as -output. This 2D point is the predicted center of human attention on the -image i.e. the most salient part of images, on which people pay -attention fist to. This allows find the most visually salient regions -and handle it as early as possible. For example, it could be used for -the latest generation image format (such as `JPEG -XL `__), which supports encoding the -parts that you pay attention to fist. It can help to improve user -experience, image will appear to load faster. - -Attention center model architecture is: > The attention center model is -a deep neural net, which takes an image as input, and uses a pre-trained -classification network, e.g, ResNet, MobileNet, etc., as the backbone. -Several intermediate layers that output from the backbone network are -used as input for the attention center prediction module. These -different intermediate layers contain different information e.g., -shallow layers often contain low level information like -intensity/color/texture, while deeper layers usually contain higher and -more semantic information like shape/object. 
All are useful for the -attention prediction. The attention center prediction applies -convolution, deconvolution and/or resizing operator together with -aggregation and sigmoid function to generate a weighting map for the -attention center. And then an operator (the Einstein summation operator -in our case) can be applied to compute the (gravity) center from the -weighting map. An L2 norm between the predicted attention center and the -ground-truth attention center can be computed as the training loss. -Source: `Google AI blog -post `__. - -.. figure:: https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjxLCDJHzJNjB_von-vFlq8TJJFA41aB85T-QE3ZNxW8kshAf3HOEyIEJ4uggXjbJmZhsdj7j6i6mvvmXtyaxXJPm3JHuKILNRTPfX9KvICbFBRD8KNuDVmLABzYuhQci3BT2BqV-wM54IxaoAV1YDBbnpJC92UZfEBGvakLusiqND2AaPpWPr2gJV1/s1600/image4.png - :alt: drawing - - drawing - -The attention center model has been trained with images from the `COCO -dataset `__ annotated with saliency from -the `SALICON dataset `__. - -**Table of contents**: - -- `Imports <#imports>`__ -- `Download the attention-center model <#download-the-attention-center-model>`__ - - - `Convert Tensorflow Lite model to OpenVINO IR format <#convert-tensorflow-lite-model-to-openvino-ir-format>`__ - -- `Select inference device <#select-inference-device>`__ -- `Prepare image to use with attention-center model <#prepare-image-to-use-with-attention-center-model>`__ -- `Load input image <#load-input-image>`__ -- `Get result with OpenVINO IR model <#get-result-with-openvino-ir-model>`__ - -Imports -############################################################################################################################### - - -.. code:: ipython3 - - import cv2 - - import numpy as np - import tensorflow as tf - from pathlib import Path - import matplotlib.pyplot as plt - - from openvino.tools import mo - from openvino.runtime import serialize, Core - - -.. 
parsed-literal:: - - 2023-08-15 23:14:52.395540: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-15 23:14:52.429075: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-15 23:14:52.969814: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Download the attention-center model -############################################################################################################################### - - -Download the model as part of `attention-center -repo `__. The repo -include model in folder ``./model``. - -.. code:: ipython3 - - if not Path('./attention-center').exists(): - ! git clone https://github.com/google/attention-center - - -.. parsed-literal:: - - Cloning into 'attention-center'... - remote: Enumerating objects: 168, done. - remote: Counting objects: 100% (168/168), done. - remote: Compressing objects: 100% (132/132), done. - remote: Total 168 (delta 73), reused 114 (delta 28), pack-reused 0 - Receiving objects: 100% (168/168), 26.22 MiB | 4.18 MiB/s, done. - Resolving deltas: 100% (73/73), done. - - -Convert Tensorflow Lite model to OpenVINO IR format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -The attention-center model is pre-trained model in TensorFlow Lite -format. In this Notebook the model will be converted to OpenVINO IR -format with Model Optimizer. This step will be skipped if the model have -already been converted. 
For more information about Model Optimizer, -please, see the `Model Optimizer Developer -Guide `__. - -Also TFLite models format is supported in OpenVINO by TFLite frontend, -so the model can be passed directly to ``core.read_model()``. You can -find example in -`002-openvino-api `__. - -.. code:: ipython3 - - tflite_model_path = Path("./attention-center/model/center.tflite") - - ir_model_path = Path("./model/ir_center_model.xml") - - core = Core() - - if not ir_model_path.exists(): - model = mo.convert_model(tflite_model_path) - serialize(model, ir_model_path.as_posix()) - print("IR model saved to {}".format(ir_model_path)) - else: - print("Read IR model from {}".format(ir_model_path)) - model = core.read_model(ir_model_path) - - -.. parsed-literal:: - - IR model saved to model/ir_center_model.xml - - -Select inference device -############################################################################################################################### - - -Select device from dropdown list for running inference using OpenVINO: - -.. code:: ipython3 - - import ipywidgets as widgets - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value='AUTO', - description='Device:', - disabled=False, - ) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - compiled_model = core.compile_model(model=model, device_name=device.value) - -Prepare image to use with attention-center model -############################################################################################################################### - - -The attention-center model takes an RGB image with shape (480, 640) as -input. - -.. 
code:: ipython3 - - class Image(): - def __init__(self, model_input_image_shape, image_path=None, image=None): - self.model_input_image_shape = model_input_image_shape - self.image = None - self.real_input_image_shape = None - - if image_path is not None: - self.image = cv2.imread(str(image_path)) - self.real_input_image_shape = self.image.shape - elif image is not None: - self.image = image - self.real_input_image_shape = self.image.shape - else: - raise Exception("Sorry, image can't be found, please, specify image_path or image") - - def prepare_image_tensor(self): - rgb_image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) - resized_image = cv2.resize(rgb_image, (self.model_input_image_shape[1], self.model_input_image_shape[0])) - - image_tensor = tf.constant(np.expand_dims(resized_image, axis=0), - dtype=tf.float32) - return image_tensor - - def scalt_center_to_real_image_shape(self, predicted_center): - new_center_y = round(predicted_center[0] * self.real_input_image_shape[1] / self.model_input_image_shape[1]) - new_center_x = round(predicted_center[1] * self.real_input_image_shape[0] / self.model_input_image_shape[0]) - return (int(new_center_y), int(new_center_x)) - - def draw_attention_center_point(self, predicted_center): - image_with_circle = cv2.circle(self.image, - predicted_center, - radius=10, - color=(3, 3, 255), - thickness=-1) - return image_with_circle - - def print_image(self, predicted_center=None): - image_to_print = self.image - if predicted_center is not None: - image_to_print = self.draw_attention_center_point(predicted_center) - - plt.imshow(cv2.cvtColor(image_to_print, cv2.COLOR_BGR2RGB)) - -Load input image -############################################################################################################################### - - -Upload input image using file loading button - -.. 
code:: ipython3 - - import ipywidgets as widgets - - load_file_widget = widgets.FileUpload( - accept="image/*", multiple=False, description="Image file", - ) - - load_file_widget - - - - -.. parsed-literal:: - - FileUpload(value=(), accept='image/*', description='Image file') - - - -.. code:: ipython3 - - import io - import PIL - # read uploaded image - image = PIL.Image.open(io.BytesIO(load_file_widget.value[-1]['content'])) if load_file_widget.value else PIL.Image.open("../data/image/coco.jpg") - image.convert("RGB") - - input_image = Image((480, 640), image=(np.ascontiguousarray(image)[:, :, ::-1]).astype(np.uint8)) - image_tensor = input_image.prepare_image_tensor() - input_image.print_image() - - -.. parsed-literal:: - - 2023-08-15 23:15:04.645356: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. - Skipping registering GPU devices... - - - -.. image:: 216-attention-center-with-output_files/216-attention-center-with-output_14_1.png - - -Get result with OpenVINO IR model -############################################################################################################################### - - -.. code:: ipython3 - - output_layer = compiled_model.output(0) - - # make inference, get result in input image resolution - res = compiled_model([image_tensor])[output_layer] - # scale point to original image resulution - predicted_center = input_image.scalt_center_to_real_image_shape(res[0]) - print(f'Prediction attention center point {predicted_center}') - input_image.print_image(predicted_center) - - -.. parsed-literal:: - - Prediction attention center point (292, 277) - - - -.. 
image:: 216-attention-center-with-output_files/216-attention-center-with-output_16_1.png - diff --git a/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_14_1.png b/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_14_1.png deleted file mode 100644 index 15c5ba574c1614..00000000000000 --- a/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_14_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7511b8a4e5b047600d5fed14fbc7e9653a868bc5253abf1e0c3ef649b47bc408 -size 387941 diff --git a/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_16_1.png b/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_16_1.png deleted file mode 100644 index bbb436b42bb05f..00000000000000 --- a/docs/notebooks/216-attention-center-with-output_files/216-attention-center-with-output_16_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:64c208355dc80bf51dfa3d483e48848442c8f6fcbb9846fc8a8e0d2fb0f468c2 -size 387905 diff --git a/docs/notebooks/216-attention-center-with-output_files/index.html b/docs/notebooks/216-attention-center-with-output_files/index.html deleted file mode 100644 index 96d527c4c5f8a1..00000000000000 --- a/docs/notebooks/216-attention-center-with-output_files/index.html +++ /dev/null @@ -1,8 +0,0 @@ - -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/216-attention-center-with-output_files/ - -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/216-attention-center-with-output_files/


../
-216-attention-center-with-output_14_1.png          16-Aug-2023 01:31              387941
-216-attention-center-with-output_16_1.png          16-Aug-2023 01:31              387905
-

- diff --git a/docs/notebooks/217-vision-deblur-with-output.rst b/docs/notebooks/217-vision-deblur-with-output.rst index 0bedb5c4b6fe12..997f500ee1c802 100644 --- a/docs/notebooks/217-vision-deblur-with-output.rst +++ b/docs/notebooks/217-vision-deblur-with-output.rst @@ -3,23 +3,28 @@ Deblur Photos with DeblurGAN-v2 and OpenVINO™ **Table of contents:** -- `What is deblurring? <#what-is-deblurring?>`__ -- `Preparations <#preparations>`__ - - `Imports <#imports>`__ - - `Settings <#settings>`__ - - `Select inference device <#select-inference-device>`__ - - `Download DeblurGAN-v2 Model <#download-deblurgan-v2-model>`__ - - `Prepare model <#prepare-model>`__ - - `Convert DeblurGAN-v2 Model to OpenVINO IR format <#convert-deblurgan-v2-model-to-openvino-ir-format>`__ +- `What is deblurring? <#what-is-deblurring>`__ +- `Preparations <#preparations>`__ -- `Load the Model <#load-the-model>`__ -- `Deblur Image <#deblur-image>`__ + - `Imports <#imports>`__ + - `Settings <#settings>`__ + - `Select inference device <#select-inference-device>`__ + - `Download DeblurGAN-v2 + Model <#download-deblurgan-v-model>`__ + - `Prepare model <#prepare-model>`__ + - `Convert DeblurGAN-v2 Model to OpenVINO IR + format <#convert-deblurgan-v-model-to-openvino-ir-format>`__ - - `Load, resize and reshape input image <#load,-resize-and-reshape-input-image>`__ - - `Do Inference on the Input Image <#do-inference-on-the-input-image>`__ - - `Display results <#display-results>`__ - - `Save the deblurred image <#save-the-deblurred-image>`__ +- `Load the Model <#load-the-model>`__ +- `Deblur Image <#deblur-image>`__ + + - `Load, resize and reshape input + image <#load-resize-and-reshape-input-image>`__ + - `Do Inference on the Input + Image <#do-inference-on-the-input-image>`__ + - `Display results <#display-results>`__ + - `Save the deblurred image <#save-the-deblurred-image>`__ This tutorial demonstrates Single Image Motion Deblurring with DeblurGAN-v2 in OpenVINO, by first converting the @@ -28,8 
+33,8 @@ model to OpenVINO Intermediate Representation (OpenVINO IR) format. For more information about the model, see the `documentation `__. -What is deblurring? -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +What is deblurring? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Deblurring is the task of removing motion blurs that usually occur in photos shot with hand-held cameras when there are moving objects in the @@ -44,11 +49,22 @@ better. =2023.1.0" + -Imports -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -64,8 +80,8 @@ Imports sys.path.append("../utils") from notebook_utils import load_image -Settings -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -80,10 +96,10 @@ Settings precision = "FP16" -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -109,8 +125,8 @@ Select device from dropdown list for running inference using OpenVINO: -Download DeblurGAN-v2 Model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Download DeblurGAN-v2 Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model defined in `VITA-Group/DeblurGANv2 `__ @@ -165,8 +181,8 @@ Downloading deblurgan-v2… -Prepare model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Prepare model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DeblurGAN-v2 is PyTorch model for converting it to OpenVINO Intermediate Representation format, we should first instantiate model class and load @@ -197,8 +213,8 @@ checkpoint weights. out = (out + 1) / 2 return out -Convert DeblurGAN-v2 Model to OpenVINO IR format -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert DeblurGAN-v2 Model to OpenVINO IR format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For best results with OpenVINO, it is recommended to convert the model to OpenVINO IR format. To convert the PyTorch model, we will use model @@ -220,25 +236,8 @@ Model conversion may take a while. 
ov_model = ov.convert_model(deblur_gan_model, example_input=torch.ones((1,3,736,1312), dtype=torch.float32), input=[[1,3,736,1312]]) ov.save_model(ov_model, model_xml_path, compress_to_fp16=(precision == "FP16")) - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - WARNING:nncf:NNCF provides best results with torch==2.0.1, while current torch version is 1.13.1+cpu. If you encounter issues, consider switching to torch==2.0.1 - - -.. parsed-literal:: - - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - - -.. parsed-literal:: - - WARNING:nncf:You are using DataParallel, which may cause significant performance issues with dynamic graph building. Consider using distributed training (DistributedDataParallel) instead. - - -Load the Model -############################################################################################################################### +Load the Model +-------------------------------------------------------- Load and compile the DeblurGAN-v2 model in the OpenVINO Runtime with ``core.read_model`` and compile it for the specified device with @@ -281,11 +280,11 @@ shape for the model. -Deblur Image -############################################################################################################################### +Deblur Image +------------------------------------------------------ -Load, resize and reshape input image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Load, resize and reshape input image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The input image is read by using the default ``load_image`` function from ``notebooks.utils``. Then, resized to meet the network expected @@ -327,11 +326,11 @@ height, and ``W`` is the width. -.. image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_24_0.png +.. 
image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_25_0.png -Do Inference on the Input Image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Do Inference on the Input Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Do the inference, convert the result to an image shape and resize it to the original image size. @@ -355,11 +354,11 @@ the original image size. -.. image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_27_0.png +.. image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_28_0.png -Display results -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Display results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -376,11 +375,11 @@ Display results -.. image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_29_0.png +.. image:: 217-vision-deblur-with-output_files/217-vision-deblur-with-output_30_0.png -Save the deblurred image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Save the deblurred image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Save the output image of the DeblurGAN-v2 model in the current directory. 
diff --git a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_24_0.png b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_25_0.png similarity index 100% rename from docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_24_0.png rename to docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_25_0.png diff --git a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_27_0.png b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_27_0.png deleted file mode 100644 index 16e424835869c9..00000000000000 --- a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_27_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:91b0a8b3e8c6f8d5187ace312da0c646b53f1572ecee7c58522bfd2edcc3093b -size 223250 diff --git a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_28_0.png b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_28_0.png new file mode 100644 index 00000000000000..ec240f838c6491 --- /dev/null +++ b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_28_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010b6bf8ce88a578f3030bad0a9f05efd67f0ba33dd2682694f1e6eeff41aa9c +size 223269 diff --git a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_29_0.png b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_29_0.png deleted file mode 100644 index 6de672fa1c0470..00000000000000 --- a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_29_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d744b651cd25842af9c672dedac4a3e17bee6b411e7750be7ee931dbece6edd -size 768425 diff --git 
a/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_30_0.png b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_30_0.png new file mode 100644 index 00000000000000..96962132175c52 --- /dev/null +++ b/docs/notebooks/217-vision-deblur-with-output_files/217-vision-deblur-with-output_30_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1787ead6f2d6321a5d23497ec3cf96def373a0b1f01f2f28811138a072a356f6 +size 768416 diff --git a/docs/notebooks/217-vision-deblur-with-output_files/index.html b/docs/notebooks/217-vision-deblur-with-output_files/index.html index 5eb34511562b2e..704c951833f4a6 100644 --- a/docs/notebooks/217-vision-deblur-with-output_files/index.html +++ b/docs/notebooks/217-vision-deblur-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/217-vision-deblur-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/217-vision-deblur-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/217-vision-deblur-with-output_files/


../
-217-vision-deblur-with-output_24_0.png             16-Aug-2023 01:31              220275
-217-vision-deblur-with-output_27_0.png             16-Aug-2023 01:31              223269
-217-vision-deblur-with-output_29_0.png             16-Aug-2023 01:31              768422
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/217-vision-deblur-with-output_files/


../
+217-vision-deblur-with-output_25_0.png             31-Oct-2023 00:35              220275
+217-vision-deblur-with-output_28_0.png             31-Oct-2023 00:35              223269
+217-vision-deblur-with-output_30_0.png             31-Oct-2023 00:35              768416
 

diff --git a/docs/notebooks/218-vehicle-detection-and-recognition-with-output.rst b/docs/notebooks/218-vehicle-detection-and-recognition-with-output.rst index 62d21115c128f0..5a30010faed42c 100644 --- a/docs/notebooks/218-vehicle-detection-and-recognition-with-output.rst +++ b/docs/notebooks/218-vehicle-detection-and-recognition-with-output.rst @@ -19,30 +19,45 @@ As a result, you can get: **Table of contents:** -- `Imports <#imports>`__ -- `Download Models <#download-models>`__ -- `Load Models <#load-models>`__ - - `Get attributes from model <#get-attributes-from-model>`__ - - `Helper function <#helper-function>`__ - - `Read and display a test image <#read-and-display-a-test-image>`__ +- `Imports <#imports>`__ +- `Download Models <#download-models>`__ +- `Load Models <#load-models>`__ -- `Use the Detection Model to Detect Vehicles <#use-the-detection-model-to-detect-vehicles>`__ + - `Get attributes from model <#get-attributes-from-model>`__ + - `Helper function <#helper-function>`__ + - `Read and display a test + image <#read-and-display-a-test-image>`__ - - `Detection Processing <#detection-processing>`__ - - `Recognize vehicle attributes <#recognize-vehicle-attributes>`__ +- `Use the Detection Model to Detect + Vehicles <#use-the-detection-model-to-detect-vehicles>`__ - - `Recognition processing <#recognition-processing>`__ + - `Detection Processing <#detection-processing>`__ + - `Recognize vehicle + attributes <#recognize-vehicle-attributes>`__ - - `Combine two models <#combine-two-models>`__ + - `Recognition processing <#recognition-processing>`__ + + - `Combine two models <#combine-two-models>`__ .. |flowchart| image:: https://user-images.githubusercontent.com/47499836/157867076-9e997781-f9ef-45f6-9a51-b515bbf41048.png -Imports -############################################################################################################################### +Imports +------------------------------------------------- Import the required modules. +.. 
code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + .. code:: ipython3 import os @@ -53,24 +68,19 @@ Import the required modules. import cv2 import numpy as np import matplotlib.pyplot as plt - from openvino.runtime import Core + import openvino as ov sys.path.append("../utils") import notebook_utils as utils -Download Models -############################################################################################################################### +Download Models +--------------------------------------------------------- -Use ``omz_downloader`` - a command-line tool from the ``openvino-dev`` -package. The ``omz_downloader`` tool automatically creates a directory -structure and downloads the selected model. This step is skipped if the -model is already downloaded. The selected model comes from the public -directory, which means it must be converted into OpenVINO Intermediate -Representation (OpenVINO IR). +Download pretrained models from +https://storage.openvinotoolkit.org/repositories/open_model_zoo. If the +model is already downloaded, this step is skipped. -.. note:: - - To change the model, replace the name of the model in the + **Note**: To change the model, replace the name of the model in the code below, for example to ``"vehicle-detection-0201"`` or ``"vehicle-detection-0202"``. Keep in mind that they support different image input sizes in detection. Also, you can change the @@ -84,59 +94,61 @@ Representation (OpenVINO IR). .. 
code:: ipython3 # A directory where the model will be downloaded. - base_model_dir = "model" + base_model_dir = Path("model") # The name of the model from Open Model Zoo. detection_model_name = "vehicle-detection-0200" recognition_model_name = "vehicle-attributes-recognition-barrier-0039" # Selected precision (FP32, FP16, FP16-INT8) precision = "FP32" + base_model_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1" + # Check if the model exists. - detection_model_path = ( - f"model/intel/{detection_model_name}/{precision}/{detection_model_name}.xml" + detection_model_url = ( + f"{base_model_url}/{detection_model_name}/{precision}/{detection_model_name}.xml" ) - recognition_model_path = ( - f"model/intel/{recognition_model_name}/{precision}/{recognition_model_name}.xml" + recognition_model_url = ( + f"{base_model_url}/{recognition_model_name}/{precision}/{recognition_model_name}.xml" ) + detection_model_path = (base_model_dir / detection_model_name).with_suffix('.xml') + recognition_model_path = (base_model_dir / recognition_model_name).with_suffix('.xml') # Download the detection model. - if not os.path.exists(detection_model_path): - download_command = f"omz_downloader " \ - f"--name {detection_model_name} " \ - f"--precision {precision} " \ - f"--output_dir {base_model_dir}" - ! $download_command + if not detection_model_path.exists(): + utils.download_file(detection_model_url, detection_model_name + '.xml', base_model_dir) + utils.download_file(detection_model_url.replace('.xml', '.bin'), detection_model_name + '.bin', base_model_dir) # Download the recognition model. if not os.path.exists(recognition_model_path): - download_command = f"omz_downloader " \ - f"--name {recognition_model_name} " \ - f"--precision {precision} " \ - f"--output_dir {base_model_dir}" - ! 
$download_command + utils.download_file(recognition_model_url, recognition_model_name + '.xml', base_model_dir) + utils.download_file(recognition_model_url.replace('.xml', '.bin'), recognition_model_name + '.bin', base_model_dir) + .. parsed-literal:: - ################|| Downloading vehicle-detection-0200 ||################ - - ========== Downloading model/intel/vehicle-detection-0200/FP32/vehicle-detection-0200.xml - - - ========== Downloading model/intel/vehicle-detection-0200/FP32/vehicle-detection-0200.bin - - - ################|| Downloading vehicle-attributes-recognition-barrier-0039 ||################ - - ========== Downloading model/intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.xml - - - ========== Downloading model/intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.bin - - + model/vehicle-detection-0200.xml: 0%| | 0.00/181k [00:00 Tuple: @@ -195,8 +207,8 @@ specified device. output_keys = compiled_model.output(0) return input_keys, output_keys, compiled_model -Get attributes from model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Get attributes from model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use ``input_keys.shape`` to get data shapes. @@ -214,8 +226,8 @@ Use ``input_keys.shape`` to get data shapes. # Get input size - Recognition. height_re, width_re = list(input_key_re.shape)[2:] -Helper function -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Helper function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``plt_show()`` function is used to show image. @@ -232,8 +244,8 @@ The ``plt_show()`` function is used to show image. 
plt.axis("off") plt.imshow(raw_image) -Read and display a test image -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Read and display a test image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The input shape of detection model is ``[1, 3, 256, 256]``. Therefore, you need to resize the image to ``256 x 256``, and expand the batch @@ -261,11 +273,11 @@ channel with ``expand_dims`` function. -.. image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_13_0.png +.. image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_14_0.png -Use the Detection Model to Detect Vehicles -############################################################################################################################### +Use the Detection Model to Detect Vehicles +------------------------------------------------------------------------------------ .. figure:: https://user-images.githubusercontent.com/47499836/157867076-9e997781-f9ef-45f6-9a51-b515bbf41048.png :alt: pipline @@ -296,8 +308,8 @@ Delete unused dims and filter out results that are not used. # Remove zero only boxes. boxes = boxes[~np.all(boxes == 0, axis=1)] -Detection Processing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Detection Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With the function below, you change the ratio to the real position in the image and filter out low-confidence results. @@ -345,8 +357,8 @@ the image and filter out low-confidence results. # Find the position of a car. 
car_position = crop_images(image_de, resized_image_de, boxes) -Recognize vehicle attributes -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Recognize vehicle attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Select one of the detected boxes. Then, crop to an area containing a vehicle to test with the recognition model. Again, you need to resize @@ -365,10 +377,10 @@ the input image and run inference. -.. image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_20_0.png +.. image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_21_0.png -Recognition processing +Recognition processing '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' The result contains colors of the vehicles (white, gray, yellow, red, @@ -417,8 +429,8 @@ determine the maximum probability as the result. Attributes:('Gray', 'Car') -Combine two models -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Combine two models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Congratulations! You successfully used a detection model to crop an image with a vehicle and recognize the attributes of a vehicle. @@ -479,5 +491,5 @@ image with a vehicle and recognize the attributes of a vehicle. -.. image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_26_0.png +.. 
image:: 218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_27_0.png diff --git a/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_13_0.png b/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_14_0.png similarity index 100% rename from docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_13_0.png rename to docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_14_0.png diff --git a/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_20_0.png b/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_21_0.png similarity index 100% rename from docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_20_0.png rename to docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_21_0.png diff --git a/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_26_0.png b/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_27_0.png similarity index 100% rename from docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_26_0.png rename to docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/218-vehicle-detection-and-recognition-with-output_27_0.png diff --git a/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/index.html 
b/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/index.html index cdb0d2e6548ff9..59d3b0f11aeae4 100644 --- a/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/index.html +++ b/docs/notebooks/218-vehicle-detection-and-recognition-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/218-vehicle-detection-and-recognition-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/218-vehicle-detection-and-recognition-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/218-vehicle-detection-and-recognition-with-output_files/


../
-218-vehicle-detection-and-recognition-with-outp..> 16-Aug-2023 01:31              172680
-218-vehicle-detection-and-recognition-with-outp..> 16-Aug-2023 01:31               19599
-218-vehicle-detection-and-recognition-with-outp..> 16-Aug-2023 01:31              175941
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/218-vehicle-detection-and-recognition-with-output_files/


../
+218-vehicle-detection-and-recognition-with-outp..> 31-Oct-2023 00:35              172680
+218-vehicle-detection-and-recognition-with-outp..> 31-Oct-2023 00:35               19599
+218-vehicle-detection-and-recognition-with-outp..> 31-Oct-2023 00:35              175941
 

diff --git a/docs/notebooks/219-knowledge-graphs-conve-with-output.rst b/docs/notebooks/219-knowledge-graphs-conve-with-output.rst index db8720e971a376..6929be7701ee58 100644 --- a/docs/notebooks/219-knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/219-knowledge-graphs-conve-with-output.rst @@ -17,28 +17,37 @@ sample dataset can be downloaded from: https://github.com/TimDettmers/ConvE/tree/master/countries/countries_S1 **Table of contents:** +--- -- `Windows specific settings <#windows-specific-settings>`__ +- `Windows specific settings <#windows-specific-settings>`__ - `Import the packages needed for successful execution <#import-the-packages-needed-for-successful-execution>`__ +- `Settings: Including path to the serialized model files and input data files <#settings-including-path-to-the-serialized-model-files-and-input-data-files>`__ +- `Download Model Checkpoint <#download-model-checkpoint>`__ +- `Defining the ConvE model class <#defining-the-conve-model-class>`__ +- `Defining the dataloader <#defining-the-dataloader>`__ +- `Evaluate the trained ConvE model <#evaluate-the-trained-conve-model>`__ +- `Prediction on the Knowledge graph. 
<#prediction-on-the-knowledge-graph>`__ +- `Convert the trained PyTorch model to IR format for OpenVINO inference <#convert-the-trained-pytorch-model-to-ir-format-for-openvino-inference>`__ +- `Evaluate the model performance with OpenVINO <#evaluate-the-model-performance-with-openvino>`__ +- `Select inference device <#select-inference-device>`__ +- `Determine the platform specific speedup obtained through OpenVINO graph optimizations <#determine-the-platform-specific-speedup-obtained-through-openvino-graph-optimizations>`__ +- `Benchmark the converted OpenVINO model using benchmark app <#benchmark-the-converted-openvino-model-using-benchmark-app>`__ +- `Conclusions <#conclusions>`__ +- `References <#references>`__ - - `Settings: Including path to the serialized model files and input data files <#settings:-including-path-to-the-serialized-model-files-and-input-data-files>`__ - - `Download Model Checkpoint <#download-model-checkpoint>`__ - - `Defining the ConvE model class <#defining-the-conve-model-class>`__ - - `Defining the dataloader <#defining-the-dataloader>`__ - - `Evaluate the trained ConvE model <#evaluate-the-trained-conve-model>`__ - - `Prediction on the Knowledge graph. <#prediction-on-the-knowledge-graph>`__ - - `Convert the trained PyTorch model to ONNX format for OpenVINO inference <#convert-the-trained-pytorch-model-to-onnx-format-for-openvino-inference>`__ - - `Evaluate the model performance with OpenVINO <#evaluate-the-model-performance-with-openvino>`__ +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: -- `Select inference device <#select-inference-device>`__ + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. - - `Determine the platform specific speedup obtained through OpenVINO graph optimizations <#determine-the-platform-specific-speedup-obtained-through-openvino-graph-optimizations>`__ - - `Benchmark the converted OpenVINO model using benchmark app <#benchmark-the-converted-openvino-model-using-benchmark-app>`__ - - `Conclusions <#conclusions>`__ - - `References <#references>`__ -Windows specific settings -############################################################################################################################### +Windows specific settings +------------------------------------------------------------------- .. code:: ipython3 @@ -80,8 +89,8 @@ Windows specific settings os.environ["LIB"] = os.pathsep.join(b.library_dirs) print(f"Added {vs_dir} to PATH") -Import the packages needed for successful execution -############################################################################################################################### +Import the packages needed for successful execution +--------------------------------------------------------------------------------------------- .. 
code:: ipython3 @@ -96,31 +105,24 @@ Import the packages needed for successful execution from torch.nn import functional as F, Parameter from torch.nn.init import xavier_normal_ - from openvino.runtime import Core + import openvino as ov - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ); from notebook_utils import download_file -Settings: Including path to the serialized model files and input data files -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Settings: Including path to the serialized model files and input data files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f"Using {device} device") - # Path to the pretrained model checkpoint modelpath = Path('models/conve.pt') - # Path to the file containing the entities and entity IDs - entdatapath = Path('../data/text/countries_S1/kg_training_entids.txt') - - # Path to the file containing the relations and relation IDs - reldatapath = Path('../data/text/countries_S1/kg_training_relids.txt') - - # Path to the test data file - testdatapath = Path('../data/json/countries_S1/e1rel_to_e2_ranking_test.json') - # Entity and relation embedding dimensions EMB_DIM = 300 @@ -133,17 +135,52 @@ Settings: Including path to the serialized model files and input data files output_dir.mkdir(exist_ok=True) - # Paths where PyTorch, ONNX and OpenVINO IR models will be stored - fp32_onnx_path = Path(output_dir / (base_model_name + "_fp32")).with_suffix(".onnx") + # Paths where PyTorch and OpenVINO IR models will be stored + ir_path = Path(output_dir / 
base_model_name).with_suffix(".xml") + +.. code:: ipython3 + + data_folder = "data" + + # Download the file containing the entities and entity IDs + entdatapath = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/countries_S1/kg_training_entids.txt", + directory=data_folder + ) + + # Download the file containing the relations and relation IDs + reldatapath = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/countries_S1/kg_training_relids.txt", + directory=data_folder + ) + + # Download the test data file + testdatapath = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/json/countries_S1/e1rel_to_e2_ranking_test.json", + directory=data_folder + ) + + + +.. parsed-literal:: + + data/kg_training_entids.txt: 0%| | 0.00/3.79k [00:00 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'e1'!. This input will be filled with random values! + [ WARNING ] No input files were given for input 'rel'!. This input will be filled with random values! + [ INFO ] Fill input 'e1' with random values + [ INFO ] Fill input 'rel' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 10000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 5.39 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 96648 iterations + [ INFO ] Duration: 10000.81 ms + [ INFO ] Latency: + [ INFO ] Median: 1.03 ms + [ INFO ] Average: 1.05 ms + [ INFO ] Min: 0.63 ms + [ INFO ] Max: 8.35 ms + [ INFO ] Throughput: 9664.02 FPS + + +Conclusions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this notebook, we convert the trained PyTorch knowledge graph embeddings model to the OpenVINO format. We confirm that there are no @@ -507,10 +613,10 @@ evaluation on the knowledge graph. Then, we determine the platform specific speedup in runtime performance that can be obtained through OpenVINO graph optimizations. To learn more about the OpenVINO performance optimizations, refer to: -https://docs.openvino.ai/2023.0/openvino_docs_deployment_optimization_guide_dldt_optimization_guide.html +https://docs.openvino.ai/2023.0/openvino_docs_optimization_guide_dldt_optimization_guide.html -References -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +References +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. Convolutional 2D Knowledge Graph Embeddings, Tim Dettmers et al. (https://arxiv.org/abs/1707.01476) diff --git a/docs/notebooks/220-cross-lingual-books-alignment-with-output.rst b/docs/notebooks/220-cross-lingual-books-alignment-with-output.rst index 1a2d6c982ec1bc..3bafb1d522c429 100644 --- a/docs/notebooks/220-cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/220-cross-lingual-books-alignment-with-output.rst @@ -1,8 +1,6 @@ Cross-lingual Books Alignment with Transformers and OpenVINO™ ============================================================= - - Cross-lingual text alignment is the task of matching sentences in a pair of texts that are translations of each other. 
In this notebook, you’ll learn how to use a deep learning model to create a parallel book in @@ -18,19 +16,19 @@ part of the pipeline - getting vectors from sentences - using the OpenVINO™ framework. Pipeline -############################################################################################################################### +-------- The notebook guides you through the entire process of creating a parallel book: from obtaining raw texts to building a visualization of aligned sentences. Here is the pipeline diagram: -|image0| +|image1| Visualizing the result allows you to identify areas for improvement in the pipeline steps, as indicated in the diagram. Prerequisites -############################################################################################################################### +------------- - ``requests`` - for getting books - ``pysbd`` - for splitting sentences @@ -39,35 +37,34 @@ Prerequisites - ``seaborn`` - for alignment matrix visualization - ``ipywidgets`` - for displaying HTML and JS output in the notebook + **Table of contents**: - `Get Books <#get-books>`__ -- `Clean Text <#clean-text>`__ -- `Split Text <#split-text>`__ -- `Get Sentence Embeddings <#get-sentence-embeddings>`__ +- `Clean Text <#clean-text>`__ +- `Split Text <#split-text>`__ +- `Get Sentence Embeddings <#get-sentence-embeddings>`__ - - `Optimize the Model with OpenVINO <#optimize-the-model-with-openvino>`__ + - `Optimize the Model with + OpenVINO <#optimize-the-model-with-openvino>`__ -- `Calculate Sentence Alignment <#calculate-sentence-alignment>`__ -- `Postprocess Sentence Alignment <#postprocess-sentence-alignment>`__ -- `Visualize Sentence Alignment <#visualize-sentence-alignment>`__ -- `Speed up Embeddings Computation <#speed-up-embeddings-computation>`__ +- `Calculate Sentence + Alignment <#calculate-sentence-alignment>`__ +- `Postprocess Sentence + Alignment <#postprocess-sentence-alignment>`__ +- `Visualize Sentence + Alignment 
<#visualize-sentence-alignment>`__ +- `Speed up Embeddings + Computation <#speed-up-embeddings-computation>`__ -.. |image0| image:: https://user-images.githubusercontent.com/51917466/254582697-18f3ab38-e264-4b2c-a088-8e54b855c1b2.png +.. |image1| image:: https://user-images.githubusercontent.com/51917466/254582697-18f3ab38-e264-4b2c-a088-8e54b855c1b2.png .. code:: ipython3 - !pip install -q requests pysbd transformers[torch] "openvino_dev>=2023.0" seaborn ipywidgets - - -.. parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - - -Get Books -############################################################################################################################### + !pip install -q --extra-index-url https://download.pytorch.org/whl/cpu requests pysbd transformers[torch] "openvino>=2023.1.0" seaborn ipywidgets +Get Books +--------------------------------------------------- The first step is to get the books that we will be working with. For this notebook, we will use English and German versions of Anna Karenina @@ -114,34 +111,31 @@ Let’s check that we got the right books by showing a part of the texts: .. parsed-literal:: -  - The Project Gutenberg eBook of Anna Karenina + The Project Gutenberg eBook of Anna Karenina - This ebook is for the use of anyone anywhere in the United States and - most other parts of the world at no cost and with almost no restrictions - whatsoever. You may copy it, give it away or re-use it under the terms - of the Project Gutenberg License included with this ebook or online - at www.gutenberg.org. 
If you are not located in the United States, - you will have to check the laws of the country where you are located + This ebook is for the use of anyone anywhere in the United States and + most other parts of the world at no cost and with almost no restrictions + whatsoever. You may copy it, give it away or re-use it under the terms + of the Project Gutenberg License included with this ebook or online + at www.gutenberg.org. If you are not located in the United States, + you will have to check the laws of the country where you are located before using this eBook. + Title: Anna Karenina - - Title: Anna Karenina - - Author: graf Leo Tolstoy - Translator: Constance Garnett - - - Release date: July 1, 1998 [eBook #1399]Most recently updated: April 9, 2023 - Language: English - - - - - *** START OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA *** - + Author: graf Leo Tolstoy + + Translator: Constance Garnett + + Release date: July 1, 1998 [eBook #1399] + Most recently updated: April 9, 2023 + + Language: English + + + + \*\*\* START OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA \*\*\* [Illustration] @@ -180,7 +174,8 @@ Let’s check that we got the right books by showing a part of the texts: discovered that the husband was carrying on an intrigue with a French girl, who had been a governess in their family, and she had announced to her husband that she could not go on living in the same house with - him. This + him. This position of affairs had now lasted three days, and not only + the husband and wife themselves, but all the me which in a raw format looks like this: @@ -194,7 +189,7 @@ which in a raw format looks like this: .. parsed-literal:: - '\ufeff\r\n The Project Gutenberg eBook of Anna Karenina\r\n \r\nThis ebook is for the use of anyone anywhere in the United States and \r\nmost other parts of the world at no cost and with almost no restrictions \r\nwhatsoever. 
You may copy it, give it away or re-use it under the terms \r\nof the Project Gutenberg License included with this ebook or online \r\nat www.gutenberg.org. If you are not located in the United States, \r\nyou will have to check the laws of the country where you are located \r\nbefore using this eBook.\r\n\r\n\r\n\r\n \r\n Title: Anna Karenina\r\n \r\n Author: graf Leo Tolstoy\r\n Translator: Constance Garnett\r\n\r\n \r\n Release date: July 1, 1998 [eBook #1399]Most recently updated: April 9, 2023\r\n Language: English\r\n \r\n \r\n \r\n \r\n *** START OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA ***\r\n \r\n[Illustration]\r\n\r\n\r\n\r\n\r\n ANNA KARENINA \r\n\r\n by Leo Tolstoy \r\n\r\n Translated by Constance Garnett \r\n\r\nContents\r\n\r\n\r\n PART ONE\r\n PART TWO\r\n PART THREE\r\n PART FOUR\r\n PART FIVE\r\n PART SIX\r\n PART SEVEN\r\n PART EIGHT\r\n\r\n\r\n\r\n\r\nPART ONE\r\n\r\nChapter 1\r\n\r\n\r\nHappy families are all alike; every unhappy family is unhappy in its\r\nown way.\r\n\r\nEverything was in confusion in the Oblonskys’ house. The wife had\r\ndiscovered that the husband was carrying on an intrigue with a French\r\ngirl, who had been a governess in their family, and she had announced\r\nto her husband that she could not go on living in the same house with\r\nhim. This ' + '\ufeffThe Project Gutenberg eBook of Anna Karenina\r\n \r\nThis ebook is for the use of anyone anywhere in the United States and\r\nmost other parts of the world at no cost and with almost no restrictions\r\nwhatsoever. You may copy it, give it away or re-use it under the terms\r\nof the Project Gutenberg License included with this ebook or online\r\nat www.gutenberg.org. 
If you are not located in the United States,\r\nyou will have to check the laws of the country where you are located\r\nbefore using this eBook.\r\n\r\nTitle: Anna Karenina\r\n\r\n\r\nAuthor: graf Leo Tolstoy\r\n\r\nTranslator: Constance Garnett\r\n\r\nRelease date: July 1, 1998 [eBook #1399]\r\n Most recently updated: April 9, 2023\r\n\r\nLanguage: English\r\n\r\n\r\n\r\n\* START OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA \*\r\n[Illustration]\r\n\r\n\r\n\r\n\r\n ANNA KARENINA \r\n\r\n by Leo Tolstoy \r\n\r\n Translated by Constance Garnett \r\n\r\nContents\r\n\r\n\r\n PART ONE\r\n PART TWO\r\n PART THREE\r\n PART FOUR\r\n PART FIVE\r\n PART SIX\r\n PART SEVEN\r\n PART EIGHT\r\n\r\n\r\n\r\n\r\nPART ONE\r\n\r\nChapter 1\r\n\r\n\r\nHappy families are all alike; every unhappy family is unhappy in its\r\nown way.\r\n\r\nEverything was in confusion in the Oblonskys’ house. The wife had\r\ndiscovered that the husband was carrying on an intrigue with a French\r\ngirl, who had been a governess in their family, and she had announced\r\nto her husband that she could not go on living in the same house with\r\nhim. This position of affairs had now lasted three days, and not only\r\nthe husband and wife themselves, but all the me' @@ -205,25 +200,22 @@ which in a raw format looks like this: -.. code:: - - '\ufeffThe Project Gutenberg EBook of Anna Karenina, 1. Band, by Leo N. Tolstoi\r\n\r\nThis eBook is for the use of anyone anywhere at no cost and with\r\nalmost no restrictions whatsoever. You may copy it, give it away or\r\nre-use it under the terms of the Project Gutenberg License included\r\nwith this eBook or online at www.gutenberg.org\r\n\r\n\r\nTitle: Anna Karenina, 1. Band\r\n\r\nAuthor: Leo N. Tolstoi\r\n\r\nRelease Date: February 18, 2014 [EBook #44956]\r\n\r\nLanguage: German\r\n\r\n\r\n*** START OF THIS PROJECT GUTENBERG EBOOK ANNA KARENINA, 1. BAND ***\r\n\r\n\r\n\r\n\r\nProduced by Norbert H. 
Langkau, Jens Nordmann and the\r\nOnline Distributed Proofreading Team at http://www.pgdp.net\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n Anna Karenina.\r\n\r\n\r\n Roman aus dem Russischen\r\n\r\n des\r\n\r\n Grafen Leo N. Tolstoi.\r\n\r\n\r\n\r\n Nach der siebenten Auflage übersetzt\r\n\r\n von\r\n\r\n Hans Moser.\r\n\r\n\r\n Erster Band.\r\n\r\n\r\n\r\n Leipzig\r\n\r\n Druck und Verlag von Philipp Reclam jun.\r\n\r\n * * * * *\r\n\r\n\r\n\r\n\r\n Erster Teil.\r\n\r\n »Die Rache ist mein, ich will vergelten.«\r\n\r\n 1.\r\n\r\n\r\nAlle glücklichen Familien sind einander ähnlich; jede unglückliche\r\nFamilie ist auf _ihre_ Weise ung' +.. parsed-literal:: + '\ufeffThe Project Gutenberg eBook of Anna Karenina, 1. Band\r\n \r\nThis ebook is for the use of anyone anywhere in the United States and\r\nmost other parts of the world at no cost and with almost no restrictions\r\nwhatsoever. You may copy it, give it away or re-use it under the terms\r\nof the Project Gutenberg License included with this ebook or online\r\nat www.gutenberg.org. If you are not located in the United States,\r\nyou will have to check the laws of the country where you are located\r\nbefore using this eBook.\r\n\r\nTitle: Anna Karenina, 1. Band\r\n\r\n\r\nCreator: graf Leo Tolstoy\r\n\r\nRelease date: February 18, 2014 [eBook #44956]\r\n\r\nLanguage: German\r\n\r\n\r\n\r\n\*\*\* START OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA, 1. BAND \*\r\n\r\n\r\n\r\nProduced by Norbert H. Langkau, Jens Nordmann and the\r\nOnline Distributed Proofreading Team at http://www.pgdp.net\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n Anna Karenina.\r\n\r\n\r\n Roman aus dem Russischen\r\n\r\n des\r\n\r\n Grafen Leo N. 
Tolstoi.\r\n\r\n\r\n\r\n Nach der siebenten Auflage übersetzt\r\n\r\n von\r\n\r\n Hans Moser.\r\n\r\n\r\n Erster Band.\r\n\r\n\r\n\r\n Leipzig\r\n\r\n Druck und Verlag von Philipp Reclam jun.\r\n\r\n \* \* \* \* \*\r\n\r\n\r\n\r\n\r\n Erster Teil.\r\n\r\n »Die' -Clean Text -############################################################################################################################### +Clean Text +---------------------------------------------------- The downloaded books may contain service information before and after the main text. The text might have different formatting styles and markup, for example, phrases from a different language enclosed in underscores for potential emphasis or italicization: -.. - Yes, Alabin was giving a dinner on glass tables, and the tables sang, - *Il mio tesoro— not Il mio tesoro* though, but something better, + \ *Il mio tesoro*—not *Il mio tesoro*\ though, but something better, and there were some sort of little decanters on the table, and they were women, too,” he remembered. @@ -234,14 +226,12 @@ German version is enclosed in ``* * * * *``, so it is safe to remove everything before the first occurrence and after the last occurrence of these asterisks. -.. hint:: - - There are text-cleaning libraries that clean up common + Hint: There are text-cleaning libraries that clean up common flaws. If the source of the text is known, you can look for a library designed for that source, for example - `gutenberg_cleaner `__. + ```gutenberg_cleaner`` `__. These libraries can reduce manual work and even automate the - process + process. .. code:: ipython3 @@ -253,7 +243,7 @@ the last occurrence of these asterisks. 
start_pattern_en = r"\nPART ONE" anna_karenina_en = re.split(start_pattern_en, anna_karenina_en)[1].strip() - end_pattern_en = "*** END OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA ***" + end_pattern_en = "*** END OF THE PROJECT GUTENBERG EBOOK ANNA KARENINA ***" anna_karenina_en = anna_karenina_en.split(end_pattern_en)[0].strip() .. code:: ipython3 @@ -342,9 +332,8 @@ needed. 0%| | 0/3 [00:00`__, as the rules for splitting text into sentences may vary for different languages. -.. hint:: - - The ``book_metadata`` obtained from the Gutendex contains + Hint: The ``book_metadata`` obtained from the Gutendex contains the language code as well, enabling automation of this part of the pipeline. - .. code:: ipython3 import pysbd @@ -386,9 +372,8 @@ languages. -Get Sentence Embeddings -############################################################################################################################### - +Get Sentence Embeddings +----------------------------------------------------------------- The next step is to transform sentences into vector representations. Transformer encoder models, like BERT, provide high-quality embeddings @@ -408,12 +393,12 @@ languages. It has the same architecture as the BERT model but has been trained on a different task: to produce identical embeddings for translation pairs. -|image01| +|image0| This makes LaBSE a great choice for our task and it can be reused for different language pairs still producing good results. -.. |image01| image:: https://user-images.githubusercontent.com/51917466/254582913-51531880-373b-40cb-bbf6-1965859df2eb.png +.. |image0| image:: https://user-images.githubusercontent.com/51917466/254582913-51531880-373b-40cb-bbf6-1965859df2eb.png .. code:: ipython3 @@ -422,12 +407,22 @@ different language pairs still producing good results. 
import numpy as np import torch from openvino.runtime import CompiledModel as OVModel + import openvino as ov model_id = "rasa/LaBSE" pt_model = AutoModel.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) + +.. parsed-literal:: + + 2023-09-15 18:53:46.819925: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-15 18:53:46.859715: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-15 18:53:47.576875: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + The model has two outputs: ``last_hidden_state`` and ``pooler_output``. For generating embeddings, you can use either the first vector from the ``last_hidden_state``, which corresponds to the special ``[CLS]`` token, @@ -452,7 +447,7 @@ best fit. return np.vstack(embeddings) else: embeddings = [ - embedding_model(**tokenizer(sent, return_tensors="pt"))[ + embedding_model(tokenizer(sent, return_tensors="pt"))[ "last_hidden_state" ][0][0] for sent in tqdm(sentences, disable=disable_tqdm) @@ -476,13 +471,12 @@ best fit. 0%| | 0/34 [00:00`__ +API `__ accepts the PyTorch/Transformers model object and additional information about model inputs. An ``example_input`` is needed to trace the model execution graph, as PyTorch constructs it dynamically during inference. @@ -491,19 +485,15 @@ The converted model must be compiled for the target device using the .. 
code:: ipython3 - from openvino.runtime import Core, Type - from openvino.tools.mo import convert_model - - # 3 inputs with dynamic axis [batch_size, sequence_length] and type int64 - inputs_info = [([-1, -1], Type.i64)] * 3 - ov_model = convert_model( + inputs_info = [([-1, -1], ov.Type.i64)] * 3 + ov_model = ov.convert_model( pt_model, example_input=tokenizer("test", return_tensors="pt").data, input=inputs_info, ) - core = Core() + core = ov.Core() compiled_model = core.compile_model(ov_model, "CPU") embeddings_en = get_embeddings(sentences_en, compiled_model) @@ -512,16 +502,36 @@ The converted model must be compiled for the target device using the .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/annotations.py:309: UserWarning: TorchScript will treat type annotations of Tensor dtype-specific subtypes as if they are normal Tensors. dtype constraints are not enforced in compilation either. - warnings.warn("TorchScript will treat type annotations of Tensor " + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. .. parsed-literal:: + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + WARNING:nncf:NNCF provides best results with torch==2.0.1, while current torch version is 1.13.1+cu117. If you encounter issues, consider switching to torch==2.0.1 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... 
To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/jit/annotations.py:309: UserWarning: TorchScript will treat type annotations of Tensor dtype-specific subtypes as if they are normal Tensors. dtype constraints are not enforced in compilation either. + warnings.warn("TorchScript will treat type annotations of Tensor " @@ -555,9 +565,8 @@ model predictions remain within an acceptable tolerance: -Calculate Sentence Alignment -############################################################################################################################### - +Calculate Sentence Alignment +---------------------------------------------------------------------- With the embedding matrices from the previous step, we can calculate the alignment: 1. Calculate sentence similarity between each pair of @@ -681,9 +690,8 @@ will be lists of German sentence numbers. 
-Postprocess Sentence Alignment -############################################################################################################################### - +Postprocess Sentence Alignment +------------------------------------------------------------------------ There are several gaps in the resulting alignment, such as English sentence #14 not mapping to any German sentence. Here are some possible @@ -707,9 +715,8 @@ Most likely, English sentence 14 is part of either German sentence 17 or 18. By comparing the similarity using the model, you can choose the most suitable alignment. -Visualize Sentence Alignment -############################################################################################################################### - +Visualize Sentence Alignment +---------------------------------------------------------------------- To evaluate the final alignment and choose the best way to improve the results of the pipeline, we will create an interactive table with HTML @@ -867,16 +874,15 @@ To read the model from disk, use the ``read_model`` method of the ov_model = core.read_model(ov_model_path) -Speed up Embeddings Computation -############################################################################################################################### - +Speed up Embeddings Computation +------------------------------------------------------------------------- Let’s see how we can speed up the most computationally complex part of the pipeline - getting embeddings. You might wonder why, when using OpenVINO, you need to compile the model after reading it. There are two main reasons for this: 1. Compatibility with different devices. The model can be compiled to run on a `specific -device `__, +device `__, like CPU, GPU or GNA. Each device may work with different data types, support different features, and gain performance by changing the neural network for a specific computing model. With OpenVINO, you do not need @@ -885,19 +891,18 @@ hardware. 
A universal OpenVINO model representation is enough. 1. Optimization for different scenarios. For example, one scenario prioritizes minimizing the *time between starting and finishing model inference* (`latency-oriented -optimization `__). +optimization `__). In our case, it is more important *how many texts per second the model can process* (`throughput-oriented -optimization `__). +optimization `__). To get a throughput-optimized model, pass a `performance -hint `__ +hint `__ as a configuration during compilation. Then OpenVINO selects the optimal parameters for execution on the available hardware. .. code:: ipython3 - from openvino.runtime import Core, AsyncInferQueue, InferRequest from typing import Any @@ -910,7 +915,7 @@ parameters for execution on the available hardware. To further optimize hardware utilization, let’s change the inference mode from synchronous (Sync) to asynchronous (Async). While the synchronous API may be easier to start with, it is -`recommended `__ +`recommended `__ to use the asynchronous (callbacks-based) API in production code. It is the most general and scalable way to implement flow control for any number of requests. @@ -929,13 +934,13 @@ advance and fill it in as the inference requests are executed. .. code:: ipython3 def get_embeddings_async(sentences: List[str], embedding_model: OVModel) -> np.ndarray: - def callback(infer_request: InferRequest, user_data: List[Any]) -> None: + def callback(infer_request: ov.InferRequest, user_data: List[Any]) -> None: embeddings, idx, pbar = user_data embedding = infer_request.get_output_tensor(0).data[0, 0] embeddings[idx] = embedding pbar.update() - infer_queue = AsyncInferQueue(embedding_model) + infer_queue = ov.AsyncInferQueue(embedding_model) infer_queue.set_callback(callback) embedding_dim = ( @@ -955,11 +960,8 @@ advance and fill it in as the inference requests are executed. Let’s compare the models and plot the results. -.. 
note:: - - To get a more accurate benchmark, use the `Benchmark Python - Tool `__ - + Note: To get a more accurate benchmark, use the `Benchmark Python + Tool `__ .. code:: ipython3 @@ -1074,8 +1076,8 @@ boost. Here are useful links with information about the techniques used in this notebook: - `OpenVINO performance -hints `__ +hints `__ - `OpenVINO Async -API `__ +API `__ - `Throughput -Optimizations `__ +Optimizations `__ diff --git a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_31_0.png b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_31_0.png index 153bcc7bee1cd5..b2ceabfb965558 100644 --- a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_31_0.png +++ b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_31_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df7720db03699025cc9a4f4a6c63b5a062961e1051ba2f8183328233097dce7e +oid sha256:ce28e8a2906c5681f16ca683fd2ed51f24efdf0e16b3245ddafe16b9556e28f7 size 24464 diff --git a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_48_0.png b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_48_0.png index a9f800711b17cd..02bdaf9d68fca0 100644 --- a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_48_0.png +++ b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/220-cross-lingual-books-alignment-with-output_48_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a4a7fe4908f5932347d616812fa74b28587eb644058a6f19a03dbab178a31b4 -size 31382 +oid sha256:f4597f3d5412fcd4194f85bb9cd30734052a8de052a11eba2b57b741f89bdbeb 
+size 32175 diff --git a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/index.html b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/index.html index fbb2fd2341b0cb..687e04a344b339 100644 --- a/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/index.html +++ b/docs/notebooks/220-cross-lingual-books-alignment-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/220-cross-lingual-books-alignment-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/220-cross-lingual-books-alignment-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/220-cross-lingual-books-alignment-with-output_files/


../
-220-cross-lingual-books-alignment-with-output_3..> 16-Aug-2023 01:31               24464
-220-cross-lingual-books-alignment-with-output_4..> 16-Aug-2023 01:31               31382
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/220-cross-lingual-books-alignment-with-output_files/


../
+220-cross-lingual-books-alignment-with-output_3..> 31-Oct-2023 00:35               24464
+220-cross-lingual-books-alignment-with-output_4..> 31-Oct-2023 00:35               32175
 

diff --git a/docs/notebooks/221-machine-translation-with-output.rst b/docs/notebooks/221-machine-translation-with-output.rst index 31a2cbc2ba70aa..a21793b053a845 100644 --- a/docs/notebooks/221-machine-translation-with-output.rst +++ b/docs/notebooks/221-machine-translation-with-output.rst @@ -18,102 +18,133 @@ The structure is the same as the one for the input. **Table of contents:** -- `Downloading model <#downloading-model>`__ -- `Load and configure the model <#load-and-configure-the-model>`__ -- `Select inference device <#select-inference-device>`__ -- `Load tokenizers <#load-tokenizers>`__ -- `Perform translation <#perform-translation>`__ -- `Translate the sentence <#translate-the-sentence>`__ - - `Test your translation <#test-your-translation>`__ +- `Downloading model <#downloading-model>`__ +- `Load and configure the + model <#load-and-configure-the-model>`__ +- `Select inference device <#select-inference-device>`__ +- `Load tokenizers <#load-tokenizers>`__ +- `Perform translation <#perform-translation>`__ +- `Translate the sentence <#translate-the-sentence>`__ + + - `Test your translation <#test-your-translation>`__ .. code:: ipython3 - # Install requirements - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q tokenizers + # # Install requirements + %pip install -q "openvino>=2023.1.0" + %pip install -q tokenizers .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 import time - from openvino.runtime import Core + import sys + import openvino as ov import numpy as np import itertools + from pathlib import Path from tokenizers import SentencePieceBPETokenizer + + sys.path.append("../utils") + from notebook_utils import download_file -Downloading model -############################################################################################################################### +Downloading model +----------------------------------------------------------- The following command will download the model to the current directory. Make sure you have run ``pip install openvino-dev`` beforehand. .. code:: ipython3 - ! 
omz_downloader --name machine-translation-nar-en-de-0002 + base_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1" + model_name = "machine-translation-nar-en-de-0002" + precision = "FP32" + model_base_dir = Path("model") + model_base_dir.mkdir(exist_ok=True) + model_path = model_base_dir / f"{model_name}.xml" + src_tok_dir = model_base_dir / "tokenizer_src" + target_tok_dir = model_base_dir / "tokenizer_tgt" + src_tok_dir.mkdir(exist_ok=True) + target_tok_dir.mkdir(exist_ok=True) + + download_file(base_url + f'/{model_name}/{precision}/{model_name}.xml', f"{model_name}.xml", model_base_dir) + download_file(base_url + f'/{model_name}/{precision}/{model_name}.bin', f"{model_name}.bin", model_base_dir) + download_file(f"{base_url}/{model_name}/tokenizer_src/merges.txt", "merges.txt", src_tok_dir) + download_file(f"{base_url}/{model_name}/tokenizer_tgt/merges.txt", "merges.txt", target_tok_dir) + download_file(f"{base_url}/{model_name}/tokenizer_src/vocab.json", "vocab.json", src_tok_dir) + download_file(f"{base_url}/{model_name}/tokenizer_tgt/vocab.json", "vocab.json", target_tok_dir); + .. 
parsed-literal:: - ################|| Downloading machine-translation-nar-en-de-0002 ||################ - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/tokenizer_tgt/merges.txt - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/tokenizer_tgt/vocab.json - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/tokenizer_src/merges.txt - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/tokenizer_src/vocab.json - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/FP32/machine-translation-nar-en-de-0002.xml - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/FP32/machine-translation-nar-en-de-0002.bin - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/FP16/machine-translation-nar-en-de-0002.xml - - - ========== Downloading /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/221-machine-translation/intel/machine-translation-nar-en-de-0002/FP16/machine-translation-nar-en-de-0002.bin - - + model/machine-translation-nar-en-de-0002.xml: 
0%| | 0.00/825k [00:00`__ -repository for more details. +repository for more details. **Table of contents:** +--- + +- `Imports <#imports>`__ +- `Configurations <#configurations>`__ +- `Select inference device <#select-inference-device>`__ +- `Download the model <#download-the-model>`__ +- `Convert the model to OpenVINO IR <#convert-the-model-to-openvino-ir>`__ +- `Loading the Model <#loading-the-model>`__ +- `Utility Functions <#utility-functions>`__ +- `Load the Image <#load-the-image>`__ +- `Display Colorized Image <#display-colorized-image>`__ -- `Imports <#imports>`__ -- `Configurations <#configurations>`__ +.. code:: ipython3 - - `Select inference device <#select-inference-device>`__ + %pip install "openvino-dev>=2023.1.0" -- `Download the model <#download-the-model>`__ -- `Convert the model to OpenVINO IR <#convert-the-model-to-openvino-ir>`__ -- `Loading the Model <#loading-the-model>`__ -- `Utility Functions <#utility-functions>`__ -- `Load the Image <#load-the-image>`__ -- `Display Colorized Image <#display-colorized-image>`__ -Imports -############################################################################################################################### +.. 
parsed-literal:: + + Requirement already satisfied: openvino-dev>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2023.1.0) + Requirement already satisfied: addict>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (2.4.0) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (0.7.1) + Requirement already satisfied: jstyleson>=0.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (0.0.2) + Requirement already satisfied: networkx<=3.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (2.8.2) + Requirement already satisfied: numpy>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (1.24.3) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (4.8.1.78) + Requirement already satisfied: openvino-telemetry>=2022.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (2023.2.1) + Requirement already satisfied: pillow>=8.1.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (10.0.1) + Requirement already satisfied: 
pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (6.0.1) + Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (2.31.0) + Requirement already satisfied: texttable>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (1.7.0) + Requirement already satisfied: tqdm>=4.54.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (4.66.1) + Requirement already satisfied: openvino==2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (2023.1.0) + Requirement already satisfied: scipy<1.11,>=1.8 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2023.1.0) (1.10.1) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2023.1.0) (3.3.1) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2023.1.0) (3.4) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2023.1.0) (2.0.7) + Requirement 
already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2023.1.0) (2023.7.22) + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. code:: ipython3 @@ -68,13 +97,13 @@ Imports import cv2 import matplotlib.pyplot as plt import numpy as np - from openvino.runtime import Core + import openvino as ov sys.path.append("../utils") import notebook_utils as utils -Configurations -############################################################################################################################### +Configurations +-------------------------------------------------------- - ``PRECISION`` - {FP16, FP32}, default: FP16. - ``MODEL_DIR`` - directory where the model is to be stored, default: @@ -92,16 +121,16 @@ Configurations MODEL_PATH = f"{MODEL_DIR}/public/{MODEL_NAME}/{PRECISION}/{MODEL_NAME}.xml" DATA_DIR = "data" -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 import ipywidgets as widgets - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -121,8 +150,8 @@ Select device from dropdown list for running inference using OpenVINO -Download the model -############################################################################################################################### +Download the model +------------------------------------------------------------ ``omz_downloader`` downloads model files from online sources and, if necessary, patches them to make them more usable with Model Converter. @@ -170,8 +199,8 @@ above. -Convert the model to OpenVINO IR -############################################################################################################################### +Convert the model to OpenVINO IR +-------------------------------------------------------------------------- ``omz_converter`` converts the models that are not in the OpenVINO™ IR format into that format using model conversion API. @@ -196,41 +225,40 @@ respectively .. 
parsed-literal:: ========== Converting colorization-v2 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-path=models/public/colorization-v2 --model-name=ECCVGenerator --weights=models/public/colorization-v2/ckpt/colorization-v2-eccv16.pth --import-module=model --input-shape=1,1,256,256 --output-file=models/public/colorization-v2/colorization-v2-eccv16.onnx --input-names=data_l --output-names=color_ab + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-path=models/public/colorization-v2 --model-name=ECCVGenerator --weights=models/public/colorization-v2/ckpt/colorization-v2-eccv16.pth --import-module=model --input-shape=1,1,256,256 --output-file=models/public/colorization-v2/colorization-v2-eccv16.onnx --input-names=data_l --output-names=color_ab ONNX check passed successfully. 
========== Converting colorization-v2 to IR (FP16) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=models/public/colorization-v2/FP16 --model_name=colorization-v2 --input=data_l --output=color_ab --input_model=models/public/colorization-v2/colorization-v2-eccv16.onnx '--layout=data_l(NCHW)' '--input_shape=[1, 1, 256, 256]' --compress_to_fp16=True + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=models/public/colorization-v2/FP16 --model_name=colorization-v2 --input=data_l --output=color_ab --input_model=models/public/colorization-v2/colorization-v2-eccv16.onnx '--layout=data_l(NCHW)' '--input_shape=[1, 1, 256, 256]' --compress_to_fp16=True [ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False. Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/222-vision-image-colorization/models/public/colorization-v2/FP16/colorization-v2.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/notebooks/222-vision-image-colorization/models/public/colorization-v2/FP16/colorization-v2.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/222-vision-image-colorization/models/public/colorization-v2/FP16/colorization-v2.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/222-vision-image-colorization/models/public/colorization-v2/FP16/colorization-v2.bin -Loading the Model -############################################################################################################################### +Loading the Model +----------------------------------------------------------- Load the model in OpenVINO Runtime with ``ie.read_model`` and compile it for the specified device with ``ie.compile_model``. .. code:: ipython3 - core = Core() + core = ov.Core() model = core.read_model(model=MODEL_PATH) compiled_model = core.compile_model(model=model, device_name=device.value) input_layer = compiled_model.input(0) output_layer = compiled_model.output(0) N, C, H, W = list(input_layer.shape) - -Utility Functions -############################################################################################################################### +Utility Functions +----------------------------------------------------------- .. 
code:: ipython3 @@ -306,8 +334,8 @@ Utility Functions plt.show() -Load the Image -############################################################################################################################### +Load the Image +-------------------------------------------------------- .. code:: ipython3 @@ -372,9 +400,8 @@ Load the Image color_img_0 = colorize(test_img_0) color_img_1 = colorize(test_img_1) - -Display Colorized Image -############################################################################################################################### +Display Colorized Image +----------------------------------------------------------------- .. code:: ipython3 @@ -382,7 +409,7 @@ Display Colorized Image -.. image:: 222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_20_0.png +.. image:: 222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png .. code:: ipython3 @@ -391,5 +418,5 @@ Display Colorized Image -.. image:: 222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png +.. 
image:: 222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_22_0.png diff --git a/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_20_0.png b/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_20_0.png deleted file mode 100644 index dc2131ada32e7e..00000000000000 --- a/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_20_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:baf459a5b32787c74fb023dbf7e9e171c904903f01bc08f3c4f26aa852ea83a8 -size 415792 diff --git a/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png b/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png index 446d9025c8cdc4..de27fb581546e2 100644 --- a/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png +++ b/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e454a31f9db9aeea8b4ee9c6281961ff88beaf3d41c2fbfd9280d2da52d5c89 -size 284966 +oid sha256:8f5ed6ce204a5cf8fe43b1e1b8ed4db381192224483065096a6f443cc495456d +size 415784 diff --git a/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_22_0.png b/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_22_0.png new file mode 100644 index 00000000000000..8678fb185fb8ab --- /dev/null +++ b/docs/notebooks/222-vision-image-colorization-with-output_files/222-vision-image-colorization-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0136cde50e53bc35d6ee813518d5b93275c0c521076383ec94adf538da24b035 +size 284968 diff --git a/docs/notebooks/222-vision-image-colorization-with-output_files/index.html b/docs/notebooks/222-vision-image-colorization-with-output_files/index.html index f7bb42accee900..e42b88d06cf194 100644 --- a/docs/notebooks/222-vision-image-colorization-with-output_files/index.html +++ b/docs/notebooks/222-vision-image-colorization-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/222-vision-image-colorization-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/222-vision-image-colorization-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/222-vision-image-colorization-with-output_files/


../
-222-vision-image-colorization-with-output_20_0.png 16-Aug-2023 01:31              415792
-222-vision-image-colorization-with-output_21_0.png 16-Aug-2023 01:31              284966
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/222-vision-image-colorization-with-output_files/


../
+222-vision-image-colorization-with-output_21_0.png 31-Oct-2023 00:35              415784
+222-vision-image-colorization-with-output_22_0.png 31-Oct-2023 00:35              284968
 

diff --git a/docs/notebooks/223-text-prediction-with-output.rst b/docs/notebooks/223-text-prediction-with-output.rst deleted file mode 100644 index 835db52c85ec69..00000000000000 --- a/docs/notebooks/223-text-prediction-with-output.rst +++ /dev/null @@ -1,679 +0,0 @@ -Text Prediction with OpenVINO™ -============================== - -This notebook shows text prediction with OpenVINO. This notebook can -work in two different modes, Text Generation and Conversation, which the -user can select via selecting the model in the Model Selection Section. -We use three models -`GPT-2 `__, -`GPT-Neo `__, and -`PersonaGPT `__, which are a part of -the Generative Pre-trained Transformer (GPT) family. GPT-2 and GPT-Neo -can be used for text generation, whereas PersonaGPT is trained for the -downstream task of conversation. - -GPT-2 and GPT-Neo are pre-trained on a large corpus of English text -using unsupervised training. They both display a broad set of -capabilities, including the ability to generate conditional synthetic -text samples of unprecedented quality, where we prime the model with an -input and have it generate a lengthy continuation. - -More details about the models are provided on their HuggingFace cards: - -- `GPT-2 `__ -- `GPT-Neo `__ - -PersonaGPT is an open-domain conversational agent that can decode -*personalized* and *controlled* responses based on user input. It is -built on the pretrained -`DialoGPT-medium `__ model, -following the `GPT-2 `__ architecture. -PersonaGPT is fine-tuned on the -`Persona-Chat `__ dataset. The model -is available from -`HuggingFace `__. PersonaGPT -displays a broad set of capabilities, including the ability to take on -personas, where we prime the model with few facts and have it generate -based upon that, it can also be used for creating a chatbot on a -knowledge base. - -The following image illustrates the complete demo pipeline used for text -generation: - -.. 
figure:: https://user-images.githubusercontent.com/91228207/163990722-d2713ede-921e-4594-8b00-8b5c1a4d73b5.jpeg - :alt: image2 - - image2 - -This is a demonstration in which the user can type the beginning of the -text and the network will generate a further. This procedure can be -repeated as many times as the user desires. - -For Text Generation, The model input is tokenized text, which serves as -the initial condition for text generation. Then, logits from the models’ -inference results are obtained, and the token with the highest -probability is selected using the top-k sampling strategy and joined to -the input sequence. This procedure repeats until the end of the sequence -token is received or the specified maximum length is reached. After -that, tokenized IDs are decoded to text. - -The following image illustrates the demo pipeline for conversation: - -.. figure:: https://user-images.githubusercontent.com/95569637/226101538-e204aebd-a34f-4c8b-b90c-5363ba41c080.jpeg - :alt: image2 - - image2 - -For Conversation, User Input is tokenized with ``eos_token`` -concatenated in the end. Then, the text gets generated as detailed -above. The Generated response is added to the history with the -``eos_token`` at the end. Additional user input is added to the history, -and the sequence is passed back into the model. 
- -**Table of contents:** - -- `Model Selection <#model-selection>`__ -- `Load Model <#load-model>`__ -- `Convert Pytorch Model to OpenVINO IR <#convert-pytorch-model-to-openvino-ir>`__ - - - `Load the model <#load-the-model>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Pre-Processing <#pre-processing>`__ -- `Define tokenization <#define-tokenization>`__ - - - `Define Softmax layer <#define-softmax-layer>`__ - - `Set the minimum sequence length <#set-the-minimum-sequence-length>`__ - - `Top-K sampling <#top-k-sampling>`__ - - `Main Processing Function <#main-processing-function>`__ - -- `Inference with GPT-Neo/GPT-2 <#inference-with-gpt-neo-gpt-2>`__ -- `Conversation with PersonaGPT using OpenVINO™ <#conversation-with-personagpt-using-openvino>`__ -- `Converse Function <#converse-function>`__ -- `Conversation Class <#conversation-class>`__ -- `Conversation with PersonaGPT <#conversation-with-personagpt>`__ - -Model Selection -############################################################################################################################### - -Select the Model to be used for text generation, GPT-2 and GPT-Neo are -used for text generation whereas PersonaGPT is used for Conversation. - -.. code:: ipython3 - - # Install Gradio for Interactive Inference and other requirements - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q gradio - !pip install -q transformers[torch] onnx - - -.. parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.24.3 which is incompatible. - pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 4.24.3 which is incompatible. - tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 4.24.3 which is incompatible. - - -.. code:: ipython3 - - from gradio import Blocks, Chatbot, Textbox, Row, Column - import ipywidgets as widgets - - style = {'description_width': 'initial'} - model_name = widgets.Select( - options=['PersonaGPT (Converastional)', 'GPT-2', 'GPT-Neo'], - value='PersonaGPT (Converastional)', - description='Select Model:', - disabled=False - ) - - widgets.VBox([model_name]) - - - - -.. parsed-literal:: - - VBox(children=(Select(description='Select Model:', options=('PersonaGPT (Converastional)', 'GPT-2', 'GPT-Neo')… - - - -Load Model -############################################################################################################################### - -Download the Selected Model and Tokenizer from HuggingFace - -.. 
code:: ipython3 - - from transformers import GPTNeoForCausalLM, GPT2TokenizerFast, GPT2Tokenizer, GPT2LMHeadModel - - if model_name.value == "PersonaGPT (Converastional)": - pt_model = GPT2LMHeadModel.from_pretrained('af1tang/personaGPT') - tokenizer = GPT2Tokenizer.from_pretrained('af1tang/personaGPT') - elif model_name.value == 'GPT-2': - pt_model = GPT2LMHeadModel.from_pretrained('gpt2') - tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - elif model_name.value == 'GPT-Neo': - pt_model = GPTNeoForCausalLM.from_pretrained('EleutherAI/gpt-neo-125M') - tokenizer = GPT2TokenizerFast.from_pretrained('EleutherAI/gpt-neo-125M') - - -.. parsed-literal:: - - 2023-09-08 23:43:01.055206: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-09-08 23:43:01.090531: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-09-08 23:43:01.738604: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Convert Pytorch Model to OpenVINO IR -############################################################################################################################### - -.. figure:: https://user-images.githubusercontent.com/29454499/211261803-784d4791-15cb-4aea-8795-0969dfbb8291.png - :alt: conversion_pipeline - - conversion_pipeline - -For starting work with GPT-Neo model using OpenVINO, a model should be -converted to OpenVINO Intermediate Representation (IR) format. 
-HuggingFace provides a GPT-Neo model in PyTorch format, which is -supported in OpenVINO via conversion to ONNX. We use the HuggingFace -transformers library’s onnx module to export the model to ONNX. -``transformers.onnx.export`` accepts the preprocessing function for -input sample generation (the tokenizer in our case), an instance of the -model, ONNX export configuration, the ONNX opset version for export and -output path. More information about transformers export to ONNX can be -found in HuggingFace -`documentation `__. - -While ONNX models are directly supported by OpenVINO runtime, it can be -useful to convert them to IR format to take advantage of OpenVINO -optimization tools and features. The ``ov.convert_model`` Python -function of `model conversion -API `__ -can be used for converting the model. The function returns instance of -OpenVINO Model class, which is ready to use in Python interface. The -Model can also be save on device in OpenVINO IR format for future -execution using ``ov.save_model``. In our case dynamic input shapes with -a possible shape range (from 1 token to a maximum length defined in our -processing function) are specified for optimization of memory -consumption. - -.. 
code:: ipython3 - - from pathlib import Path - from transformers.onnx import export, FeaturesManager - - import openvino as ov - - # define path for saving onnx model - onnx_path = Path("model/text_generator.onnx") - onnx_path.parent.mkdir(exist_ok=True) - - # define path for saving openvino model - model_path = onnx_path.with_suffix(".xml") - - # get model onnx config function for output feature format casual-lm - model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(pt_model, feature='causal-lm') - - # fill onnx config based on pytorch model config - onnx_config = model_onnx_config(pt_model.config) - - # convert model to onnx - onnx_inputs, onnx_outputs = export(preprocessor=tokenizer,model=pt_model,config=onnx_config,opset=onnx_config.default_onnx_opset,output=onnx_path) - - # convert model to openvino - if model_name.value == "PersonaGPT (Converastional)": - ov_model = ov.convert_model(onnx_path, input=[('input_ids', [1, -1], ov.Type.i64), ('attention_mask', [1,-1], ov.Type.i64)]) - else: - ov_model = ov.convert_model(onnx_path, input=[('input_ids', [1, ov.Dimension(1,128)], ov.Type.i64), ('attention_mask', [1, ov.Dimension(1,128)], ov.Type.i64)]) - - # serialize openvino model - ov.save_model(ov_model, str(model_path)) - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/gpt2/modeling_gpt2.py:807: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if batch_size <= 0: - - -Load the model -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -We start by building an OpenVINO Core object. 
Then we read the network -architecture and model weights from the ``.xml`` and ``.bin`` files, -respectively. Finally, we compile the model for the desired device. - -Select inference device -------------------------------------------------------------------------------------------------------------------------------- - -Select device from dropdown list for running inference using OpenVINO: - -.. code:: ipython3 - - import ipywidgets as widgets - - # initialize openvino core - core = ov.Core() - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value='AUTO', - description='Device:', - disabled=False, - ) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - # read the model and corresponding weights from file - model = core.read_model(model_path) - -.. code:: ipython3 - - # compile the model for CPU devices - compiled_model = core.compile_model(model=model, device_name=device.value) - - # get output tensors - output_key = compiled_model.output(0) - -Input keys are the names of the input nodes and output keys contain -names of the output nodes of the network. In the case of GPT-Neo, we -have ``batch size`` and ``sequence length`` as inputs and -``batch size``, ``sequence length`` and ``vocab size`` as outputs. - -Pre-Processing -############################################################################################################################### - -NLP models often take a list of tokens as a standard input. A token is a -word or a part of a word mapped to an integer. To provide the proper -input, we use a vocabulary file to handle the mapping. So first let’s -load the vocabulary file. - -Define tokenization -############################################################################################################################### - -.. 
code:: ipython3 - - from typing import List, Tuple - - - # this function converts text to tokens - def tokenize(text: str) -> Tuple[List[int], List[int]]: - """ - tokenize input text using GPT2 tokenizer - - Parameters: - text, str - input text - Returns: - input_ids - np.array with input token ids - attention_mask - np.array with 0 in place, where should be padding and 1 for places where original tokens are located, represents attention mask for model - """ - - inputs = tokenizer(text, return_tensors="np") - return inputs["input_ids"], inputs["attention_mask"] - -``eos_token`` is special token, which means that generation is finished. -We store the index of this token in order to use this index as padding -at later stage. - -.. code:: ipython3 - - eos_token_id = tokenizer.eos_token_id - eos_token = tokenizer.decode(eos_token_id) - -Define Softmax layer -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -A softmax function is used to convert top-k logits into a probability -distribution. - -.. code:: ipython3 - - import numpy as np - - - def softmax(x : np.array) -> np.array: - e_x = np.exp(x - np.max(x, axis=-1, keepdims=True)) - summation = e_x.sum(axis=-1, keepdims=True) - return e_x / summation - -Set the minimum sequence length -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -If the minimum sequence length is not reached, the following code will -reduce the probability of the ``eos`` token occurring. This continues -the process of generating the next words. - -.. code:: ipython3 - - def process_logits(cur_length: int, scores: np.array, eos_token_id : int, min_length : int = 0) -> np.array: - """ - Reduce probability for padded indices. - - Parameters: - cur_length: Current length of input sequence. - scores: Model output logits. - eos_token_id: Index of end of string token in model vocab. 
- min_length: Minimum length for applying postprocessing. - - Returns: - Processed logits with reduced probability for padded indices. - """ - if cur_length < min_length: - scores[:, eos_token_id] = -float("inf") - return scores - -Top-K sampling -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -In Top-K sampling, we filter the K most likely next words and -redistribute the probability mass among only those K next words. - -.. code:: ipython3 - - def get_top_k_logits(scores : np.array, top_k : int) -> np.array: - """ - Perform top-k sampling on the logits scores. - - Parameters: - scores: np.array, model output logits. - top_k: int, number of elements with the highest probability to select. - - Returns: - np.array, shape (batch_size, sequence_length, vocab_size), - filtered logits scores where only the top-k elements with the highest - probability are kept and the rest are replaced with -inf - """ - filter_value = -float("inf") - top_k = min(max(top_k, 1), scores.shape[-1]) - top_k_scores = -np.sort(-scores)[:, :top_k] - indices_to_remove = scores < np.min(top_k_scores) - filtred_scores = np.ma.array(scores, mask=indices_to_remove, - fill_value=filter_value).filled() - return filtred_scores - -Main Processing Function -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Generating the predicted sequence. - -.. code:: ipython3 - - def generate_sequence(input_ids : List[int], attention_mask : List[int], max_sequence_length : int = 128, - eos_token_id : int = eos_token_id, dynamic_shapes : bool = True) -> List[int]: - """ - Generates a sequence of tokens using a pre-trained language model. 
- - Parameters: - input_ids: np.array, tokenized input ids for model - attention_mask: np.array, attention mask for model - max_sequence_length: int, maximum sequence length for stopping iteration - eos_token_id: int, index of the end-of-sequence token in the model's vocabulary - dynamic_shapes: bool, whether to use dynamic shapes for inference or pad model input to max_sequence_length - - Returns: - np.array, the predicted sequence of token ids - """ - while True: - cur_input_len = len(input_ids[0]) - if not dynamic_shapes: - pad_len = max_sequence_length - cur_input_len - model_input_ids = np.concatenate((input_ids, [[eos_token_id] * pad_len]), axis=-1) - model_input_attention_mask = np.concatenate((attention_mask, [[0] * pad_len]), axis=-1) - else: - model_input_ids = input_ids - model_input_attention_mask = attention_mask - outputs = compiled_model({"input_ids": model_input_ids, "attention_mask": model_input_attention_mask})[output_key] - next_token_logits = outputs[:, cur_input_len - 1, :] - # pre-process distribution - next_token_scores = process_logits(cur_input_len, - next_token_logits, eos_token_id) - top_k = 20 - next_token_scores = get_top_k_logits(next_token_scores, top_k) - # get next token id - probs = softmax(next_token_scores) - next_tokens = np.random.choice(probs.shape[-1], 1, - p=probs[0], replace=True) - # break the loop if max length or end of text token is reached - if cur_input_len == max_sequence_length or next_tokens[0] == eos_token_id: - break - else: - input_ids = np.concatenate((input_ids, [next_tokens]), axis=-1) - attention_mask = np.concatenate((attention_mask, [[1] * len(next_tokens)]), axis=-1) - return input_ids - -Inference with GPT-Neo/GPT-2 -############################################################################################################################### - -The ``text`` variable below is the input used to generate a predicted -sequence. - -.. 
code:: ipython3 - - import time - if not model_name.value == "PersonaGPT (Converastional)": - text = "Deep learning is a type of machine learning that uses neural networks" - input_ids, attention_mask = tokenize(text) - - start = time.perf_counter() - output_ids = generate_sequence(input_ids, attention_mask) - end = time.perf_counter() - output_text = " " - # Convert IDs to words and make the sentence from it - for i in output_ids[0]: - output_text += tokenizer.batch_decode([i])[0] - print(f"Generation took {end - start:.3f} s") - print(f"Input Text: {text}") - print() - print(f"{model_name.value}: {output_text}") - else: - print("Selected Model is PersonaGPT. Please select GPT-Neo or GPT-2 in the first cell to generate text sequences") - - -.. parsed-literal:: - - Selected Model is PersonaGPT. Please select GPT-Neo or GPT-2 in the first cell to generate text sequences - - -Conversation with PersonaGPT using OpenVINO -===================================================================================== - -User Input is tokenized with ``eos_token`` concatenated in the end. -Model input is tokenized text, which serves as initial condition for -generation, then logits from model inference result should be obtained -and token with the highest probability is selected using top-k sampling -strategy and joined to input sequence. The procedure repeats until end -of sequence token will be received or specified maximum length is -reached. After that, decoding token ids to text using tokenized should -be applied. - -The Generated response is added to the history with the ``eos_token`` at -the end. Further User Input is added to it and again passed into the -model. - -Converse Function -############################################################################################################################### - -Wrapper on generate sequence function to support conversation - -.. 
code:: ipython3 - - def converse(input: str, history: List[int], eos_token: str = eos_token, - eos_token_id: int = eos_token_id) -> Tuple[str, List[int]]: - """ - Converse with the Model. - - Parameters: - input: Text input given by the User - history: Chat History, ids of tokens of chat occured so far - eos_token: end of sequence string - eos_token_id: end of sequence index from vocab - Returns: - response: Text Response generated by the model - history: Chat History, Ids of the tokens of chat occured so far,including the tokens of generated response - """ - - # Get Input Ids of the User Input - new_user_input_ids, _ = tokenize(input + eos_token) - - # append the new user input tokens to the chat history, if history exists - if len(history) == 0: - bot_input_ids = new_user_input_ids - else: - bot_input_ids = np.concatenate([history, new_user_input_ids[0]]) - bot_input_ids = np.expand_dims(bot_input_ids, axis=0) - - # Create Attention Mask - bot_attention_mask = np.ones_like(bot_input_ids) - - # Generate Response from the model - history = generate_sequence(bot_input_ids, bot_attention_mask, max_sequence_length=1000) - - # Add the eos_token to mark end of sequence - history = np.append(history[0], eos_token_id) - - # convert the tokens to text, and then split the responses into lines and retrieve the response from the Model - response = ''.join(tokenizer.batch_decode(history)).split(eos_token)[-2] - return response, history - -Conversation Class -############################################################################################################################### - -.. code:: ipython3 - - class Conversation: - def __init__(self): - # Initialize Empty History - self.history = [] - self.messages = [] - - def chat(self, input_text): - """ - Wrapper Over Converse Function. 
- Parameters: - input_text: Text input given by the User - Returns: - response: Text Response generated by the model - """ - response, self.history = converse(input_text, self.history) - self.messages.append(f"Person: {input_text}") - self.messages.append(f"PersonaGPT: {response}") - return response - -Conversation with PersonaGPT -############################################################################################################################### - -This notebook provides two styles of inference, Plain and Interactive. -The style of inference can be selected in the next cell. - -.. code:: ipython3 - - style = {'description_width': 'initial'} - interactive_mode = widgets.Select( - options=['Plain', 'Interactive'], - value='Plain', - description='Inference Style:', - disabled=False - ) - - widgets.VBox([interactive_mode]) - - - - -.. parsed-literal:: - - VBox(children=(Select(description='Inference Style:', options=('Plain', 'Interactive'), value='Plain'),)) - - - -.. code:: ipython3 - - if model_name.value == "PersonaGPT (Converastional)": - if interactive_mode.value == 'Plain': - conversation = Conversation() - user_prompt = None - pre_written_prompts = ["Hi,How are you?", "What are you doing?", "I like to dance,do you?", "Can you recommend me some books?"] - # Number of responses generated by model - n_prompts = 10 - for i in range(n_prompts): - # Uncomment for taking User Input - # user_prompt = input() - if not user_prompt: - user_prompt = pre_written_prompts[i % len(pre_written_prompts)] - conversation.chat(user_prompt) - print(conversation.messages[-2]) - print(conversation.messages[-1]) - user_prompt = None - else: - def add_text(history, text): - history = history + [(text, None)] - return history, "" - - conversation = Conversation() - - def bot(history): - conversation.chat(history[-1][0]) - response = conversation.messages[-1] - history[-1][1] = response - return history - - with Blocks() as demo: - chatbot = Chatbot([], 
elem_id="chatbot").style() - - with Row(): - with Column(): - txt = Textbox( - show_label=False, - placeholder="Enter text and press enter, or upload an image", - ).style(container=False) - - txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then( - bot, chatbot, chatbot - ) - - demo.launch() - else: - print("Selected Model is not PersonaGPT, Please select PersonaGPT in the first cell to have a conversation") - - -.. parsed-literal:: - - Person: Hi,How are you? - PersonaGPT: good and you? - Person: What are you doing? - PersonaGPT: working on my studies - Person: I like to dance,do you? - PersonaGPT: i enjoy dance, whats your favorite dance? - Person: Can you recommend me some books? - PersonaGPT: do you like to read? - Person: Hi,How are you? - PersonaGPT: good and you? - Person: What are you doing? - PersonaGPT: what are you doing right now? - Person: I like to dance,do you? - PersonaGPT: i enjoy dance too. - Person: Can you recommend me some books? - PersonaGPT: what do you like about dance? - Person: Hi,How are you? - PersonaGPT: i'm good thanks. - Person: What are you doing? - PersonaGPT: working on studying. - diff --git a/docs/notebooks/224-3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/224-3D-segmentation-point-clouds-with-output.rst index ff203173791738..53fd6f3522bd0c 100644 --- a/docs/notebooks/224-3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/224-3D-segmentation-point-clouds-with-output.rst @@ -8,7 +8,7 @@ Part Segmentation with OpenVINO. We use the detect each part of a chair and return its category. PointNet -############################################################################################################################### +-------- PointNet was proposed by Charles Ruizhongtai Qi, a researcher at Stanford University in 2016: `PointNet: Deep Learning on Point Sets for @@ -23,34 +23,51 @@ effective, showing strong performance on par or even better than state of the art. 
**Table of contents:** +--- - `Imports <#imports>`__ -- `Prepare the Model <#prepare-the-model>`__ -- `Data Processing Module <#data-processing-module>`__ -- `Visualize the original 3D data <#visualize-the-original-3d-data>`__ -- `Run inference <#run-inference>`__ - - `Select inference device <#select-inference-device>`__ +- `Prepare the Model <#prepare-the-model>`__ +- `Data Processing Module <#data-processing-module>`__ +- `Visualize the original 3D + data <#visualize-the-original-d-data>`__ +- `Run inference <#run-inference>`__ -Imports -############################################################################################################################### + - `Select inference device <#select-inference-device>`__ + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +------------------------------------------------- .. 
code:: ipython3 - import sys - from pathlib import Path from typing import Union import numpy as np import matplotlib.pyplot as plt - from openvino.runtime import Core, serialize - from openvino.tools import mo + import openvino as ov + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) - sys.path.append("../utils") from notebook_utils import download_file -Prepare the Model -############################################################################################################################### +Prepare the Model +----------------------------------------------------------- Download the pre-trained PointNet ONNX model. This pre-trained model is provided by `axinc-ai `__, and you can @@ -69,31 +86,31 @@ Convert the ONNX model to OpenVINO IR. An OpenVINO IR (Intermediate Representation) model consists of an ``.xml`` file, containing information about network topology, and a ``.bin`` file, containing the weights and biases binary data. Model conversion Python API is used for -conversion of ONNX model to OpenVINO IR. The ``mo.convert_model`` Python +conversion of ONNX model to OpenVINO IR. The ``ov.convert_model`` Python function returns an OpenVINO model ready to load on a device and start making predictions. We can save it on a disk for next usage with -``openvino.runtime.serialize``. For more information about model -conversion Python API, see this +``ov.save_model``. For more information about model conversion Python +API, see this `page `__. .. 
code:: ipython3 ir_model_xml = onnx_model_path.with_suffix(".xml") - core = Core() + core = ov.Core() if not ir_model_xml.exists(): # Convert model to OpenVINO Model - model = mo.convert_model(onnx_model_path, compress_to_fp16=True) + model = ov.convert_model(onnx_model_path) # Serialize model in OpenVINO IR format xml + bin - serialize(model, str(ir_model_xml)) + ov.save_model(model, ir_model_xml) else: # Read model model = core.read_model(model=ir_model_xml) -Data Processing Module -############################################################################################################################### +Data Processing Module +---------------------------------------------------------------- .. code:: ipython3 @@ -147,8 +164,8 @@ Data Processing Module return ax -Visualize the original 3D data -############################################################################################################################### +Visualize the original 3D data +------------------------------------------------------------------------ The point cloud data can be downloaded from `ShapeNet `__, @@ -157,8 +174,13 @@ chair for example. .. code:: ipython3 - point_data = "../data/pts/chair.pts" - points = load_data(point_data) + # Download data from the openvino_notebooks storage + point_data = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/pts/chair.pts", + directory="data" + ) + + points = load_data(str(point_data)) X = points[:, 0] Y = points[:, 2] Z = points[:, 1] @@ -170,11 +192,17 @@ chair for example. -.. image:: 224-3D-segmentation-point-clouds-with-output_files/224-3D-segmentation-point-clouds-with-output_10_0.png +.. 
parsed-literal:: + + data/chair.pts: 0%| | 0.00/69.2k [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/224-3D-segmentation-point-clouds-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/224-3D-segmentation-point-clouds-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/224-3D-segmentation-point-clouds-with-output_files/


../
-224-3D-segmentation-point-clouds-with-output_10..> 16-Aug-2023 01:31              209355
-224-3D-segmentation-point-clouds-with-output_15..> 16-Aug-2023 01:31              222552
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/224-3D-segmentation-point-clouds-with-output_files/


../
+224-3D-segmentation-point-clouds-with-output_11..> 31-Oct-2023 00:35              209355
+224-3D-segmentation-point-clouds-with-output_16..> 31-Oct-2023 00:35              222552
 

diff --git a/docs/notebooks/225-stable-diffusion-text-to-image-with-output.rst b/docs/notebooks/225-stable-diffusion-text-to-image-with-output.rst index bfb59f260ff0d8..fcf724f74884b9 100644 --- a/docs/notebooks/225-stable-diffusion-text-to-image-with-output.rst +++ b/docs/notebooks/225-stable-diffusion-text-to-image-with-output.rst @@ -40,31 +40,33 @@ Notebook contains the following steps: **Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Create PyTorch Models pipeline <#create-pytorch-models-pipeline>`__ -- `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ - - `Text Encoder <#text-encoder>`__ - - `U-net <#u-net>`__ - - `VAE <#vae>`__ +- `Prerequisites <#prerequisites>`__ +- `Create PyTorch Models + pipeline <#create-pytorch-models-pipeline>`__ +- `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ -- `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ -- `Configure Inference Pipeline <#configure-inference-pipeline>`__ + - `Text Encoder <#text-encoder>`__ + - `U-net <#u-net>`__ + - `VAE <#vae>`__ - - `Text-to-Image generation <#text-to-image-generation>`__ - - `Image-to-Image generation <#image-to-image-generation>`__ +- `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ +- `Configure Inference + Pipeline <#configure-inference-pipeline>`__ -.. - `Interactive demo <#interactive-demo>`__ + - `Text-to-Image generation <#text-to-image-generation>`__ + - `Image-to-Image generation <#image-to-image-generation>`__ -Prerequisites -############################################################################################################################### +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- **The following is needed only if you want to use the original model. 
If not, you do not have to do anything. Just run the notebook.** -.. note:: - - The original model (for example, ``stable-diffusion-v1-4``) + **Note**: The original model (for example, ``stable-diffusion-v1-4``) requires you to accept the model license before downloading or using its weights. Visit the `stable-diffusion-v1-4 card `__ to @@ -97,13 +99,14 @@ solutions based on Stable Diffusion. .. code:: ipython3 - !pip install -q "openvino==2023.1.0dev20230811" - !pip install -q "diffusers[torch]>=0.9.0" - !pip install -q "huggingface-hub>=0.9.1" - !pip install -q gradio + %pip install -q "openvino>=2023.1.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "diffusers[torch]>=0.9.0" + %pip install -q "huggingface-hub>=0.9.1" + %pip install -q gradio + %pip install -q transformers -Create PyTorch Models pipeline -############################################################################################################################### +Create PyTorch Models pipeline +------------------------------------------------------------------------ ``StableDiffusionPipeline`` is an end-to-end inference pipeline that you can use to generate images from text with just a few lines of code. @@ -258,8 +261,8 @@ First, load the pre-trained weights of all components of the model. -Convert models to OpenVINO Intermediate representation (IR) format -############################################################################################################################### +Convert models to OpenVINO Intermediate representation (IR) format +------------------------------------------------------------------------------------------------------------ Staring from 2023.0 release, OpenVINO supports direct conversion PyTorch models to OpenVINO IR format. You need to provide a model object and @@ -280,8 +283,8 @@ The model consists of three important parts: Let us convert each part. 
-Text Encoder -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text-encoder is responsible for transforming the input prompt, for example, “a photo of an astronaut riding a horse” into an embedding @@ -376,8 +379,8 @@ hidden states. -U-net -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +U-net +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Unet model has three inputs: @@ -467,8 +470,8 @@ Model predicts the ``sample`` state for the next step. -VAE -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The VAE model has two parts, an encoder and a decoder. The encoder is used to convert the image into a low dimensional latent representation, @@ -596,8 +599,8 @@ of the pipeline, it will be better to convert them to separate models. -Prepare Inference Pipeline -############################################################################################################################### +Prepare Inference Pipeline +-------------------------------------------------------------------- Putting it all together, let us now take a closer look at how the model works in inference by illustrating the logical flow. @@ -998,8 +1001,8 @@ of the variational auto encoder. return timesteps, num_inference_steps - t_start -Configure Inference Pipeline -############################################################################################################################### +Configure Inference Pipeline +---------------------------------------------------------------------- First, you should create instances of OpenVINO Model. @@ -1042,8 +1045,10 @@ Select device from dropdown list for running inference using OpenVINO. .. 
code:: ipython3 - vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value) - vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value) + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) + vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value, ov_config) Model tokenizer and scheduler are also important parts of the pipeline. Let us define them and put all components together @@ -1069,16 +1074,14 @@ Let us define them and put all components together scheduler=lms ) -Text-to-Image generation -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, you can define a text prompt for image generation and run inference pipeline. Optionally, you can also change the random generator seed for latent state initialization and number of steps. -.. note:: - - Consider increasing ``steps`` to get more precise results. + **Note**: Consider increasing ``steps`` to get more precise results. A suggested value is ``50``, but it will take longer time to process. .. code:: ipython3 @@ -1173,8 +1176,8 @@ Now is show time! Nice. As you can see, the picture has quite a high definition 🔥. -Image-to-Image generation -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Image-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Image-to-Image generation, additionally to text prompt, requires providing initial image. Optionally, you can also change ``strength`` @@ -1205,13 +1208,28 @@ semantically consistent with the input. +.. 
code:: ipython3 + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + + from notebook_utils import download_file + .. code:: ipython3 import io - default_image_path = "../data/image/coco.jpg" + default_image_path = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + filename="coco.jpg" + ) + # read uploaded image - image = PIL.Image.open(io.BytesIO(image_widget.value[-1]['content']) if image_widget.value else default_image_path) + image = PIL.Image.open(io.BytesIO(image_widget.value[-1]['content']) if image_widget.value else str(default_image_path)) print('Pipeline settings') print(f'Input text: {text_prompt_i2i.value}') print(f'Seed: {seed_i2i.value}') @@ -1233,7 +1251,7 @@ semantically consistent with the input. -.. image:: 225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_37_1.png +.. image:: 225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_38_1.png @@ -1266,83 +1284,76 @@ semantically consistent with the input. -.. image:: 225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_39_1.png +.. image:: 225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_40_1.png -.. Interactive demo -.. ############################################################################################################################### +Interactive demo +---------------------------------------------------------- -.. .. code:: ipython3 +.. code:: ipython3 -.. import gradio as gr -.. import urllib.request - -.. urllib.request.urlretrieve( -.. "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", -.. 
"coco.jpg" -.. ) - - -.. def generate_from_text(text, seed, num_steps, _=gr.Progress(track_tqdm=True)): -.. result = ov_pipe(text, num_inference_steps=num_steps, seed=seed) -.. return result["sample"][0] - - -.. def generate_from_image(img, text, seed, num_steps, strength, _=gr.Progress(track_tqdm=True)): -.. result = ov_pipe(text, img, num_inference_steps=num_steps, seed=seed, strength=strength) -.. return result["sample"][0] - - -.. with gr.Blocks() as demo: -.. with gr.Tab("Text-to-Image generation"): -.. with gr.Row(): -.. with gr.Column(): -.. text_input = gr.Textbox(lines=3, label="Text") -.. seed_input = gr.Slider(0, 10000000, value=42, label="Seed") -.. steps_input = gr.Slider(1, 50, value=20, step=1, label="Steps") -.. out = gr.Image(label="Result", type="pil") -.. btn = gr.Button() -.. btn.click(generate_from_text, [text_input, seed_input, steps_input], out) -.. gr.Examples([[sample_text, 42, 20]], [text_input, seed_input, steps_input]) -.. with gr.Tab("Image-to-Image generation"): -.. with gr.Row(): -.. with gr.Column(): -.. i2i_input = gr.Image(label="Image", type="pil") -.. i2i_text_input = gr.Textbox(lines=3, label="Text") -.. i2i_seed_input = gr.Slider(0, 1024, value=42, label="Seed") -.. i2i_steps_input = gr.Slider(1, 50, value=10, step=1, label="Steps") -.. strength_input = gr.Slider(0, 1, value=0.5, label="Strength") -.. i2i_out = gr.Image(label="Result") -.. i2i_btn = gr.Button() -.. sample_i2i_text = "amazing watercolor painting" -.. i2i_btn.click( -.. generate_from_image, -.. [i2i_input, i2i_text_input, i2i_seed_input, i2i_steps_input, strength_input], -.. i2i_out, -.. ) -.. gr.Examples( -.. [["coco.jpg", sample_i2i_text, 42, 10, 0.5]], -.. [i2i_input, i2i_text_input, i2i_seed_input, i2i_steps_input, strength_input], -.. 
) + import gradio as gr + + def generate_from_text(text, seed, num_steps, _=gr.Progress(track_tqdm=True)): + result = ov_pipe(text, num_inference_steps=num_steps, seed=seed) + return result["sample"][0] + + + def generate_from_image(img, text, seed, num_steps, strength, _=gr.Progress(track_tqdm=True)): + result = ov_pipe(text, img, num_inference_steps=num_steps, seed=seed, strength=strength) + return result["sample"][0] + + + with gr.Blocks() as demo: + with gr.Tab("Text-to-Image generation"): + with gr.Row(): + with gr.Column(): + text_input = gr.Textbox(lines=3, label="Text") + seed_input = gr.Slider(0, 10000000, value=42, label="Seed") + steps_input = gr.Slider(1, 50, value=20, step=1, label="Steps") + out = gr.Image(label="Result", type="pil") + btn = gr.Button() + btn.click(generate_from_text, [text_input, seed_input, steps_input], out) + gr.Examples([[sample_text, 42, 20]], [text_input, seed_input, steps_input]) + with gr.Tab("Image-to-Image generation"): + with gr.Row(): + with gr.Column(): + i2i_input = gr.Image(label="Image", type="pil") + i2i_text_input = gr.Textbox(lines=3, label="Text") + i2i_seed_input = gr.Slider(0, 1024, value=42, label="Seed") + i2i_steps_input = gr.Slider(1, 50, value=10, step=1, label="Steps") + strength_input = gr.Slider(0, 1, value=0.5, label="Strength") + i2i_out = gr.Image(label="Result") + i2i_btn = gr.Button() + sample_i2i_text = "amazing watercolor painting" + i2i_btn.click( + generate_from_image, + [i2i_input, i2i_text_input, i2i_seed_input, i2i_steps_input, strength_input], + i2i_out, + ) + gr.Examples( + [[str(default_image_path), sample_i2i_text, 42, 10, 0.5]], + [i2i_input, i2i_text_input, i2i_seed_input, i2i_steps_input, strength_input], + ) -.. try: -.. demo.queue().launch(debug=False) -.. except Exception: -.. demo.queue().launch(share=True, debug=False) -.. # if you are launching remotely, specify server_name and server_port -.. # demo.launch(server_name='your server name', server_port='server port in int') -.. 
# Read more in the docs: https://gradio.app/docs/ + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ -.. .. parsed-literal:: +.. parsed-literal:: -.. Running on local URL: http://127.0.0.1:7860 + Running on local URL: http://127.0.0.1:7860 -.. To create a public link, set `share=True` in `launch()`. + To create a public link, set `share=True` in `launch()`. .. .. raw:: html -..
+..
diff --git a/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_37_1.png b/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_38_1.png similarity index 100% rename from docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_37_1.png rename to docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_38_1.png diff --git a/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_39_1.png b/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_40_1.png similarity index 100% rename from docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_39_1.png rename to docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/225-stable-diffusion-text-to-image-with-output_40_1.png diff --git a/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/index.html b/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/index.html index c148f018de1e1c..49f3a05b4767c0 100644 --- a/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/index.html +++ b/docs/notebooks/225-stable-diffusion-text-to-image-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/225-stable-diffusion-text-to-image-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/225-stable-diffusion-text-to-image-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/225-stable-diffusion-text-to-image-with-output_files/


../
-225-stable-diffusion-text-to-image-with-output_..> 16-Aug-2023 01:31              372493
-225-stable-diffusion-text-to-image-with-output_..> 16-Aug-2023 01:31              928896
-225-stable-diffusion-text-to-image-with-output_..> 16-Aug-2023 01:31              726937
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/225-stable-diffusion-text-to-image-with-output_files/


../
+225-stable-diffusion-text-to-image-with-output_..> 31-Oct-2023 00:35              372482
+225-stable-diffusion-text-to-image-with-output_..> 31-Oct-2023 00:35              928958
+225-stable-diffusion-text-to-image-with-output_..> 31-Oct-2023 00:35              726871
 

diff --git a/docs/notebooks/226-yolov7-optimization-with-output.rst b/docs/notebooks/226-yolov7-optimization-with-output.rst index 0f00198465ee14..9fa5d0ad5c3d62 100644 --- a/docs/notebooks/226-yolov7-optimization-with-output.rst +++ b/docs/notebooks/226-yolov7-optimization-with-output.rst @@ -1,8 +1,6 @@ Convert and Optimize YOLOv7 with OpenVINO™ ========================================== - - The YOLOv7 algorithm is making big waves in the computer vision and machine learning communities. It is a real-time object detection algorithm that performs image recognition tasks by taking an image as @@ -40,33 +38,39 @@ The tutorial consists of the following steps: - Compare accuracy of the FP32 and quantized models. - Compare performance of the FP32 and quantized models. -**Table of contents**: +**Table of contents:** -- `Get Pytorch model <#get-pytorch-model>`__ -- `Prerequisites <#prerequisites>`__ -- `Check model inference <#check-model-inference>`__ -- `Export to ONNX <#export-to-onnx>`__ -- `Convert ONNX Model to OpenVINO Intermediate Representation (IR) <#convert-onnx-model-to-openvino-intermediate-representation-ir>`__ -- `Verify model inference <#verify-model-inference>`__ - - `Preprocessing <#preprocessing>`__ - - `Postprocessing <#postprocessing>`__ - - `Select inference device <#select-inference-device>`__ +- `Get Pytorch model <#get-pytorch-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Check model inference <#check-model-inference>`__ +- `Export to ONNX <#export-to-onnx>`__ +- `Convert ONNX Model to OpenVINO Intermediate Representation + (IR) <#convert-onnx-model-to-openvino-intermediate-representation-ir>`__ +- `Verify model inference <#verify-model-inference>`__ -- `Verify model accuracy <#verify-model-accuracy>`__ + - `Preprocessing <#preprocessing>`__ + - `Postprocessing <#postprocessing>`__ + - `Select inference device <#select-inference-device>`__ - - `Download dataset <#download-dataset>`__ - - `Create dataloader <#create-dataloader>`__ - - 
`Define validation function <#define-validation-function>`__ +- `Verify model accuracy <#verify-model-accuracy>`__ -- `Optimize model using NNCF Post-training Quantization API <#optimize-model-using-nncf-post-training-quantization-api>`__ -- `Validate Quantized model inference <#validate-quantized-model-inference>`__ -- `Validate quantized model accuracy <#validate-quantized-model-accuracy>`__ -- `Compare Performance of the Original and Quantized Models <#compare-performance-of-the-original-and-quantized-models>`__ + - `Download dataset <#download-dataset>`__ + - `Create dataloader <#create-dataloader>`__ + - `Define validation + function <#define-validation-function>`__ -Get Pytorch model -############################################################################################################################### +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ +- `Validate Quantized model + inference <#validate-quantized-model-inference>`__ +- `Validate quantized model + accuracy <#validate-quantized-model-accuracy>`__ +- `Compare Performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ +Get Pytorch model +----------------------------------------------------------- Generally, PyTorch models represent an instance of the `torch.nn.Module `__ @@ -84,7 +88,17 @@ In this case, the model creators provide a tool that enables converting the YOLOv7 model to ONNX, so we do not need to do these steps manually. Prerequisites -############################################################################################################################### +------------------------------------------------------- + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. 
pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -106,12 +120,10 @@ Prerequisites Cloning into 'yolov7'... remote: Enumerating objects: 1191, done. - remote: Counting objects: 100% (6/6), done. - remote: Compressing objects: 100% (4/4), done. - remote: Total 1191 (delta 2), reused 6 (delta 2), pack-reused 1185 - Receiving objects: 100% (1191/1191), 74.23 MiB | 4.20 MiB/s, done. - Resolving deltas: 100% (511/511), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/226-yolov7-optimization/yolov7 + remote: Total 1191 (delta 0), reused 0 (delta 0), pack-reused 1191 + Receiving objects: 100% (1191/1191), 74.23 MiB | 3.77 MiB/s, done. + Resolving deltas: 100% (516/516), done. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/226-yolov7-optimization/yolov7 .. code:: ipython3 @@ -136,13 +148,12 @@ Prerequisites .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/226-yolov7-optimization/yolov7/model/yolov7-tiny.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/226-yolov7-optimization/yolov7/model/yolov7-tiny.pt') Check model inference -############################################################################################################################### - +--------------------------------------------------------------- ``detect.py`` script run pytorch model inference and save image as result, @@ -163,9 +174,9 @@ result, traced_script_module saved! model is traced! 
- 5 horses, Done. (70.8ms) Inference, (0.8ms) NMS + 5 horses, Done. (73.0ms) Inference, (0.8ms) NMS The image with the result is saved in: runs/detect/exp/horses.jpg - Done. (0.084s) + Done. (0.086s) .. code:: ipython3 @@ -177,13 +188,12 @@ result, -.. image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.png +.. image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.png Export to ONNX -############################################################################################################################### - +-------------------------------------------------------- To export an ONNX format of the model, we will use ``export.py`` script. Let us check its arguments. @@ -272,43 +282,42 @@ an end2end ONNX model, you can check this Starting TorchScript-Lite export with torch 1.13.1+cpu... TorchScript-Lite export success, saved as model/yolov7-tiny.torchscript.ptl - Starting ONNX export with onnx 1.14.0... + Starting ONNX export with onnx 1.15.0... ONNX export success, saved as model/yolov7-tiny.onnx - Export complete (2.53s). Visualize with https://github.com/lutzroeder/netron. + Export complete (2.48s). Visualize with https://github.com/lutzroeder/netron. -Convert ONNX Model to OpenVINO Intermediate Representation (IR). -############################################################################################################################### +Convert ONNX Model to OpenVINO Intermediate Representation (IR) +--------------------------------------------------------------------------------------------------------- -While ONNX models are directly supported by OpenVINO runtime, -it can be useful to convert them to IR format to take the advantage of -OpenVINO optimization tools and features. The ``mo.convert_model`` -python function in OpenVINO Model Optimizer can be used for converting -the model. 
The function returns instance of OpenVINO Model class, which -is ready to use in Python interface. However, it can also be serialized -to OpenVINO IR format for future execution. +While ONNX models are directly supported by OpenVINO runtime, it can be +useful to convert them to IR format to take the advantage of OpenVINO +model conversion API features. The ``ov.convert_model`` python function +of `model conversion +API `__ +can be used for converting the model. The function returns instance of +OpenVINO Model class, which is ready to use in Python interface. +However, it can also be saved on device in OpenVINO IR format using +``ov.save_model`` for future execution. .. code:: ipython3 - from openvino.tools import mo - from openvino.runtime import serialize + import openvino as ov - model = mo.convert_model('model/yolov7-tiny.onnx') + model = ov.convert_model('model/yolov7-tiny.onnx') # serialize model for saving IR - serialize(model, 'model/yolov7-tiny.xml') + ov.save_model(model, 'model/yolov7-tiny.xml') Verify model inference -############################################################################################################################### - +---------------------------------------------------------------- To test model work, we create inference pipeline similar to ``detect.py``. The pipeline consists of preprocessing step, inference of OpenVINO model, and results post-processing to get bounding boxes. Preprocessing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model input is a tensor with the ``[1, 3, 640, 640]`` shape in ``N, C, H, W`` format, where @@ -373,23 +382,31 @@ To keep specific shape, preprocessing automatically enables padding.
# label names for visualization - NAMES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', - 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', - 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush'] + DEFAULT_NAMES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] + + # obtain class names from model checkpoint + state_dict = torch.load("model/yolov7-tiny.pt", map_location="cpu") + if hasattr(state_dict["model"], "module"): + NAMES = 
getattr(state_dict["model"].module, "names", DEFAULT_NAMES) + else: + NAMES = getattr(state_dict["model"], "names", DEFAULT_NAMES) + + del state_dict # colors for visualization COLORS = {name: [np.random.randint(0, 255) for _ in range(3)] for i, name in enumerate(NAMES)} Postprocessing -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model output contains detection boxes candidates. It is a tensor with the ``[1,25200,85]`` shape in the ``B, N, 85`` format, where: @@ -413,10 +430,9 @@ algorithm and rescale boxes coordinates to original image size. from typing import List, Tuple, Dict from utils.general import scale_coords, non_max_suppression - from openvino.runtime import Model - def detect(model: Model, image_path: Path, conf_thres: float = 0.25, iou_thres: float = 0.45, classes: List[int] = None, agnostic_nms: bool = False): + def detect(model: ov.Model, image_path: Path, conf_thres: float = 0.25, iou_thres: float = 0.45, classes: List[int] = None, agnostic_nms: bool = False): """ OpenVINO YOLOv7 model inference function. Reads image, preprocess it, runs model inference and postprocess results using NMS. Parameters: @@ -464,16 +480,14 @@ algorithm and rescale boxes coordinates to original image size. .. code:: ipython3 - from openvino.runtime import Core - core = Core() + core = ov.Core() # read converted model model = core.read_model('model/yolov7-tiny.xml') Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -512,17 +526,15 @@ Select device from dropdown list for running inference using OpenVINO: -.. image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.png +.. image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.png Verify model accuracy -############################################################################################################################### - +--------------------------------------------------------------- Download dataset -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ YOLOv7 tiny is pre-trained on the COCO dataset, so in order to evaluate the model accuracy, we need to download it. According to the @@ -565,8 +577,7 @@ the original model evaluation scripts. Create dataloader -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -592,12 +603,11 @@ Create dataloader .. parsed-literal:: - val: Scanning 'coco/val2017' images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100%|██████████| 5000/5000 [00:01<00:00, 2979.40it/s] + val: Scanning 'coco/val2017' images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100%|██████████| 5000/5000 [00:01<00:00, 2954.46it/s] Define validation function -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We will reuse validation metrics provided in the YOLOv7 repo with a modification for this case (removing extra steps). 
The original model @@ -613,7 +623,7 @@ evaluation procedure can be found in this def test(data, - model: Model, + model: ov.Model, dataloader: torch.utils.data.DataLoader, conf_thres: float = 0.001, iou_thres: float = 0.65, # for NMS @@ -626,7 +636,7 @@ evaluation procedure can be found in this YOLOv7 accuracy evaluation. Processes validation dataset and compites metrics. Parameters: - model (Model): OpenVINO compiled model. + model (ov.Model): OpenVINO compiled model. dataloader (torch.utils.DataLoader): validation dataset. conf_thres (float, *optional*, 0.001): minimal confidence threshold for keeping detections iou_thres (float, *optional*, 0.65): IOU threshold for NMS @@ -759,12 +769,11 @@ Validation function reports following list of accuracy metrics: .. parsed-literal:: Class Images Labels Precision Recall mAP@.5 mAP@.5:.95 - all 5000 36335 0.651 0.506 0.544 0.359 + all 5000 36335 0.651 0.507 0.544 0.359 Optimize model using NNCF Post-training Quantization API -############################################################################################################################### - +-------------------------------------------------------------------------------------------------- `NNCF `__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -772,13 +781,10 @@ OpenVINO with minimal accuracy drop. We will use 8-bit quantization in post-training mode (without the fine-tuning pipeline) to optimize YOLOv7. -.. note:: - - NNCF Post-training Quantization is available as a preview + **Note**: NNCF Post-training Quantization is available as a preview feature in OpenVINO 2022.3 release. Fully functional support will be provided in the next releases. - The optimization process contains the following steps: 1. Create a Dataset for quantization. @@ -830,18 +836,21 @@ asymmetric quantization of activations. 
quantized_model = nncf.quantize(model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED) - serialize(quantized_model, 'model/yolov7-tiny_int8.xml') + ov.save_model(quantized_model, 'model/yolov7-tiny_int8.xml') .. parsed-literal:: - Statistics collection: 100%|██████████| 300/300 [00:38<00:00, 7.80it/s] - Biases correction: 100%|██████████| 58/58 [00:04<00:00, 14.15it/s] + 2023-10-30 23:38:09.707478: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 23:38:09.738739: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-30 23:38:10.279255: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Statistics collection: 100%|██████████| 300/300 [00:38<00:00, 7.73it/s] + Applying Fast Bias correction: 100%|██████████| 58/58 [00:04<00:00, 13.46it/s] Validate Quantized model inference -############################################################################################################################### - +---------------------------------------------------------------------------- .. code:: ipython3 @@ -866,13 +875,12 @@ Validate Quantized model inference -.. image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.png +.. 
image:: 226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.png Validate quantized model accuracy -############################################################################################################################### - +--------------------------------------------------------------------------- .. code:: ipython3 @@ -898,7 +906,7 @@ Validate quantized model accuracy .. parsed-literal:: Class Images Labels Precision Recall mAP@.5 mAP@.5:.95 - all 5000 36335 0.644 0.504 0.54 0.353 + all 5000 36335 0.637 0.508 0.54 0.353 As we can see, model accuracy slightly changed after quantization. @@ -906,23 +914,20 @@ However, if we look at the output image, these changes are not significant. Compare Performance of the Original and Quantized Models -############################################################################################################################### +-------------------------------------------------------------------------------------------------- Finally, use the OpenVINO `Benchmark -Tool `__ +Tool `__ to measure the inference performance of the ``FP32`` and ``INT8`` models. -.. note:: - - For more accurate performance, it is recommended to run + **NOTE**: For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m model.xml -d CPU`` to benchmark async inference on CPU for one minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run ``benchmark_app --help`` to see an overview of all command-line options. - .. code:: ipython3 device @@ -949,18 +954,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 
2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 11.04 ms + [ INFO ] Read model took 13.97 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] @@ -974,46 +979,49 @@ models. [ INFO ] Model outputs: [ INFO ] output (node: output) : f32 / [...] / [1,25200,85] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 256.97 ms + [ INFO ] Compile model took 265.32 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 6 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ 
INFO ] INFERENCE_PRECISION_HINT: [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 6 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 43.97 ms + [ INFO ] First inference took 45.17 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 11400 iterations - [ INFO ] Duration: 120097.35 ms + [ INFO ] Count: 11544 iterations + [ INFO ] Duration: 120105.63 ms [ INFO ] Latency: - [ INFO ] Median: 62.78 ms - [ INFO ] Average: 63.06 ms - [ INFO ] Min: 35.00 ms - [ INFO ] Max: 133.31 ms - [ INFO ] Throughput: 94.92 FPS + [ INFO ] Median: 62.16 ms + [ INFO ] Average: 62.28 ms + [ INFO ] Min: 45.96 ms + [ INFO ] Max: 85.36 ms + [ INFO ] Throughput: 96.12 FPS .. code:: ipython3 @@ -1029,18 +1037,18 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 
2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 17.80 ms + [ INFO ] Read model took 23.91 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [1,3,640,640] @@ -1054,44 +1062,47 @@ models. [ INFO ] Model outputs: [ INFO ] output (node: output) : f32 / [...] / [1,25200,85] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 462.21 ms + [ INFO ] Compile model took 456.30 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 6 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 24 + [ INFO ] INFERENCE_PRECISION_HINT: [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 6 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 6 
- [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 26.85 ms + [ INFO ] First inference took 27.69 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 31326 iterations - [ INFO ] Duration: 120015.35 ms + [ INFO ] Count: 32700 iterations + [ INFO ] Duration: 120025.10 ms [ INFO ] Latency: - [ INFO ] Median: 22.78 ms - [ INFO ] Average: 22.86 ms - [ INFO ] Min: 14.12 ms - [ INFO ] Max: 41.51 ms - [ INFO ] Throughput: 261.02 FPS + [ INFO ] Median: 21.82 ms + [ INFO ] Average: 21.90 ms + [ INFO ] Min: 16.88 ms + [ INFO ] Max: 44.61 ms + [ INFO ] Throughput: 272.44 FPS diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.jpg new file mode 100644 index 00000000000000..fc548e7ce6e1d9 --- /dev/null +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e31df95b9edc90d3ca7f1df06b6a7e752133edf83b803d5b9ef5f85007fb591 +size 64645 diff --git 
a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.png new file mode 100644 index 00000000000000..74fbc2353dce45 --- /dev/null +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_10_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe329db16f68795e1f73c293b698fee6c9c6e4ef606d36357cb06b805de8af2 +size 568876 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.jpg deleted file mode 100644 index 233197348b4916..00000000000000 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2975c3827089d20aad73d540189b9fd908f30dfeff795ff14e68e95327d24d35 -size 63563 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.png deleted file mode 100644 index b878c1c054b932..00000000000000 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_26_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f397bd82701c329f4e73ff181b5d1c4ac907f4b61910ca09681f63ff83659875 -size 573281 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.jpg new file mode 100644 index 00000000000000..87f3623f3cb4cc --- /dev/null +++ 
b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535860c41070cd771ee02e2983fe7e02a37c4985a2f1f0dabfefb76352db6291 +size 63263 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.png new file mode 100644 index 00000000000000..bd375472d3741f --- /dev/null +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_27_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae19e82dff227f9344c02bd32c02e207e0bb7c0d4e620f8f50013824fa9450c6 +size 574652 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.jpg deleted file mode 100644 index 9f6428a8dbce2a..00000000000000 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1513cf0169b7ae87be5234830d8fbaaf3a766a08d9e597bd75ff55ea6d0e2892 -size 63483 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.png deleted file mode 100644 index a807e7cfdd1747..00000000000000 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_43_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd195b83150860a64d32e8140921e2b1494ed619b8c65789e46fe5ba6d9e4802 -size 572961 diff --git 
a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.jpg new file mode 100644 index 00000000000000..9b8e37417e856f --- /dev/null +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccacd9e276d177f6f50c7ea6291ae7a1ee68fceace7af1f1a5a953e46be8855d +size 63193 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.png new file mode 100644 index 00000000000000..78b7d1275c44f9 --- /dev/null +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_44_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8451206e0966b9e5c60310ad856e84cf1ed9f34d59775c6f33bd1bcb7f8fbe0 +size 574615 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.jpg b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.jpg deleted file mode 100644 index 62f74cb495e85b..00000000000000 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8f7a6a17776622d2117a24a02238d0062fc727c9b093a25b6f5c77584416cf0 -size 64007 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.png b/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.png deleted file mode 100644 index b0d6bbcdae9ff5..00000000000000 --- 
a/docs/notebooks/226-yolov7-optimization-with-output_files/226-yolov7-optimization-with-output_9_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77cb641bd57ccfe7c494b2e7b556db50ff5fb0f4c3d2d999ec94fe7679fc3919 -size 568350 diff --git a/docs/notebooks/226-yolov7-optimization-with-output_files/index.html b/docs/notebooks/226-yolov7-optimization-with-output_files/index.html index a0f9f8664b59f5..125d72c1e321b3 100644 --- a/docs/notebooks/226-yolov7-optimization-with-output_files/index.html +++ b/docs/notebooks/226-yolov7-optimization-with-output_files/index.html @@ -1,12 +1,12 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/226-yolov7-optimization-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/226-yolov7-optimization-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/226-yolov7-optimization-with-output_files/


../
-226-yolov7-optimization-with-output_26_0.jpg       16-Aug-2023 01:31               63563
-226-yolov7-optimization-with-output_26_0.png       16-Aug-2023 01:31              573281
-226-yolov7-optimization-with-output_43_0.jpg       16-Aug-2023 01:31               63483
-226-yolov7-optimization-with-output_43_0.png       16-Aug-2023 01:31              572961
-226-yolov7-optimization-with-output_9_0.jpg        16-Aug-2023 01:31               64007
-226-yolov7-optimization-with-output_9_0.png        16-Aug-2023 01:31              568350
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/226-yolov7-optimization-with-output_files/


../
+226-yolov7-optimization-with-output_10_0.jpg       31-Oct-2023 00:35               64645
+226-yolov7-optimization-with-output_10_0.png       31-Oct-2023 00:35              568876
+226-yolov7-optimization-with-output_27_0.jpg       31-Oct-2023 00:35               63263
+226-yolov7-optimization-with-output_27_0.png       31-Oct-2023 00:35              574652
+226-yolov7-optimization-with-output_44_0.jpg       31-Oct-2023 00:35               63193
+226-yolov7-optimization-with-output_44_0.png       31-Oct-2023 00:35              574615
 

diff --git a/docs/notebooks/227-whisper-convert-with-output.rst b/docs/notebooks/227-whisper-convert-with-output.rst new file mode 100644 index 00000000000000..a8288655d9a3f7 --- /dev/null +++ b/docs/notebooks/227-whisper-convert-with-output.rst @@ -0,0 +1,537 @@ +Video Subtitle Generation using Whisper and OpenVINO™ +===================================================== + +`Whisper `__ is an automatic speech +recognition (ASR) system trained on 680,000 hours of multilingual and +multitask supervised data collected from the web. It is a multi-task +model that can perform multilingual speech recognition as well as speech +translation and language identification. + +.. figure:: https://user-images.githubusercontent.com/29454499/204536347-28976978-9a07-416c-acff-fc1214bbfbe0.svg + :alt: asr-training-data-desktop.svg + + asr-training-data-desktop.svg + +You can find more information about this model in the `research +paper `__, `OpenAI +blog `__, `model +card `__ and +GitHub `repository `__. + +In this notebook, we will use Whisper with OpenVINO to generate +subtitles in a sample video. Notebook contains the following steps: 1. +Download the model. 2. Instantiate the PyTorch model pipeline. 3. +Convert model to OpenVINO IR, using model conversion API. 4. Run the +Whisper pipeline with OpenVINO models. + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO Intermediate Representation (IR) + format. 
<#convert-model-to-openvino-intermediate-representation-ir-format>`__ + - `Convert Whisper Encoder to OpenVINO + IR <#convert-whisper-encoder-to-openvino-ir>`__ + - `Convert Whisper decoder to OpenVINO + IR <#convert-whisper-decoder-to-openvino-ir>`__ + +- `Prepare inference pipeline <#prepare-inference-pipeline>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Run video transcription + pipeline <#run-video-transcription-pipeline>`__ +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- + +Install dependencies. + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + %pip install -q "python-ffmpeg<=1.0.16" moviepy transformers onnx + %pip install -q -I "git+https://github.com/garywu007/pytube.git" + %pip install -q -U gradio + %pip install -q -I "git+https://github.com/openai/whisper.git@e8622f9afc4eba139bf796c210f5c01081000472" + +Instantiate model +----------------------------------------------------------- + +Whisper is a Transformer based encoder-decoder model, also referred to +as a sequence-to-sequence model. It maps a sequence of audio spectrogram +features to a sequence of text tokens. First, the raw audio inputs are +converted to a log-Mel spectrogram by action of the feature extractor. +Then, the Transformer encoder encodes the spectrogram to form a sequence +of encoder hidden states. Finally, the decoder autoregressively predicts +text tokens, conditional on both the previous tokens and the encoder +hidden states. + +You can see the model architecture in the diagram below: + +.. figure:: https://user-images.githubusercontent.com/29454499/204536571-8f6d8d77-5fbd-4c6d-8e29-14e734837860.svg + :alt: whisper_architecture.svg + + whisper_architecture.svg + +There are several models of different sizes and capabilities trained by +the authors of the model. 
In this tutorial, we will use the ``base`` +model, but the same actions are also applicable to other models from +the Whisper family. + +.. code:: ipython3 + + import whisper + + model_id = "base" + model = whisper.load_model("base") + model.to("cpu") + model.eval() + pass + +Convert model to OpenVINO Intermediate Representation (IR) format. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For best results with OpenVINO, it is recommended to convert the model +to OpenVINO IR format. We need to provide an initialized model object and +an example input for shape inference. We will use ``ov.convert_model`` +functionality to convert models. The ``ov.convert_model`` Python +function returns an OpenVINO model ready to load on a device and start +making predictions. We can save it to disk for later use with +``ov.save_model``. + +Convert Whisper Encoder to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + from pathlib import Path + + WHISPER_ENCODER_OV = Path("whisper_encoder.xml") + WHISPER_DECODER_OV = Path("whisper_decoder.xml") + +.. code:: ipython3 + + import torch + import openvino as ov + + mel = torch.zeros((1, 80, 3000)) + audio_features = model.encoder(mel) + encoder_model = ov.convert_model(model.encoder, example_input=mel) + ov.save_model(encoder_model, WHISPER_ENCODER_OV) + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/whisper/model.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape" + + +Convert Whisper decoder to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To reduce computational complexity, the decoder uses cached key/value +projections in attention modules from the previous steps. We need to +modify this process for correct tracing. + +.. code:: ipython3 + + import torch + from typing import Optional, Tuple + from functools import partial + + + def attention_forward( + attention_module, + x: torch.Tensor, + xa: Optional[torch.Tensor] = None, + mask: Optional[torch.Tensor] = None, + kv_cache: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + ): + """ + Override for forward method of decoder attention module with storing cache values explicitly. + Parameters: + attention_module: current attention module + x: input token ids. + xa: input audio features (Optional). + mask: mask for applying attention (Optional). + kv_cache: cached (key, value) tensors of this attention module + from previous steps (Optional). + Returns: + attention module output tensor + updated kv_cache + """ + q = attention_module.query(x) + + if xa is None: + # self-attention: compute key/value projections for the new tokens and, + # if kv_cache is provided, prepend the cached key/value tensors to them. + k = attention_module.key(x) + v = attention_module.value(x) + if kv_cache is not None: + k = torch.cat((kv_cache[0], k), dim=1) + v = torch.cat((kv_cache[1], v), dim=1) + kv_cache_new = (k, v) + else: + # cross-attention: keys and values are recomputed from the audio features on every call and are not cached. 
+ k = attention_module.key(xa) + v = attention_module.value(xa) + kv_cache_new = (None, None) + + wv, qk = attention_module.qkv_attention(q, k, v, mask) + return attention_module.out(wv), kv_cache_new + + + def block_forward( + residual_block, + x: torch.Tensor, + xa: Optional[torch.Tensor] = None, + mask: Optional[torch.Tensor] = None, + kv_cache: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + ): + """ + Override for residual block forward method for providing kv_cache to attention module. + Parameters: + residual_block: current residual block. + x: input token_ids. + xa: input audio features (Optional). + mask: attention mask (Optional). + kv_cache: cache for storing attention key values. + Returns: + x: residual block output + kv_cache: updated kv_cache + + """ + x0, kv_cache = residual_block.attn(residual_block.attn_ln( + x), mask=mask, kv_cache=kv_cache) + x = x + x0 + if residual_block.cross_attn: + x1, _ = residual_block.cross_attn( + residual_block.cross_attn_ln(x), xa) + x = x + x1 + x = x + residual_block.mlp(residual_block.mlp_ln(x)) + return x, kv_cache + + + + # update forward functions + for idx, block in enumerate(model.decoder.blocks): + block.forward = partial(block_forward, block) + block.attn.forward = partial(attention_forward, block.attn) + if block.cross_attn: + block.cross_attn.forward = partial(attention_forward, block.cross_attn) + + + def decoder_forward(decoder, x: torch.Tensor, xa: torch.Tensor, kv_cache: Optional[Tuple[Tuple[torch.Tensor, torch.Tensor]]] = None): + """ + Override for decoder forward method. 
+ Parameters: + x: torch.LongTensor, shape = (batch_size, <= n_ctx) the text tokens + xa: torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx) + the encoded audio features to be attended on + kv_cache: Dict[str, torch.Tensor], attention modules hidden states cache from previous steps + """ + if kv_cache is not None: + offset = kv_cache[0][0].shape[1] + else: + offset = 0 + kv_cache = [None for _ in range(len(decoder.blocks))] + x = decoder.token_embedding( + x) + decoder.positional_embedding[offset: offset + x.shape[-1]] + x = x.to(xa.dtype) + kv_cache_upd = [] + + for block, kv_block_cache in zip(decoder.blocks, kv_cache): + x, kv_block_cache_upd = block(x, xa, mask=decoder.mask, kv_cache=kv_block_cache) + kv_cache_upd.append(tuple(kv_block_cache_upd)) + + x = decoder.ln(x) + logits = ( + x @ torch.transpose(decoder.token_embedding.weight.to(x.dtype), 1, 0)).float() + + return logits, tuple(kv_cache_upd) + + + + # override decoder forward + model.decoder.forward = partial(decoder_forward, model.decoder) + +.. code:: ipython3 + + tokens = torch.ones((5, 3), dtype=torch.int64) + logits, kv_cache = model.decoder(tokens, audio_features, kv_cache=None) + + tokens = torch.ones((5, 1), dtype=torch.int64) + decoder_model = ov.convert_model(model.decoder, example_input=(tokens, audio_features, kv_cache)) + + ov.save_model(decoder_model, WHISPER_DECODER_OV) + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/jit/_trace.py:154: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:486.) 
+ if a.grad is not None: + + +The decoder model autoregressively predicts the next token guided by +encoder hidden states and previously predicted sequence. This means that +the shape of inputs which depends on the previous step (inputs for +tokens and attention hidden states from previous step) are dynamic. For +efficient utilization of memory, you define an upper bound for dynamic +input shapes. + +Prepare inference pipeline +-------------------------------------------------------------------- + +The image below illustrates the pipeline of video transcribing using the +Whisper model. + +.. figure:: https://user-images.githubusercontent.com/29454499/204536733-1f4342f7-2328-476a-a431-cb596df69854.png + :alt: whisper_pipeline.png + + whisper_pipeline.png + +To run the PyTorch Whisper model, we just need to call the +``model.transcribe(audio, **parameters)`` function. We will try to reuse +original model pipeline for audio transcribing after replacing the +original models with OpenVINO IR versions. + +**Select inference device** + +Select a device from the dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + core = ov.Core() + +.. code:: ipython3 + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + from utils import patch_whisper_for_ov_inference, OpenVINOAudioEncoder, OpenVINOTextDecoder + + patch_whisper_for_ov_inference(model) + + model.encoder = OpenVINOAudioEncoder(core, WHISPER_ENCODER_OV, device=device.value) + model.decoder = OpenVINOTextDecoder(core, WHISPER_DECODER_OV, device=device.value) + +Run video transcription pipeline +-------------------------------------------------------------------------- + +Now, we are ready to start transcription. 
We select a video from YouTube +that we want to transcribe. Be patient, as downloading the video may +take some time. + +.. code:: ipython3 + + import ipywidgets as widgets + VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" + link = widgets.Text( + value=VIDEO_LINK, + placeholder="Type link for video", + description="Video:", + disabled=False + ) + + link + + + + +.. parsed-literal:: + + Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') + + + +.. code:: ipython3 + + from pytube import YouTube + + print(f"Downloading video {link.value} started") + + output_file = Path("downloaded_video.mp4") + yt = YouTube(link.value) + yt.streams.get_highest_resolution().download(filename=output_file) + print(f"Video saved to {output_file}") + + +.. parsed-literal:: + + Downloading video https://youtu.be/kgL5LBM-hFI started + Video saved to downloaded_video.mp4 + + +.. code:: ipython3 + + from utils import get_audio + + audio = get_audio(output_file) + +Select the task for the model: + +- **transcribe** - generate audio transcription in the source language + (automatically detected). +- **translate** - generate audio transcription with translation to + English language. + +.. code:: ipython3 + + task = widgets.Select( + options=["transcribe", "translate"], + value="translate", + description="Select task:", + disabled=False + ) + task + + + + +.. parsed-literal:: + + Select(description='Select task:', index=1, options=('transcribe', 'translate'), value='translate') + + + +.. code:: ipython3 + + transcription = model.transcribe(audio, task=task.value) + +The results will be saved in the ``downloaded_video.srt`` file. SRT is +one of the most popular formats for storing subtitles and is compatible +with many modern video players. This file can be used to embed +transcription into videos during playback or by injecting them directly +into video files using ``ffmpeg``. + +.. 
code:: ipython3 + + from utils import prepare_srt + + srt_lines = prepare_srt(transcription) + # save transcription + with output_file.with_suffix(".srt").open("w") as f: + f.writelines(srt_lines) + +Now let us see the results. + +.. code:: ipython3 + + widgets.Video.from_file(output_file, loop=False, width=800, height=800) + + + + +.. parsed-literal:: + + Video(value=b"\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00:'moov\x00\x00\x00lmvhd...", height='800… + + + +.. code:: ipython3 + + print("".join(srt_lines)) + + +.. parsed-literal:: + + 1 + 00:00:00,000 --> 00:00:05,000 + What's that? + + 2 + 00:00:05,000 --> 00:00:07,000 + Oh wow. + + 3 + 00:00:07,000 --> 00:00:09,000 + Excuse me. + + 4 + 00:00:09,000 --> 00:00:11,000 + Hello humans. + + 5 + 00:00:13,000 --> 00:00:15,000 + Focus on me. + + 6 + 00:00:15,000 --> 00:00:17,000 + Focus on the guard. + + 7 + 00:00:17,000 --> 00:00:20,000 + Don't tell anyone what you've seen in here. + + 8 + 00:00:22,000 --> 00:00:24,000 + Have you seen what's in there? + + 9 + 00:00:24,000 --> 00:00:25,000 + They have. + + 10 + 00:00:25,000 --> 00:00:27,000 + Intel. This is where it all changes. + + + + +Interactive demo +---------------------------------------------------------- + +.. 
code:: ipython3 + + import gradio as gr + + + def transcribe(url, task): + output_file = Path("downloaded_video.mp4") + yt = YouTube(url) + yt.streams.get_highest_resolution().download(filename=output_file) + audio = get_audio(output_file) + transcription = model.transcribe(audio, task=task.lower()) + srt_lines = prepare_srt(transcription) + with output_file.with_suffix(".srt").open("w") as f: + f.writelines(srt_lines) + return [str(output_file), str(output_file.with_suffix(".srt"))] + + + demo = gr.Interface( + transcribe, + [gr.Textbox(label="YouTube URL"), gr.Radio(["Transcribe", "Translate"], value="Transcribe")], + "video", + examples=[["https://youtu.be/kgL5LBM-hFI", "Transcribe"]], + allow_flagging="never" + ) + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/227-whisper-nncf-quantize-with-output.rst b/docs/notebooks/227-whisper-nncf-quantize-with-output.rst new file mode 100644 index 00000000000000..b98bf0d3c4b9c7 --- /dev/null +++ b/docs/notebooks/227-whisper-nncf-quantize-with-output.rst @@ -0,0 +1,611 @@ +Post-Training Quantization of OpenAI Whisper model with NNCF +============================================================ + +The goal of this tutorial is to demonstrate how to speed up the model by +applying 8-bit post-training quantization from +`NNCF `__ (Neural Network +Compression Framework) and infer quantized model via OpenVINO™ Toolkit. +The optimization process contains the following steps: + +1. Quantize the converted OpenVINO model from `227-whisper-convert + notebook <227-whisper-convert.ipynb>`__ with NNCF. +2. Check model result for the demo video. +3. Compare model size, performance and accuracy of FP32 and quantized + INT8 models. + +.. 
+ + **NOTE**: you should run + `227-whisper-convert <227-whisper-convert.ipynb>`__ notebook first to + generate OpenVINO IR model that is used for quantization. + +**Table of contents:** + +- `Prerequisites <#prerequisites>`__ +- `Create and initialize quantization <#create-and-initialize-quantization>`__ +- `Prepare calibration datasets <#prepare-calibration-datasets>`__ +- `Quantize Whisper encoder and decoder models <#quantize-whisper-encoder-and-decoder-models>`__ +- `Transcribe video with quantized OpenVINO model <#transcribe-video-with-quantized-openvino-model>`__ +- `Compare performance and accuracy of the FP32 and INT8 IRs <#compare-performance-and-accuracy-of-the-fp-and-int-irs>`__ + +Prerequisites +------------------------------------------------------- + +Install dependencies. + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + %pip install -q "nncf>=2.6.0" + %pip install -q datasets librosa soundfile + %pip install -q evaluate jiwer + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import ipywidgets as widgets + + from openvino import Core + core = Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + + + +Select the task for the model: + +- **transcribe** - generate audio transcription in the source language + (automatically detected). +- **translate** - generate audio transcription with translation to + English language. + +.. code:: ipython3 + + task = widgets.Select( + options=["transcribe", "translate"], + value="translate", + description="Select task:", + disabled=False + ) + task + + + + +.. 
parsed-literal:: + + Select(description='Select task:', index=1, options=('transcribe', 'translate'), value='translate') + + + +Create and initialize quantization +------------------------------------------------ + +`NNCF <https://github.com/openvinotoolkit/nncf/>`__ enables +post-training quantization by adding the quantization layers into the +model graph and then using a subset of the training dataset to +initialize the parameters of these additional quantization layers. The +framework is designed so that modifications to your original training +code are minor. Quantization is the simplest scenario and requires a few +modifications. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize`` to obtain quantized models. +3. Serialize the ``INT8`` model using ``openvino.runtime.serialize`` + function. + +Set paths to the model converted in +`227-whisper-convert <227-whisper-convert.ipynb>`__ notebook and the +paths where quantized models will be saved. + +.. code:: ipython3 + + from pathlib import Path + + WHISPER_ENCODER_OV = Path("whisper_encoder.xml") + WHISPER_DECODER_OV = Path("whisper_decoder.xml") + + WHISPER_ENCODER_OV_INT8 = Path("whisper_encoder_int8.xml") + WHISPER_DECODER_OV_INT8 = Path("whisper_decoder_int8.xml") + +Load FP32 model IR. + +.. code:: ipython3 + + import whisper + from utils import patch_whisper_for_ov_inference, OpenVINOAudioEncoder, OpenVINOTextDecoder + + model_id = "base" + model_fp32 = whisper.load_model(model_id).to("cpu").eval() + patch_whisper_for_ov_inference(model_fp32) + + model_fp32.encoder = OpenVINOAudioEncoder(core, WHISPER_ENCODER_OV, device=device.value) + model_fp32.decoder = OpenVINOTextDecoder(core, WHISPER_DECODER_OV, device=device.value) + +Prepare calibration datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Whisper consists of an encoder and a decoder model. We need to collect +calibration data for both of them. 
+ +Below we overwrite encoder/decoder forward methods in order to collect +calibration samples. + +.. code:: ipython3 + + from contextlib import contextmanager + from functools import partial + import openvino as ov + from typing import Optional + import torch + + COLLECT_CALIBRATION_DATA = False + encoder_calibration_data = [] + decoder_calibration_data = [] + + @contextmanager + def calibration_data_collection(): + global COLLECT_CALIBRATION_DATA + try: + COLLECT_CALIBRATION_DATA = True + yield + finally: + COLLECT_CALIBRATION_DATA = False + + + def encoder_forward(self, mel: torch.Tensor): + if COLLECT_CALIBRATION_DATA: + encoder_calibration_data.append(mel) + return torch.from_numpy(self.compiled_model(mel)[self.output_blob]) + + def decoder_forward(self, x: torch.Tensor, xa: torch.Tensor, kv_cache: Optional[dict] = None): + feed_dict = {'x': ov.Tensor(x.numpy()), 'xa': ov.Tensor(xa.numpy())} + feed_dict = (self.preprocess_kv_cache_inputs(feed_dict, kv_cache)) + if COLLECT_CALIBRATION_DATA: + decoder_calibration_data.append(feed_dict) + res = self.compiled_model(feed_dict) + return self.postprocess_outputs(res) + + model_fp32.encoder.forward = partial(encoder_forward, model_fp32.encoder) + model_fp32.decoder.forward = partial(decoder_forward, model_fp32.decoder) + +We use a portion of validation +`librispeech_asr `__ +dataset from Hugging Face as calibration data. + +.. code:: ipython3 + + from datasets import load_dataset + from tqdm.notebook import tqdm + + CALIBRATION_DATASET_SIZE = 30 + + calibration_dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True).take(CALIBRATION_DATASET_SIZE) + + with calibration_data_collection(): + for data_item in tqdm(calibration_dataset, desc="Collecting calibration data", total=CALIBRATION_DATASET_SIZE): + model_fp32.transcribe(data_item["audio"]["array"].astype("float32"), task=task.value) + + + +.. parsed-literal:: + + Collecting calibration data: 0%| | 0/30 [00:00`__ notebook. + +.. 
code:: ipython3 + + VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" + link = widgets.Text( + value=VIDEO_LINK, + placeholder="Type link for video", + description="Video:", + disabled=False + ) + link + + + + +.. parsed-literal:: + + Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') + + + +.. code:: ipython3 + + from pytube import YouTube + + print(f"Downloading video {link.value} started") + + output_file = Path("downloaded_video.mp4") + yt = YouTube(link.value) + yt.streams.get_highest_resolution().download(filename=output_file) + print(f"Video saved to {output_file}") + + +.. parsed-literal:: + + Downloading video https://youtu.be/kgL5LBM-hFI started + Video saved to downloaded_video.mp4 + + +.. code:: ipython3 + + from utils import get_audio + + audio = get_audio(output_file) + +Run transcription by the quantized model. + +.. code:: ipython3 + + transcription = model_int8.transcribe(audio, task=task.value) + +.. code:: ipython3 + + from utils import prepare_srt + + srt_lines = prepare_srt(transcription) + # save transcription + with output_file.with_suffix(".srt").open("w") as f: + f.writelines(srt_lines) + +Now let us see the results. + +.. code:: ipython3 + + widgets.Video.from_file(output_file, loop=False, width=800, height=800) + + + + +.. parsed-literal:: + + Video(value=b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00Aimoov\x00\x00\x00lmvhd...', height='800… + + + +.. code:: ipython3 + + print("".join(srt_lines)) + + +.. parsed-literal:: + + 1 + 00:00:00,000 --> 00:00:07,000 + What's that? Oh, wow. + + 2 + 00:00:09,000 --> 00:00:11,000 + Hello humans. + + 3 + 00:00:14,000 --> 00:00:15,000 + Focus on me. + + 4 + 00:00:15,000 --> 00:00:16,000 + Focus on the guard. + + 5 + 00:00:18,000 --> 00:00:20,000 + Don't tell anyone what you've seen in here. + + 6 + 00:00:22,000 --> 00:00:24,000 + Have you seen what's in there? + + 7 + 00:00:24,000 --> 00:00:25,000 + They have intel. 
+ + 8 + 00:00:25,000 --> 00:00:27,000 + This is where it all changes. + + + + +As you can see the result is almost the same. + +Compare performance and accuracy of the FP32 and INT8 IRs +--------------------------------------------------------------------------------------------------- + +Compare model file size. + +.. code:: ipython3 + + def calculate_compression_rate(model_path_ov, model_path_ov_int8): + model_size_fp32 = model_path_ov.with_suffix(".bin").stat().st_size / 1024 + model_size_int8 = model_path_ov_int8.with_suffix(".bin").stat().st_size / 1024 + print(f"Model: {model_path_ov.stem}") + print(f" * FP32 IR model size: {model_size_fp32:.2f} KB") + print(f" * INT8 IR model size: {model_size_int8:.2f} KB") + print(f" * Model compression rate: {model_size_fp32 / model_size_int8:.3f}") + + calculate_compression_rate(WHISPER_ENCODER_OV, WHISPER_ENCODER_OV_INT8) + calculate_compression_rate(WHISPER_DECODER_OV, WHISPER_DECODER_OV_INT8) + + +.. parsed-literal:: + + Model: whisper_encoder + * FP32 IR model size: 40216.07 KB + * INT8 IR model size: 21092.37 KB + * Model compression rate: 1.907 + Model: whisper_decoder + * FP32 IR model size: 101961.09 KB + * INT8 IR model size: 78058.77 KB + * Model compression rate: 1.306 + + +To measure the inference performance of the ``FP32`` and ``INT8`` +encoder/decoder models, we use median inference time on calibration +dataset. So we can approximately estimate the speed-up of the dynamic +quantized models. + + **NOTE**: For the most accurate performance estimation, it is + recommended to run ``benchmark_app`` with static shapes in a + terminal/command prompt after closing other applications. + +.. 
code:: ipython3 + + import time + import numpy as np + + def calculate_call_inference_time(model, dataset): + inference_time = [] + for data_item in tqdm(dataset[:100], desc="Measuring performance"): + start = time.perf_counter() + model(data_item) + end = time.perf_counter() + delta = end - start + inference_time.append(delta) + return np.median(inference_time) + + + encoder_time_fp32 = calculate_call_inference_time(model_fp32.encoder.compiled_model, encoder_calibration_data) + encoder_time_int8 = calculate_call_inference_time(model_int8.encoder.compiled_model, encoder_calibration_data) + print(f"Encoder performance speedup: {encoder_time_fp32 / encoder_time_int8:.3f}") + + decoder_time_fp32 = calculate_call_inference_time(model_fp32.decoder.compiled_model, decoder_calibration_data) + decoder_time_int8 = calculate_call_inference_time(model_int8.decoder.compiled_model, decoder_calibration_data) + print(f"Decoder performance speedup: {decoder_time_fp32 / decoder_time_int8:.3f}") + + + +.. parsed-literal:: + + Measuring performance: 0%| | 0/60 [00:00`__ +test dataset. We rely on the Word Error Rate (WER) metric and compute +accuracy as ``(1 - WER)``. + +.. 
code:: ipython3 + + from evaluate import load + from transformers import WhisperProcessor + + wer = load("wer") + + TEST_DATASET_SIZE = 100 + test_dataset = load_dataset("librispeech_asr", "clean", split="test", streaming=True).take(TEST_DATASET_SIZE) + + def calculate_transcription_time_and_accuracy(model, dataset): + processor = WhisperProcessor.from_pretrained("openai/whisper-large") + + ground_truths = [] + predictions = [] + inference_time = [] + for data_item in tqdm(dataset, desc="Measuring performance and accuracy", total=TEST_DATASET_SIZE): + audio = data_item["audio"]["array"].astype("float32") + + start_time = time.perf_counter() + transcription = model.transcribe(audio, task=task.value) + end_time = time.perf_counter() + delta_time = end_time - start_time + + reference = processor.tokenizer._normalize(data_item["text"]) + prediction = processor.tokenizer._normalize(transcription["text"]) + ground_truths.append(reference) + predictions.append(prediction) + inference_time.append(delta_time) + + word_accuracy = (1 - wer.compute(references=ground_truths, predictions=predictions)) * 100 + mean_inference_time = np.mean(inference_time) + return mean_inference_time, word_accuracy + + transcription_time_fp32, accuracy_fp32 = calculate_transcription_time_and_accuracy(model_fp32, test_dataset) + transcription_time_int8, accuracy_int8 = calculate_transcription_time_and_accuracy(model_int8, test_dataset) + print(f"Whisper transcription performance speedup: {transcription_time_fp32 / transcription_time_int8:.3f}") + print(f"Whisper transcription word accuracy. FP32: {accuracy_fp32:.2f}%. INT8: {accuracy_int8:.2f}%. Accuracy drop :{accuracy_fp32 - accuracy_int8:.2f}%.") + + + +.. parsed-literal:: + + Measuring performance and accuracy: 0%| | 0/100 [00:00`__ is an automatic speech -recognition (ASR) system trained on 680,000 hours of multilingual and -multitask supervised data collected from the web. 
It is a multi-task -model that can perform multilingual speech recognition as well as speech -translation and language identification. - -.. figure:: https://user-images.githubusercontent.com/29454499/204536347-28976978-9a07-416c-acff-fc1214bbfbe0.svg - :alt: asr-training-data-desktop.svg - - asr-training-data-desktop.svg - -You can find more information about this model in the `research -paper `__, `OpenAI -blog `__, `model -card `__ and -GitHub `repository `__. - -In this notebook, we will use Whisper with OpenVINO to generate -subtitles in a sample video. Notebook contains the following steps: - -1. Download the model. -2. Instantiate the PyTorch model pipeline. -3. Convert model to OpenVINO IR, using model conversion API. -4. Run the Whisper pipeline with OpenVINO models. - -**Table of contents:** - -- `Prerequisites <#Prerequisites>`__ -- `Instantiate model <#Instantiate-model>`__ - - - `Convert model to OpenVINO Intermediate Representation (IR) format. <#convert-model-to-openvino-intermediate-representation-ir-format>`__ - - `Convert Whisper Encoder to OpenVINO IR <#convert-whisper-encoder-to-openvino-ir>`__ - - `Convert Whisper decoder to OpenVINO IR <#convert-whisper-decoder-to-openvino-ir>`__ - -- `Prepare inference pipeline <#prepare-inference-pipeline>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Run video transcription pipeline <#run-video-transcription-pipeline>`__ -.. - `Interactive demo <#interactive-demo>`__ - -Prerequisites -############################################################################################################################### - -Install dependencies. - -.. code:: ipython3 - - %pip install -q "openvino==2023.1.0.dev20230811" - %pip install -q "python-ffmpeg<=1.0.16" moviepy transformers onnx - %pip install -q -I "git+https://github.com/garywu007/pytube.git" - %pip install -q -U gradio - %pip install -q -I "git+https://github.com/openai/whisper.git@e8622f9afc4eba139bf796c210f5c01081000472" - - -.. 
parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - Note: you may need to restart the kernel to use updated packages. - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.3 which is incompatible. - ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.9.2 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.0.76 which is incompatible. - Note: you may need to restart the kernel to use updated packages. - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - Note: you may need to restart the kernel to use updated packages. - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - Note: you may need to restart the kernel to use updated packages. - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - black 21.7b0 requires tomli<2.0.0,>=0.2.6, but you have tomli 2.0.1 which is incompatible. - google-auth 2.22.0 requires urllib3<2.0, but you have urllib3 2.0.4 which is incompatible. - nncf 2.5.0.dev0+90a1e860 requires networkx<=2.8.2,>=2.6, but you have networkx 3.1 which is incompatible. - onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.24.3 which is incompatible. - paddleclas 2.5.1 requires faiss-cpu==1.7.1.post2, but you have faiss-cpu 1.7.4 which is incompatible. - paddleclas 2.5.1 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - ppgan 2.1.0 requires imageio==2.9.0, but you have imageio 2.31.3 which is incompatible. - ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.9.2 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.8.0.76 which is incompatible. - pyannote-audio 2.0.1 requires networkx<3.0,>=2.6, but you have networkx 3.1 which is incompatible. - pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 4.24.3 which is incompatible. 
- tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. - tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 4.24.3 which is incompatible. - torchaudio 0.13.1+cpu requires torch==1.13.1, but you have torch 2.0.1 which is incompatible. - torchvision 0.14.1+cpu requires torch==1.13.1, but you have torch 2.0.1 which is incompatible. - Note: you may need to restart the kernel to use updated packages. - - -Instantiate model -############################################################################################################################### - -Whisper is a Transformer based encoder-decoder model, also referred to -as a sequence-to-sequence model. It maps a sequence of audio spectrogram -features to a sequence of text tokens. First, the raw audio inputs are -converted to a log-Mel spectrogram by action of the feature extractor. -Then, the Transformer encoder encodes the spectrogram to form a sequence -of encoder hidden states. Finally, the decoder autoregressively predicts -text tokens, conditional on both the previous tokens and the encoder -hidden states. - -You can see the model architecture in the diagram below: - -.. figure:: https://user-images.githubusercontent.com/29454499/204536571-8f6d8d77-5fbd-4c6d-8e29-14e734837860.svg - :alt: whisper_architecture.svg - - whisper_architecture.svg - -There are several models of different sizes and capabilities trained by -the authors of the model. In this tutorial, we will use the ``base`` -model, but the same actions are also applicable to other models from -Whisper family. - -.. code:: ipython3 - - import whisper - - model = whisper.load_model("base") - model.to("cpu") - model.eval() - pass - -Convert model to OpenVINO Intermediate Representation (IR) format. 
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -For best results with OpenVINO, it is recommended to convert the model -to OpenVINO IR format. We need to provide initialized model object and -example of inputs for shape inference. We will use ``ov.convert_model`` -functionality to convert models. The ``ov.convert_model`` Python -function returns an OpenVINO model ready to load on device and start -making predictions. We can save it on disk for next usage with -``ov.save_model``. - -Convert Whisper Encoder to OpenVINO IR -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -.. code:: ipython3 - - from pathlib import Path - - WHISPER_ENCODER_OV = Path("whisper_encoder.xml") - WHISPER_DECODER_OV = Path("whisper_decoder.xml") - -.. code:: ipython3 - - import torch - import openvino as ov - - mel = torch.zeros((1, 80, 3000)) - audio_features = model.encoder(mel) - encoder_model = ov.convert_model(model.encoder, example_input=mel) - ov.save_model(encoder_model, WHISPER_ENCODER_OV) - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/whisper/model.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape" - - -Convert Whisper decoder to OpenVINO IR -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -To reduce computational complexity, the decoder uses cached key/value -projections in attention modules from the previous steps. We need to -modify this process for correct tracing. - -There are 2 types of attention modules in Whisper Decoder - -self-attention, that makes projection for internal decoder state and -cross-attention, that uses internal state of encoder for calculating -attention. Decoder model runs autoregressively, it means that each new -step uses prediction from previous step as input and in the same time it -conditioned by encoder hidden state calculated before decoding start. To -sum up, it is enough calculate cross-attention once on first step and -reuse it for next steps for reducing computational complexity. -Self-attention hidden state for sequence that generated on previous -steps remains without changes, so it is possible to calculate it only -for current token and then join it to previously generated. - -.. code:: ipython3 - - import torch - from typing import Optional, Tuple - from functools import partial - - - def attention_forward( - attention_module, - x: torch.Tensor, - xa: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, - kv_cache: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - ): - """ - Override for forward method of decoder attention module with storing cache values explicitly. - Parameters: - attention_module: current attention module - x: input token ids. - xa: input audio features (Optional). - mask: mask for applying attention (Optional). - kv_cache: dictionary with cached key values for attention modules. - idx: idx for search in kv_cache. 
- Returns: - attention module output tensor - updated kv_cache - """ - q = attention_module.query(x) - - if xa is None: - # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors; - # otherwise, perform key/value projections for self- or cross-attention as usual. - k = attention_module.key(x) - v = attention_module.value(x) - if kv_cache is not None: - k = torch.cat((kv_cache[0], k), dim=1) - v = torch.cat((kv_cache[1], v), dim=1) - - else: - if kv_cache is None or kv_cache[0].shape[1] == 0: - # for cross-attention, calculate keys and values once and reuse in subsequent calls. - k = attention_module.key(xa) - v = attention_module.value(xa) - else: - k, v = kv_cache - - kv_cache_new = (k, v) - - wv, qk = attention_module.qkv_attention(q, k, v, mask) - return attention_module.out(wv), kv_cache_new - - - def block_forward( - residual_block, - x: torch.Tensor, - xa: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, - kv_cache: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - ): - """ - Override for residual block forward method for providing kv_cache to attention module. - Parameters: - residual_block: current residual block. - x: input token_ids. - xa: input audio features (Optional). - mask: attention mask (Optional). - kv_cache: cache for storing attention key values. - Returns: - x: residual block output - kv_cache: updated kv_cache - - """ - x0, kv_cache_self = residual_block.attn(residual_block.attn_ln( - x), mask=mask, kv_cache=kv_cache[0]) - x = x + x0 - if residual_block.cross_attn: - x1, kv_cache_cross = residual_block.cross_attn( - residual_block.cross_attn_ln(x), xa, kv_cache=kv_cache[1]) - x = x + x1 - x = x + residual_block.mlp(residual_block.mlp_ln(x)) - return x, (kv_cache_self, kv_cache_cross) - - class CrossAttnKVGetter(torch.nn.Module): - """ - Helper class for scripting approach of caching cross attention key values. - The main idea that they should be calculated once and reused for next steps. 
- Tracing can not correctly catch condition for that, that is why we need to use scripting for this part of model. - """ - def __init__(self, attn): - super().__init__() - self.attn_key = attn.key - self.attn_value = attn.value - - def forward(self, xa: torch.Tensor, kv_cache: Tuple[torch.Tensor, torch.Tensor]): - if kv_cache is None or kv_cache[0].shape[1] == 0: - # for cross-attention, calculate keys and values once and reuse in subsequent calls. - k = self.attn_key(xa) - v = self.attn_value(xa) - else: - k, v = kv_cache - return k, v - - def crossattention_forward( - attention_module, - x: torch.Tensor, - xa: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, - kv_cache: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - ): - """ - Override for forward method of decoder cross attention module with storing cache values explicitly. - Parameters: - attention_module: current attention module - x: input token ids. - xa: input audio features (Optional). - mask: mask for applying attention (Optional). - kv_cache: dictionary with cached key values for attention modules. - idx: idx for search in kv_cache. - Returns: - attention module output tensor - updated kv_cache - """ - q = attention_module.query(x) - - if xa is None: - # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors; - # otherwise, perform key/value projections for self- or cross-attention as usual. 
- k = attention_module.key(x) - v = attention_module.value(x) - else: - k, v = attention_module.kv_getter(xa, kv_cache) - kv_cache_new = (k, v) - - wv, qk = attention_module.qkv_attention(q, k, v, mask) - return attention_module.out(wv), kv_cache_new - - - # update forward functions - for _, block in enumerate(model.decoder.blocks): - block.forward = partial(block_forward, block) - block.attn.forward = partial(attention_forward, block.attn) - if block.cross_attn: - kv_getter = CrossAttnKVGetter(block.cross_attn) - block.cross_attn.kv_getter = torch.jit.script(kv_getter) - block.cross_attn.forward = partial(crossattention_forward, block.cross_attn) - - - def decoder_forward(decoder, x: torch.Tensor, xa: torch.Tensor, kv_cache: Optional[Tuple[Tuple[torch.Tensor, torch.Tensor]]] = None): - """ - Override for decoder forward method. - Parameters: - x: torch.LongTensor, shape = (batch_size, <= n_ctx) the text tokens - xa: torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx) - the encoded audio features to be attended on - kv_cache: Dict[str, torch.Tensor], attention modules hidden states cache from previous steps - """ - if kv_cache is not None: - offset = kv_cache[0][0][0].shape[1] - else: - offset = 0 - kv_cache = [(None, None) for _ in range(len(decoder.blocks))] - x = decoder.token_embedding( - x) + decoder.positional_embedding[offset: offset + x.shape[-1]] - x = x.to(xa.dtype) - kv_cache_upd = [] - - for block, kv_block_cache in zip(decoder.blocks, kv_cache): - x, kv_block_cache_upd = block(x, xa, mask=decoder.mask, kv_cache=kv_block_cache) - kv_cache_upd.append(tuple(kv_block_cache_upd)) - - x = decoder.ln(x) - logits = ( - x @ torch.transpose(decoder.token_embedding.weight.to(x.dtype), 1, 0)).float() - - return logits, tuple(kv_cache_upd) - - - # override decoder forward - model.decoder.forward = partial(decoder_forward, model.decoder) - -.. 
code:: ipython3 - - encoder_hidden_size = audio_features.shape[2] - kv_cache_init = [((torch.zeros((5, 0, encoder_hidden_size)), torch.zeros((5, 0, encoder_hidden_size))), (torch.zeros((1, 0, encoder_hidden_size)), torch.zeros((1, 0, encoder_hidden_size)))) for _ in range(len(model.decoder.blocks))] - -.. code:: ipython3 - - tokens = torch.ones((5, 3), dtype=torch.int64) - logits, kv_cache = model.decoder(tokens, audio_features, kv_cache=kv_cache_init) - - tokens = torch.ones((5, 1), dtype=torch.int64) - decoder_model = ov.convert_model(model.decoder, example_input=(tokens, audio_features, kv_cache)) - decoder_cache_input = decoder_model.inputs[2:] - for i in range(2, len(decoder_cache_input), 4): - decoder_cache_input[i].get_node().set_partial_shape(ov.PartialShape([-1, -1, encoder_hidden_size])) - decoder_cache_input[i + 1].get_node().set_partial_shape(ov.PartialShape([-1, -1, encoder_hidden_size])) - - decoder_model.validate_nodes_and_infer_types() - ov.save_model(decoder_model, WHISPER_DECODER_OV) - del decoder_model - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-499/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:154: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:486.) - if a.grad is not None: - - -The decoder model autoregressively predicts the next token guided by -encoder hidden states and previously predicted sequence. 
This means that -the shape of inputs which depends on the previous step (inputs for -tokens and attention hidden states from previous step) are dynamic. For -efficient utilization of memory, you define an upper bound for dynamic -input shapes. - -Prepare inference pipeline -############################################################################################################################### - -The image below illustrates the pipeline of video transcribing using the -Whisper model. - -.. figure:: https://user-images.githubusercontent.com/29454499/204536733-1f4342f7-2328-476a-a431-cb596df69854.png - :alt: whisper_pipeline.png - - whisper_pipeline.png - -To run the PyTorch Whisper model, we just need to call the -``model.transcribe(audio, **parameters)`` function. We will try to reuse -original model pipeline for audio transcribing after replacing the -original models with OpenVINO IR versions. - -Select inference device -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Select device from dropdown list for running inference using OpenVINO: - -.. code:: ipython3 - - core = ov.Core() - -.. code:: ipython3 - - import ipywidgets as widgets - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value='AUTO', - description='Device:', - disabled=False, - ) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. 
code:: ipython3 - - from utils import patch_whisper_for_ov_inference, OpenVINOAudioEncoder, OpenVINOTextDecoder - - patch_whisper_for_ov_inference(model) - - model.encoder = OpenVINOAudioEncoder(core, WHISPER_ENCODER_OV, device=device.value) - model.decoder = OpenVINOTextDecoder(core, WHISPER_DECODER_OV, device=device.value) - -Run video transcription pipeline -############################################################################################################################### - -Now, we are ready to start transcription. We select a video from YouTube -that we want to transcribe. Be patient, as downloading the video may -take some time. - -.. code:: ipython3 - - import ipywidgets as widgets - VIDEO_LINK = "https://youtu.be/kgL5LBM-hFI" - link = widgets.Text( - value=VIDEO_LINK, - placeholder="Type link for video", - description="Video:", - disabled=False - ) - - link - - - - -.. parsed-literal:: - - Text(value='https://youtu.be/kgL5LBM-hFI', description='Video:', placeholder='Type link for video') - - - -.. code:: ipython3 - - from pytube import YouTube - - print(f"Downloading video {link.value} started") - - output_file = Path("downloaded_video.mp4") - yt = YouTube(link.value) - yt.streams.get_highest_resolution().download(filename=output_file) - print(f"Video saved to {output_file}") - - -.. parsed-literal:: - - Downloading video https://youtu.be/kgL5LBM-hFI started - Video saved to downloaded_video.mp4 - - -.. code:: ipython3 - - from utils import get_audio - - audio = get_audio(output_file) - -Select the task for the model: - -- **transcribe** - generate audio transcription in the source language - (automatically detected). -- **translate** - generate audio transcription with translation to - English language. - -.. code:: ipython3 - - task = widgets.Select( - options=["transcribe", "translate"], - value="translate", - description="Select task:", - disabled=False - ) - task - - - - -.. 
parsed-literal:: - - Select(description='Select task:', index=1, options=('transcribe', 'translate'), value='translate') - - - -.. code:: ipython3 - - transcription = model.transcribe(audio, task=task.value) - -"The results will be saved in the ``downloaded_video.srt`` file. SRT is -one of the most popular formats for storing subtitles and is compatible -with many modern video players. This file can be used to embed -transcription into videos during playback or by injecting them directly -into video files using ``ffmpeg``. - -.. code:: ipython3 - - from utils import prepare_srt - - srt_lines = prepare_srt(transcription) - # save transcription - with output_file.with_suffix(".srt").open("w") as f: - f.writelines(srt_lines) - -Now let us see the results. - -.. code:: ipython3 - - widgets.Video.from_file(output_file, loop=False, width=800, height=800) - - - - -.. parsed-literal:: - - Video(value=b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00isommp42\x00\x00Aimoov\x00\x00\x00lmvhd...', height='800… - - - -.. code:: ipython3 - - print("".join(srt_lines)) - - -.. parsed-literal:: - - 1 - 00:00:00,000 --> 00:00:05,000 - Oh, what's that? - - 2 - 00:00:05,000 --> 00:00:09,000 - Oh, wow. - - 3 - 00:00:09,000 --> 00:00:10,000 - Hello, humans. - - 4 - 00:00:13,000 --> 00:00:15,000 - Focus on me. - - 5 - 00:00:15,000 --> 00:00:18,000 - Focus on the guard. - - 6 - 00:00:18,000 --> 00:00:22,000 - Don't tell anyone what you've seen in here. - - 7 - 00:00:22,000 --> 00:00:23,000 - Oh, my. - - 8 - 00:00:23,000 --> 00:00:24,000 - Have you seen what's in there? - - 9 - 00:00:24,000 --> 00:00:25,000 - They have intel. - - 10 - 00:00:25,000 --> 00:00:27,000 - This is where it all changes. - - - - -.. Interactive demo -.. ############################################################################################################################### - -.. .. code:: ipython3 - -.. import gradio as gr - - -.. def transcribe(url, task): -.. output_file = Path("downloaded_video.mp4") -.. 
yt = YouTube(url) -.. yt.streams.get_highest_resolution().download(filename=output_file) -.. audio = get_audio(output_file) -.. transcription = model.transcribe(audio, task=task.lower()) -.. srt_lines = prepare_srt(transcription) -.. with output_file.with_suffix(".srt").open("w") as f: -.. f.writelines(srt_lines) -.. return [str(output_file), str(output_file.with_suffix(".srt"))] - - -.. demo = gr.Interface( -.. transcribe, -.. [gr.Textbox(label="YouTube URL"), gr.Radio(["Transcribe", "Translate"], value="Transcribe")], -.. "video", -.. examples=[["https://youtu.be/kgL5LBM-hFI", "Transcribe"]], -.. allow_flagging="never" -.. ) -.. try: -.. demo.launch(debug=False) -.. except Exception: -.. demo.launch(share=True, debug=False) -.. # if you are launching remotely, specify server_name and server_port -.. # demo.launch(server_name='your server name', server_port='server port in int') -.. # Read more in the docs: https://gradio.app/docs/ - - -.. .. parsed-literal:: - -.. Running on local URL: http://127.0.0.1:7860 - -.. To create a public link, set `share=True` in `launch()`. - - - -.. .. raw:: html - -..
- diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output.rst b/docs/notebooks/228-clip-zero-shot-convert-with-output.rst index 63f70768c20f1a..7bce69e45d109a 100644 --- a/docs/notebooks/228-clip-zero-shot-convert-with-output.rst +++ b/docs/notebooks/228-clip-zero-shot-convert-with-output.rst @@ -1,8 +1,6 @@ Zero-shot Image Classification with OpenAI CLIP and OpenVINO™ ============================================================= - - Zero-shot image classification is a computer vision task to classify images into one of several classes without any prior training or knowledge of the classes. @@ -26,28 +24,25 @@ image classification. The notebook contains the following steps: 1. Download the model. 2. Instantiate the PyTorch model. -3. Export the ONNX model and convert it to OpenVINO IR, using model - conversion API. +3. Convert model to OpenVINO IR, using model conversion API. 4. Run CLIP with OpenVINO. -.. _top: - -**Table of contents**: - -- `Instantiate model <#instantiate-model>`__ -- `Run PyTorch model inference <#run-pytorch-model-inference>`__ - - - `Convert model to OpenVINO Intermediate Representation (IR) format. <#convert-model-to-openvino-intermediate-representation-ir-format>`__ +**Table of contents:** -- `Run OpenVINO model <#run-openvino-model>`__ - - `Select inference device <#select-inference-device>`__ +- `Instantiate model <#instantiate-model>`__ +- `Run PyTorch model + inference <#run-pytorch-model-inference>`__ +- `Convert model to OpenVINO Intermediate Representation (IR) + format. 
<#convert-model-to-openvino-intermediate-representation-ir-format>`__ +- `Run OpenVINO model <#run-openvino-model>`__ -- `Next Steps <#next-steps>`__ + - `Select inference device <#select-inference-device>`__ -Instantiate model `⇑ <#top>`__ -############################################################################################################################### +- `Next Steps <#next-steps>`__ +Instantiate model +----------------------------------------------------------- CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on various (image, text) pairs. It can be instructed in natural @@ -84,6 +79,10 @@ cached for the next usage. ``CLIPProcessor`` class is a wrapper for input data preprocessing. It includes both encoding the text using tokenizer and preparing the images. +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu gradio "openvino>=2023.1.0" "transformers[torch]>=4.30" + .. code:: ipython3 from transformers import CLIPProcessor, CLIPModel @@ -94,57 +93,18 @@ tokenizer and preparing the images. processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16") - -.. parsed-literal:: - - Downloading (…)lve/main/config.json: 0%| | 0.00/4.10k [00:00 0 + 2023-10-26 14:25:33.940360: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-26 14:25:33.975867: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2023-10-26 14:25:34.675789: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - -.. parsed-literal:: - - Downloading (…)cial_tokens_map.json: 0%| | 0.00/389 [00:00`__ -############################################################################################################################### - +Run PyTorch model inference +--------------------------------------------------------------------- To perform classification, define labels and load an image in RGB format. To give the model wider text context and improve guidance, we @@ -158,10 +118,26 @@ similarity score for the final result. .. code:: ipython3 + from urllib.request import urlretrieve + from pathlib import Path + from PIL import Image + + urlretrieve( + "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/228-clip-zero-shot-image-classification/visualize.py", + filename='visualize.py' + ) from visualize import visualize_result - image = Image.open('../data/image/coco.jpg') + + sample_path = Path("data/coco.jpg") + sample_path.parent.mkdir(parents=True, exist_ok=True) + urlretrieve( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + sample_path, + ) + image = Image.open(sample_path) + input_labels = ['cat', 'dog', 'wolf', 'tiger', 'man', 'horse', 'frog', 'tree', 'house', 'computer'] text_descriptions = [f"This is a photo of a {label}" for label in input_labels] @@ -174,87 +150,50 @@ similarity score for the final result. -.. image:: 228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_4_0.png - - -Convert model to OpenVINO Intermediate Representation (IR) format. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. image:: 228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_5_0.png -.. 
figure:: https://user-images.githubusercontent.com/29454499/208048580-8264e54c-151c-43ef-9e25-1302cd0dd7a2.png - :alt: conversion_path - conversion_path +Convert model to OpenVINO Intermediate Representation (IR) format. +------------------------------------------------------------------------------------------------------------ For best results with OpenVINO, it is recommended to convert the model -to OpenVINO IR format. OpenVINO supports PyTorch via ONNX conversion. -The ``torch.onnx.export`` function enables conversion of PyTorch models -to ONNX format. It requires to provide initialized model object, example -of inputs for tracing and path for saving result. The model contains -operations which supported for ONNX tracing starting with opset 14, it -is recommended to use it as ``opset_version`` parameter. Besides that, -we need to have opportunity to provide descriptions various of length -and images with different sizes, for preserving this capability after -ONNX conversion, ``dynamic_axes`` parameter can be used. More -information about PyTorch to ONNX exporting can be found in this -`tutorial `__ -and `PyTorch -documentation `__. We will -use ``mo.convert_model`` functionality to convert the ONNX model. The -``mo.convert_model`` Python function returns an OpenVINO model ready to -load on the device and start making predictions. We can save it on disk -for the next usage with ``openvino.runtime.serialize``. +to OpenVINO IR format. OpenVINO supports PyTorch via Model conversion +API. To convert the PyTorch model to OpenVINO IR format we will use +``ov.convert_model`` of `model conversion +API `__. +The ``ov.convert_model`` Python function returns an OpenVINO Model +object ready to load on the device and start making predictions. We can +save it on disk for the next usage with ``ov.save_model``. .. 
code:: ipython3 - import torch + import openvino as ov - torch.onnx.export( - model, # model being run - # model input in one of acceptable format: torch.Tensor (for single input), tuple or list of tensors for multiple inputs or dictionary with string keys and tensors as values. - dict(inputs), - "clip-vit-base-patch16.onnx", # where to save the model - opset_version=14, # the ONNX version to export the model to - input_names=["input_ids", "pixel_values", "attention_mask"], # the model's input names - output_names=["logits_per_image", "logits_per_text", "text_embeds", "image_embeds"], # the model's output names - dynamic_axes={ # variable length axes - "input_ids": {0: "batch", 1: "sequence"}, - "pixel_values": {0: "batch", 1: "num_channels", 2: "height", 3: "width"}, - "attention_mask": {0: "batch", 1: "sequence"}, - "logits_per_image": {0: "batch"}, - "logits_per_text": {0: "batch"}, - "text_embeds": {0: "batch"}, - "image_embeds": {0: "batch"} - } - ) + model.config.torchscript = True + ov_model = ov.convert_model(model, example_input=dict(inputs)) + ov.save_model(ov_model, 'clip-vit-base-patch16.xml') + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. .. parsed-literal:: - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:284: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
+ /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:287: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:327: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:684: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
- mask = torch.full((tgt_len, tgt_len), torch.tensor(torch.finfo(dtype).min, device=device), device=device) - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:292: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:295: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py:301: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:304: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attention_mask.size() != (bsz, 1, tgt_len, src_len): - /home/adrian/repos/openvino_notebooks/recipes/intelligent_queue_management/venv/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:5408: UserWarning: Exporting aten::index operator of advanced indexing in opset 14 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results. - warnings.warn( -.. code:: ipython3 - - from openvino.runtime import serialize - from openvino.tools import mo - - ov_model = mo.convert_model('clip-vit-base-patch16.onnx', compress_to_fp16=True) - serialize(ov_model, 'clip-vit-base-patch16.xml') - -Run OpenVINO model `⇑ <#top>`__ -############################################################################################################################### - +Run OpenVINO model +------------------------------------------------------------ The steps for making predictions with the OpenVINO CLIP model are similar to the PyTorch model. Let us check the model result using the @@ -263,16 +202,14 @@ same input data from the example above with PyTorch. .. code:: ipython3 from scipy.special import softmax - from openvino.runtime import Core # create OpenVINO core object instance - core = Core() + core = ov.Core() -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -292,7 +229,7 @@ Select device from dropdown list for running inference using OpenVINO: .. 
parsed-literal:: - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') @@ -325,63 +262,74 @@ example, ``cat,dog,bird``) .. code:: ipython3 - import ipywidgets as widgets - style = {'description_width': 'initial'} + import gradio as gr - image_widget = widgets.FileUpload( - accept='', - multiple=False, - description='Upload image', - style=style - ) - labels_widget = widgets.Textarea( - value='cat,dog,bird', - placeholder='Type something', - description='Enter your classes separated by ,:', - disabled=False, - style=style + def classify(image, text): + """Classify image using classes listing. + Args: + image (PIL.Image.Image): image that needs to be classified. + text (str): comma-separated list of class labels + Returns: + (dict): Mapping between class labels and class probabilities. + """ + labels = text.split(",") + text_descriptions = [f"This is a photo of a {label}" for label in labels] + inputs = processor(text=text_descriptions, images=[image], return_tensors="np", padding=True) + ov_logits_per_image = compiled_model(dict(inputs))[logits_per_image_out] + probs = softmax(ov_logits_per_image, axis=1)[0] + + return {label: float(prob) for label, prob in zip(labels, probs)} + + + demo = gr.Interface( + classify, + [ + gr.Image(label="Image", type="pil"), + gr.Textbox(label="Labels", info="Comma-separated list of class labels"), + ], + gr.Label(label="Result"), + examples=[[sample_path, "cat,dog,bird"]], ) - widgets.VBox(children=[image_widget, labels_widget]) - - + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ ..
parsed-literal:: - VBox(children=(FileUpload(value=(), description='Upload image'), Textarea(value='cat,dog,bird', description='E… + Running on local URL: http://127.0.0.1:7861 + Rerunning server... use `close()` to stop if you need to change `launch()` parameters. + ---- +.. parsed-literal:: -Run the next cell to get the result for your submitted data: + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -.. code:: ipython3 - import io - # read uploaded image - image = Image.open(io.BytesIO(image_widget.value[-1]['content'])) if image_widget.value else image - # obtain list of labels - labels = labels_widget.value.split(',') - # convert labels to text description - text_descriptions = [f"This is a photo of a {label}" for label in labels] +.. parsed-literal:: + + Running on public URL: https://4ec3df1c48219763b1.gradio.live - # preprocess input - inputs = processor(text=text_descriptions, images=[image], return_tensors="np", padding=True) - # run inference - ov_logits_per_image = compiled_model(dict(inputs))[logits_per_image_out] - # perform softmax on score - probs = softmax(ov_logits_per_image, axis=1) - # visualize prediction - visualize_result(image, labels, probs[0]) + This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces) -.. image:: 228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_17_0.png +.. .. raw:: html +..
-Next Steps `⇑ <#top>`__ -############################################################################################################################### +Next Steps +---------------------------------------------------- Open the `228-clip-zero-shot-quantize <228-clip-zero-shot-quantize.ipynb>`__ diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_12_0.png b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_12_0.png index e8137a5ea461a7..64d693a003d9f5 100644 --- a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_12_0.png +++ b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_12_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7b8ae280413c012c0cc0c2c4df95fe57b56a971584a536d32a46442b9d89c4b +oid sha256:3ae9a00e2ff86b206e6aa2588b4e8b49a8bca483b43f4b31696bd9e3f3f08238 size 464100 diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_17_0.png b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_17_0.png deleted file mode 100644 index 2f089a8dbecad7..00000000000000 --- a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_17_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c7a20c293a356fb88fdd3713c032b5093b6acf2d03ab4c98a70b11829d1c75ab -size 461829 diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_4_0.png b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_4_0.png deleted file mode 100644 index e8137a5ea461a7..00000000000000 --- a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_4_0.png 
+++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c7b8ae280413c012c0cc0c2c4df95fe57b56a971584a536d32a46442b9d89c4b -size 464100 diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_5_0.png b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_5_0.png new file mode 100644 index 00000000000000..64d693a003d9f5 --- /dev/null +++ b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/228-clip-zero-shot-convert-with-output_5_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae9a00e2ff86b206e6aa2588b4e8b49a8bca483b43f4b31696bd9e3f3f08238 +size 464100 diff --git a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/index.html b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/index.html index 30057e0e2ebb9e..20b3576951af5f 100644 --- a/docs/notebooks/228-clip-zero-shot-convert-with-output_files/index.html +++ b/docs/notebooks/228-clip-zero-shot-convert-with-output_files/index.html @@ -1,9 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/228-clip-zero-shot-convert-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/228-clip-zero-shot-convert-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/228-clip-zero-shot-convert-with-output_files/


../
-228-clip-zero-shot-convert-with-output_12_0.png    16-Aug-2023 01:31              464100
-228-clip-zero-shot-convert-with-output_17_0.png    16-Aug-2023 01:31              461829
-228-clip-zero-shot-convert-with-output_4_0.png     16-Aug-2023 01:31              464100
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/228-clip-zero-shot-convert-with-output_files/


../
+228-clip-zero-shot-convert-with-output_12_0.png    31-Oct-2023 00:35              464100
+228-clip-zero-shot-convert-with-output_5_0.png     31-Oct-2023 00:35              464100
 

diff --git a/docs/notebooks/228-clip-zero-shot-quantize-with-output.rst b/docs/notebooks/228-clip-zero-shot-quantize-with-output.rst index 1e335a73b2fd53..dc8519624b51b2 100644 --- a/docs/notebooks/228-clip-zero-shot-quantize-with-output.rst +++ b/docs/notebooks/228-clip-zero-shot-quantize-with-output.rst @@ -1,8 +1,6 @@ Post-Training Quantization of OpenAI CLIP model with NNCF ========================================================= - - The goal of this tutorial is to demonstrate how to speed up the model by applying 8-bit post-training quantization from `NNCF `__ (Neural Network @@ -16,39 +14,39 @@ The optimization process contains the following steps: 3. Compare model size of converted and quantized models. 4. Compare performance of converted and quantized models. -.. note:: +.. - You should run + **NOTE**: you should run `228-clip-zero-shot-convert <228-clip-zero-shot-convert.ipynb>`__ notebook first to generate OpenVINO IR model that is used for quantization. -.. _top: - -**Table of contents**: +**Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Create and initialize quantization <#create-and-initialize-quantization>`__ - - `Prepare datasets <#prepare-datasets>`__ +- `Prerequisites <#prerequisites>`__ +- `Create and initialize + quantization <#create-and-initialize-quantization>`__ -- `Run quantized OpenVINO model <#run-quantized-openvino-model>`__ + - `Prepare datasets <#prepare-datasets>`__ - - `Compare File Size <#compare-file-size>`__ - - `Compare inference time of the FP16 IR and quantized models <#compare-inference-time-of-the-fp16-ir-and-quantized-models>`__ +- `Run quantized OpenVINO + model <#run-quantized-openvino-model>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### + - `Compare File Size <#compare-file-size>`__ + - `Compare inference time of the FP16 IR and quantized + models 
<#compare-inference-time-of-the-fp-ir-and-quantized-models>`__ +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q datasets - !pip install -q "git+https://github.com/openvinotoolkit/nncf.git@6c0aebadd2fcdbe1481a11b40b8cd9f66b3b6fab" - -Create and initialize quantization `⇑ <#top>`__ -############################################################################################################################### + %pip install -q datasets + %pip install -q "nncf>=2.6.0" +Create and initialize quantization +---------------------------------------------------------------------------- `NNCF `__ enables post-training quantization by adding the quantization layers into the @@ -65,9 +63,8 @@ The optimization process contains the following steps: 3. Serialize the ``INT8`` model using ``openvino.runtime.serialize`` function. -Prepare datasets `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `Conceptual Captions `__ dataset @@ -90,10 +87,22 @@ model. max_length = model.config.text_config.max_position_embeddings processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16") + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11080). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.) + return torch._C._cuda_getDeviceCount() > 0 + 2023-10-26 16:44:33.809201: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-26 16:44:33.845253: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-26 16:44:34.564478: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + .. code:: ipython3 import requests from io import BytesIO + import numpy as np from PIL import Image from requests.packages.urllib3.exceptions import InsecureRequestWarning requests.packages.urllib3.disable_warnings(InsecureRequestWarning) @@ -133,6 +142,9 @@ model. url = example[image_column] try: image = get_pil_from_url(url) + h, w = image.size + if h == 1 or w == 1: + return None except Exception: return None @@ -145,6 +157,7 @@ model. import torch from datasets import load_dataset + from tqdm.notebook import tqdm def prepare_calibration_data(dataloader, init_steps): """ @@ -154,7 +167,7 @@ model. data = [] print(f"Fetching {init_steps} for the initialization...") counter = 0 - for batch in dataloader: + for batch in tqdm(dataloader): if counter == init_steps: break if batch: @@ -182,12 +195,9 @@ model. Create a quantized model from the pre-trained ``FP16`` model. -.. note:: - - Quantization is time and memory consuming operation. + **NOTE**: Quantization is time and memory consuming operation. Running quantization code below may take a long time. - .. code:: ipython3 import logging @@ -205,35 +215,14 @@ Create a quantized model from the pre-trained ``FP16`` model. .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino - - - -.. 
parsed-literal:: - - Downloading builder script: 0%| | 0.00/6.69k [00:00`__ in the NNCF repository for more information. -Run quantized OpenVINO model `⇑ <#top>`__ -############################################################################################################################### - +Run quantized OpenVINO model +---------------------------------------------------------------------- The steps for making predictions with the quantized OpenVINO CLIP model are similar to the PyTorch model. Let us check the model result using @@ -292,18 +280,26 @@ notebook <228-clip-zero-shot-image-classification.ipynb>`__. .. parsed-literal:: - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') .. code:: ipython3 - import numpy as np + from pathlib import Path from scipy.special import softmax from openvino.runtime import compile_model from visualize import visualize_result + from urllib.request import urlretrieve + + sample_path = Path("data/coco.jpg") + sample_path.parent.mkdir(parents=True, exist_ok=True) + urlretrieve( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", + sample_path, + ) + image = Image.open(sample_path) - image = Image.open('../data/image/coco.jpg') input_labels = ['cat', 'dog', 'wolf', 'tiger', 'man', 'horse', 'frog', 'tree', 'house', 'computer'] text_descriptions = [f"This is a photo of a {label}" for label in input_labels] @@ -319,9 +315,8 @@ notebook <228-clip-zero-shot-image-classification.ipynb>`__. .. image:: 228-clip-zero-shot-quantize-with-output_files/228-clip-zero-shot-quantize-with-output_16_0.png -Compare File Size `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Compare File Size +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
code:: ipython3 @@ -337,23 +332,21 @@ Compare File Size `⇑ <#top>`__ .. parsed-literal:: FP16 IR model size: 285.38 MB - INT8 model size: 168.14 MB - Model compression rate: 1.697 + INT8 model size: 144.17 MB + Model compression rate: 1.979 -Compare inference time of the FP16 IR and quantized models -`⇑ <#top>`__ To measure the inference performance of the ``FP16`` and -``INT8`` models, we use median inference time on calibration dataset. So -we can approximately estimate the speed up of the dynamic quantized -models. +Compare inference time of the FP16 IR and quantized models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. note:: +To measure the inference performance of the ``FP16`` and ``INT8`` +models, we use median inference time on calibration dataset. So we can +approximately estimate the speed up of the dynamic quantized models. - For the most accurate performance estimation, it is + **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications with static shapes. - .. code:: ipython3 import time @@ -380,5 +373,5 @@ models. .. 
parsed-literal:: - Performance speed up: 2.092 + Performance speed up: 1.548 diff --git a/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/228-clip-zero-shot-quantize-with-output_16_0.png b/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/228-clip-zero-shot-quantize-with-output_16_0.png index eb2f7d94e987cb..8cf35d676b025d 100644 --- a/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/228-clip-zero-shot-quantize-with-output_16_0.png +++ b/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/228-clip-zero-shot-quantize-with-output_16_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f59ef4796e26565ffd05fe4ad9681c3188952ba6bb297319aeb75e29dd86fafa -size 464309 +oid sha256:22e3fcd532e39f068b897eccd3c4f7b0ba9d565379e9b3f8486a7a56b500e385 +size 464748 diff --git a/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/index.html b/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/index.html index 5f79389b92c3ad..fe4c36acab71da 100644 --- a/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/index.html +++ b/docs/notebooks/228-clip-zero-shot-quantize-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/228-clip-zero-shot-quantize-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/228-clip-zero-shot-quantize-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/228-clip-zero-shot-quantize-with-output_files/


../
-228-clip-zero-shot-quantize-with-output_16_0.png   16-Aug-2023 01:31              464309
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/228-clip-zero-shot-quantize-with-output_files/


../
+228-clip-zero-shot-quantize-with-output_16_0.png   31-Oct-2023 00:35              464748
 

diff --git a/docs/notebooks/229-distilbert-sequence-classification-with-output.rst b/docs/notebooks/229-distilbert-sequence-classification-with-output.rst index 4095cec55bf40f..4bc1bb3daf5b49 100644 --- a/docs/notebooks/229-distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/229-distilbert-sequence-classification-with-output.rst @@ -1,32 +1,60 @@ Sentiment Analysis with OpenVINO™ ================================= - - **Sentiment analysis** is the use of natural language processing, text analysis, computational linguistics, and biometrics to systematically identify, extract, quantify, and study affective states and subjective information. This notebook demonstrates how to convert and run a sequence classification model using OpenVINO. -.. _top: +**Table of contents:** + + +- `Imports <#imports>`__ +- `Initializing the Model <#initializing-the-model>`__ +- `Initializing the Tokenizer <#initializing-the-tokenizer>`__ +- `Convert Model to OpenVINO Intermediate Representation + format <#convert-model-to-openvino-intermediate-representation-format>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Inference <#inference>`__ + + - `For a single input + sentence <#for-a-single-input-sentence>`__ + - `Read from a text file <#read-from-a-text-file>`__ -**Table of contents**: +Imports +------------------------------------------------- -- `Imports <#imports>`__ -- `Initializing the Model <#initializing-the-model>`__ -- `Initializing the Tokenizer <#initializing-the-tokenizer>`__ -- `Convert Model to OpenVINO Intermediate Representation format <#convert-model-to-openvino-intermediate-representation-format>`__ +.. code:: ipython3 - - `Select inference device <#select-inference-device>`__ + %pip install "openvino>=2023.1.0" transformers -- `Inference <#inference>`__ - - `For a single input sentence <#for single -a- -input-sentence>`__ - - `Read from a text file <#read-from-a-text-file>`__ +.. 
parsed-literal:: -Imports `⇑ <#top>`__ -############################################################################################################################### + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2023.1.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.34.1) + Requirement already satisfied: numpy>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.24.3) + Requirement already satisfied: openvino-telemetry>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2023.2.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.13.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.17.3) + Requirement already satisfied: packaging>=20.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (23.2) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.1) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
transformers) (2023.10.3) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.31.0) + Requirement already satisfied: tokenizers<0.15,>=0.14 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.14.1) + Requirement already satisfied: safetensors>=0.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.0) + Requirement already satisfied: tqdm>=4.27 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (4.66.1) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.10.0) + Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.8.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.1) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.0.7) + Requirement 
already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2023.7.22) + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. .. code:: ipython3 @@ -36,14 +64,13 @@ Imports `⇑ <#top>`__ import time from transformers import AutoModelForSequenceClassification, AutoTokenizer import numpy as np - from openvino.tools import mo - from openvino.runtime import PartialShape, Type, serialize, Core + import openvino as ov -Initializing the Model `⇑ <#top>`__ -############################################################################################################################### +Initializing the Model +---------------------------------------------------------------- -We will use the transformer-based -`DistilBERT base uncased finetuned SST-2 `__ +We will use the transformer-based `DistilBERT base uncased finetuned +SST-2 `__ model from Hugging Face. .. code:: ipython3 @@ -53,9 +80,8 @@ model from Hugging Face. pretrained_model_name_or_path=checkpoint ) -Initializing the Tokenizer `⇑ <#top>`__ -############################################################################################################################### - +Initializing the Tokenizer +-------------------------------------------------------------------- Text Preprocessing cleans the text-based input data so it can be fed into the model. @@ -74,47 +100,59 @@ understand the context of a sentence. 
Here, we will use pretrained_model_name_or_path=checkpoint ) -Convert Model to OpenVINO Intermediate Representation format. `⇑ <#top>`__ -############################################################################################################################### +Convert Model to OpenVINO Intermediate Representation format +------------------------------------------------------------------------------------------------------ -`Model conversion API `__ +`Model conversion +API `__ facilitates the transition between training and deployment environments, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices. .. code:: ipython3 + import torch + ir_xml_name = checkpoint + ".xml" MODEL_DIR = "model/" ir_xml_path = Path(MODEL_DIR) / ir_xml_name - ov_model = mo.convert_model(model, input=[mo.InputCutInfo(shape=PartialShape([1, -1]), type=Type.i64), mo.InputCutInfo(shape=PartialShape([1, -1]), type=Type.i64)]) - serialize(ov_model, ir_xml_path) + + MAX_SEQ_LENGTH = 128 + input_info = [(ov.PartialShape([1, -1]), ov.Type.i64), (ov.PartialShape([1, -1]), ov.Type.i64)] + default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64) + inputs = { + "input_ids": default_input, + "attention_mask": default_input, + } + + ov_model = ov.convert_model(model, input=input_info, example_input=inputs) + ov.save_model(ov_model, ir_xml_path) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:223: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:223: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) -OpenVINO™ Runtime uses the `Infer Request `__ +OpenVINO™ Runtime uses the `Infer +Request `__ mechanism which enables running models on different devices in asynchronous or synchronous manners. The model graph is sent as an argument to the OpenVINO API and an inference request is created. The default inference mode is AUTO but it can be changed according to requirements and hardware available. You can explore the different inference modes and their usage `in -documentation. `__ +documentation. `__ .. code:: ipython3 - core = Core() - -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + core = ov.Core() +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -157,9 +195,8 @@ Select device from dropdown list for running inference using OpenVINO: e_x = np.exp(x - np.max(x)) return e_x / e_x.sum() -Inference `⇑ <#top>`__ -############################################################################################################################### - +Inference +--------------------------------------------------- .. 
code:: ipython3 @@ -184,9 +221,8 @@ Inference `⇑ <#top>`__ probability = np.argmax(softmax(i)) return label[probability] -For a single input sentence `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +For a single input sentence +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -202,17 +238,39 @@ For a single input sentence `⇑ <#top>`__ .. parsed-literal:: Label: POSITIVE - Total Time: 0.04 seconds + Total Time: 0.03 seconds + + +Read from a text file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. code:: ipython3 + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file + + # Download the text from the openvino_notebooks storage + vocab_file_path = download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/food_reviews.txt", + directory="data" + ) + + + +.. parsed-literal:: -Read from a text file `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + data/food_reviews.txt: 0%| | 0.00/71.0 [00:00`__ User Input: We went because the restaurant had good reviews. 
Label: POSITIVE - Total Time: 0.02 seconds + Total Time: 0.03 seconds diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output.rst b/docs/notebooks/230-yolov8-instance-segmentation-with-output.rst new file mode 100644 index 00000000000000..8ee70305894350 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output.rst @@ -0,0 +1,1393 @@ +Convert and Optimize YOLOv8 instance segmentation model with OpenVINO™ +====================================================================== + +Instance segmentation goes a step further than object detection and +involves identifying individual objects in an image and segmenting them +from the rest of the image. Instance segmentation as an object detection +are often used as key components in computer vision systems. +Applications that use real-time instance segmentation models include +video analytics, robotics, autonomous vehicles, multi-object tracking +and object counting, medical image analysis, and many others. + +This tutorial demonstrates step-by-step instructions on how to run and +optimize PyTorch YOLOv8 with OpenVINO. We consider the steps required +for instance segmentation scenario. + +The tutorial consists of the following steps: + +- Prepare the PyTorch model. +- Download and prepare a dataset. +- Validate the original model. +- Convert the PyTorch model to OpenVINO IR. +- Validate the converted model. +- Prepare and run optimization pipeline. +- Compare performance of the FP32 and quantized models. +- Compare accuracy of the FP32 and quantized models. 
+- Live demo + +**Table of contents:** + + +- `Get PyTorch model <#get-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO + IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Preprocessing <#preprocessing>`__ + - `Postprocessing <#postprocessing>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ + +- `Check model accuracy on the + dataset <#check-model-accuracy-on-the-dataset>`__ + + - `Download the validation + dataset <#download-the-validation-dataset>`__ + - `Define validation + function <#define-validation-function>`__ + - `Configure Validator helper and create + DataLoader <#configure-validator-helper-and-create-dataloader>`__ + +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ + +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ + + - `Compare performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ + - `Validate quantized model + accuracy <#validate-quantized-model-accuracy>`__ + +- `Other ways to optimize + model <#other-ways-to-optimize-model>`__ +- `Live demo <#live-demo>`__ + + - `Run Live Object Detection and + Segmentation <#run-live-object-detection-and-segmentation>`__ + +Get PyTorch model +----------------------------------------------------------- + +Generally, PyTorch models represent an instance of the +`torch.nn.Module `__ +class, initialized by a state dictionary with model weights. We will use +the YOLOv8 nano model (also known as ``yolov8n``) pre-trained on a COCO +dataset, which is available in this +`repo `__. Similar steps are +also applicable to other YOLOv8 models. 
Typical steps to obtain a +pre-trained model: 1. Create an instance of a model class. 2. Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. + +In this case, the creators of the model provide an API that enables +converting the YOLOv8 model to ONNX and then to OpenVINO IR. Therefore, +we do not need to do these steps manually. + +Prerequisites +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Install necessary packages. + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" + %pip install "ultralytics==8.0.43" onnx + +Import required utility functions. The lower cell will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + from pathlib import Path + + # Fetch the notebook utils script from the openvino_notebooks repo + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + + from notebook_utils import download_file, VideoPlayer + +Define utility functions for drawing results + +.. 
code:: ipython3 + + from typing import Tuple, Dict + import cv2 + import numpy as np + from PIL import Image + from ultralytics.yolo.utils.plotting import colors + + + def plot_one_box(box:np.ndarray, img:np.ndarray, color:Tuple[int, int, int] = None, mask:np.ndarray = None, label:str = None, line_thickness:int = 5): + """ + Helper function for drawing single bounding box on image + Parameters: + x (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2] + img (no.ndarray): input image + color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing box, if not specified will be selected randomly + mask (np.ndarray, *optional*, None): instance segmentation mask polygon in format [N, 2], where N - number of points in contour, if not provided, only box will be drawn + label (str, *optonal*, None): box label string, if not provided will not be provided as drowing result + line_thickness (int, *optional*, 5): thickness for box drawing lines + """ + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + if mask is not None: + image_with_mask = img.copy() + mask + cv2.fillPoly(image_with_mask, pts=[mask.astype(int)], color=color) + img = cv2.addWeighted(img, 0.5, image_with_mask, 0.5, 1) + return img + + + def draw_results(results:Dict, source_image:np.ndarray, label_map:Dict): + """ + Helper function for drawing bounding boxes 
on image + Parameters: + image_res (np.ndarray): detection predictions in format [x1, y1, x2, y2, score, label_id] + source_image (np.ndarray): input image for drawing + label_map; (Dict[int, str]): label_id to class name mapping + Returns: + + """ + boxes = results["det"] + masks = results.get("segment") + for idx, (*xyxy, conf, lbl) in enumerate(boxes): + label = f'{label_map[int(lbl)]} {conf:.2f}' + mask = masks[idx] if masks is not None else None + source_image = plot_one_box(xyxy, source_image, mask=mask, label=label, color=colors(int(lbl)), line_thickness=1) + return source_image + +.. code:: ipython3 + + # Download a test sample + IMAGE_PATH = Path('./data/coco_bike.jpg') + download_file( + url='https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg', + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent + ) + + +.. parsed-literal:: + + 'data/coco_bike.jpg' already exists. + + + + +.. parsed-literal:: + + PosixPath('/home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/coco_bike.jpg') + + + +Instantiate model +----------------------------------------------------------- + +To load the model, you need to specify a path to the model +checkpoint. It can be a local path or a name available on the models hub +(in this case the model checkpoint will be downloaded automatically). + +When making a prediction, the model accepts a path to an input image and returns +a list of ``Results`` objects. ``Results`` contains boxes for an object +detection model, and boxes and masks for a segmentation model. It also +contains utilities for processing results, for example, the ``plot()`` +method for drawing. + +Let us consider the examples: + +.. code:: ipython3 + + models_dir = Path('./models') + models_dir.mkdir(exist_ok=True) + +..
code:: ipython3 + + from ultralytics import YOLO + + SEG_MODEL_NAME = "yolov8n-seg" + + seg_model = YOLO(models_dir / f'{SEG_MODEL_NAME}.pt') + label_map = seg_model.model.names + + res = seg_model(IMAGE_PATH) + Image.fromarray(res[0].plot()[:, :, ::-1]) + + +.. parsed-literal:: + + 2023-10-05 19:10:02.690018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-05 19:10:02.730258: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-05 19:10:03.377715: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Ultralytics YOLOv8.0.43 🚀 Python-3.8.10 torch-2.0.1+cpu CPU + YOLOv8n-seg summary (fused): 195 layers, 3404320 parameters, 0 gradients, 12.6 GFLOPs + + image 1/1 /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 dog, 55.0ms + Speed: 2.6ms preprocess, 55.0ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640) + /home/ea/work/ov_venv/lib/python3.8/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. 
To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( + + + + +.. image:: 230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.png + + + +Convert model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +YOLOv8 provides API for convenient model exporting to different formats +including OpenVINO IR. ``model.export`` is responsible for model +conversion. We need to specify the format, and additionally, we can +preserve dynamic shapes in the model. + +.. code:: ipython3 + + # instance segmentation model + seg_model_path = models_dir / f"{SEG_MODEL_NAME}_openvino_model/{SEG_MODEL_NAME}.xml" + if not seg_model_path.exists(): + seg_model.export(format="openvino", dynamic=True, half=False) + +Verify model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To test model work, we create inference pipeline similar to +``model.predict`` method. The pipeline consists of preprocessing step, +inference of OpenVINO model and results post-processing to get results. + +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Model input is a tensor with the ``[-1, 3, -1, -1]`` shape in the +``N, C, H, W`` format, where \* ``N`` - number of images in batch (batch +size) \* ``C`` - image channels \* ``H`` - image height \* ``W`` - image +width + +The model expects images in RGB channels format and normalized in [0, 1] +range. Although the model supports dynamic input shape with preserving +input divisibility to 32, it is recommended to use static shapes, for +example, 640x640 for better efficiency. 
To resize images to fit the model +size, the ``letterbox`` resize approach is used, where the aspect ratio of +width and height is preserved. + +To keep a specific shape, preprocessing automatically enables padding. + +.. code:: ipython3 + + from typing import Tuple + from ultralytics.yolo.utils import ops + import torch + import numpy as np + + + def letterbox(img: np.ndarray, new_shape:Tuple[int, int] = (640, 640), color:Tuple[int, int, int] = (114, 114, 114), auto:bool = False, scale_fill:bool = False, scaleup:bool = False, stride:int = 32): + """ + Resize image and padding for detection. Takes image as input, + resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints + + Parameters: + img (np.ndarray): image for preprocessing + new_shape (Tuple(int, int)): image size after preprocessing in format [height, width] + color (Tuple(int, int, int)): color for filling padded area + auto (bool): use dynamic input size, only padding for stride constrins applied + scale_fill (bool): scale image to fill new_shape + scaleup (bool): allow scale image if it is lower then desired input size, can affect model accuracy + stride (int): input padding stride + Returns: + img (np.ndarray): image after preprocessing + ratio (Tuple(float, float)): hight and width scaling ratio + padding_size (Tuple(int, int)): height and width padding size + + + """ + # Resize and pad image while meeting stride-multiple constraints + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto:
# minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scale_fill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + + + def preprocess_image(img0: np.ndarray): + """ + Preprocess image according to YOLOv8 input requirements. + Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW. + + Parameters: + img0 (np.ndarray): image for preprocessing + Returns: + img (np.ndarray): image after preprocessing + """ + # resize + img = letterbox(img0)[0] + + # Convert HWC to CHW + img = img.transpose(2, 0, 1) + img = np.ascontiguousarray(img) + return img + + + def image_to_tensor(image:np.ndarray): + """ + Preprocess image according to YOLOv8 input requirements. + Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW. 
+ + Parameters: + img (np.ndarray): image for preprocessing + Returns: + input_tensor (np.ndarray): input tensor in NCHW format with float32 values in [0, 1] range + """ + input_tensor = image.astype(np.float32) # uint8 to fp32 + input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0 + + # add batch dimension + if input_tensor.ndim == 3: + input_tensor = np.expand_dims(input_tensor, 0) + return input_tensor + +Postprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The model output contains: - detection boxes candidates - proto mask +candidates + +Detection boxes candidates are the tensors with the ``[-1,84,-1]`` shape +in the ``B,84,N`` format, where: + +- ``B`` - batch size +- ``N`` - number of detection boxes + +For getting the final prediction, we need to apply a non-maximum +suppression algorithm and rescale box coordinates to the original image +size. + +After final prediction detection box has the [``x``, ``y``, ``h``, +``w``, ``class_no_1``, …, ``class_no_80``] format, where: + +- (``x``, ``y``) - raw coordinates of box center +- ``h``, ``w`` - raw height and width of the box +- ``class_no_1``, …, ``class_no_80`` - probability distribution over + the classes. + +Proto mask candidates are used for instance segmentation. They should be +decoded by using box coordinates. The output is a tensor with the +``[-1, 32, -1, -1]`` shape in the ``B,C,H,W`` format, where: - ``B`` - +batch size - ``C`` - number of candidates - ``H`` - mask height - +``W`` - mask width + +.. code:: ipython3 + + try: + scale_segments = ops.scale_segments + except AttributeError: + scale_segments = ops.scale_coords + + def postprocess( + pred_boxes:np.ndarray, + input_hw:Tuple[int, int], + orig_img:np.ndarray, + min_conf_threshold:float = 0.25, + nms_iou_threshold:float = 0.7, + agnosting_nms:bool = False, + max_detections:int = 300, + pred_masks:np.ndarray = None, + retina_mask:bool = False + ): + """ + YOLOv8 model postprocessing function.
Applied non maximum supression algorithm to detections and rescale boxes to original image size + Parameters: + pred_boxes (np.ndarray): model output prediction boxes + input_hw (np.ndarray): preprocessed image + orig_image (np.ndarray): image before preprocessing + min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering + nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS + agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not + max_detections (int, *optional*, 300): maximum detections after NMS + pred_masks (np.ndarray, *optional*, None): model ooutput prediction masks, if not provided only boxes will be postprocessed + retina_mask (bool, *optional*, False): retina mask postprocessing instead of native decoding + Returns: + pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] and + segment - segmentation polygons for each element in batch + """ + nms_kwargs = {"agnostic": agnosting_nms, "max_det":max_detections} + # if pred_masks is not None: + # nms_kwargs["nm"] = 32 + preds = ops.non_max_suppression( + torch.from_numpy(pred_boxes), + min_conf_threshold, + nms_iou_threshold, + nc=80, + **nms_kwargs + ) + results = [] + proto = torch.from_numpy(pred_masks) if pred_masks is not None else None + + for i, pred in enumerate(preds): + shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape + if not len(pred): + results.append({"det": [], "segment": []}) + continue + if proto is None: + pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() + results.append({"det": pred}) + continue + if retina_mask: + pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() + masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2]) # HWC + segments = [scale_segments(input_hw, x, shape, normalize=False) for x in 
ops.masks2segments(masks)] + else: + masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], input_hw, upsample=True) + pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() + segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)] + results.append({"det": pred[:, :6].numpy(), "segment": segments}) + return results + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + import openvino as ov + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +Test on single image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now, once we have defined preprocessing and postprocessing steps, we are +ready to check model prediction. + +.. code:: ipython3 + + core = ov.Core() + seg_ov_model = core.read_model(seg_model_path) + if device.value != "CPU": + seg_ov_model.reshape({0: [1, 3, 640, 640]}) + seg_compiled_model = core.compile_model(seg_ov_model, device.value) + + + def detect(image:np.ndarray, model:ov.Model): + """ + OpenVINO YOLOv8 model inference function. Preprocess image, runs model inference and postprocess results using NMS. + Parameters: + image (np.ndarray): input image. + model (Model): OpenVINO compiled model. 
+ Returns: + detections (np.ndarray): detected boxes in format [x1, y1, x2, y2, score, label] + """ + num_outputs = len(model.outputs) + preprocessed_image = preprocess_image(image) + input_tensor = image_to_tensor(preprocessed_image) + result = model(input_tensor) + boxes = result[model.output(0)] + masks = None + if num_outputs > 1: + masks = result[model.output(1)] + input_hw = input_tensor.shape[2:] + detections = postprocess(pred_boxes=boxes, input_hw=input_hw, orig_img=image, pred_masks=masks) + return detections + + input_image = np.array(Image.open(IMAGE_PATH)) + detections = detect(input_image, seg_compiled_model)[0] + image_with_masks = draw_results(detections, input_image, label_map) + + Image.fromarray(image_with_masks) + + + + +.. image:: 230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.png + + + +Great! The result is the same as the one produced by the original model. + +Check model accuracy on the dataset +----------------------------------------------------------------------------- + +To compare the optimized model result with the original, it is useful +to have some measurable results in terms of model accuracy on the +validation dataset. + +Download the validation dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +YOLOv8 is pre-trained on the COCO dataset, so to evaluate the model +accuracy we need to download it. According to the instructions provided +in the YOLOv8 repo, we also need to download annotations in the format +used by the author of the model, for use with the original model +evaluation function. + + **Note**: The initial dataset download may take a few minutes to + complete. The download speed will vary depending on the quality of + your internet connection. + +..
code:: ipython3 + + from zipfile import ZipFile + + DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" + LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/8ebe94d1e928687feaa1fee6d5668987df5e43be/ultralytics/datasets/coco.yaml" # last compatible format with ultralytics 8.0.43 + + from ultralytics.yolo.utils import DATASETS_DIR + + OUT_DIR = DATASETS_DIR + + DATA_PATH = OUT_DIR / "val2017.zip" + LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip" + CFG_PATH = OUT_DIR / "coco.yaml" + + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) + download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent) + download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) + + if not (OUT_DIR / "coco/labels").exists(): + with ZipFile(LABELS_PATH , "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + with ZipFile(DATA_PATH , "r") as zip_ref: + zip_ref.extractall(OUT_DIR / 'coco/images') + + +.. parsed-literal:: + + '/home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/datasets/val2017.zip' already exists. + '/home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/datasets/coco2017labels-segments.zip' already exists. + + + +.. parsed-literal:: + + /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/datasets/coco.yaml: 0%| | 0.00/1… + + +Define validation function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + from tqdm.notebook import tqdm + from ultralytics.yolo.utils.metrics import ConfusionMatrix + + + def test(model:ov.Model, core:ov.Core, data_loader:torch.utils.data.DataLoader, validator, num_samples:int = None): + """ + OpenVINO YOLOv8 model accuracy validation function. 
Runs model validation on dataset and returns metrics + Parameters: + model (Model): OpenVINO model + data_loader (torch.utils.data.DataLoader): dataset loader + validator: instance of validator class + num_samples (int, *optional*, None): validate model only on specified number samples, if provided + Returns: + stats: (Dict[str, float]) - dictionary with aggregated accuracy metrics statistics, key is metric name, value is metric value + """ + validator.seen = 0 + validator.jdict = [] + validator.stats = [] + validator.batch_i = 1 + validator.confusion_matrix = ConfusionMatrix(nc=validator.nc) + model.reshape({0: [1, 3, -1, -1]}) + num_outputs = len(model.outputs) + compiled_model = core.compile_model(model) + for batch_i, batch in enumerate(tqdm(data_loader, total=num_samples)): + if num_samples is not None and batch_i == num_samples: + break + batch = validator.preprocess(batch) + results = compiled_model(batch["img"]) + if num_outputs == 1: + preds = torch.from_numpy(results[compiled_model.output(0)]) + else: + preds = [torch.from_numpy(results[compiled_model.output(0)]), torch.from_numpy(results[compiled_model.output(1)])] + preds = validator.postprocess(preds) + validator.update_metrics(preds, batch) + stats = validator.get_stats() + return stats + + + def print_stats(stats:np.ndarray, total_images:int, total_objects:int): + """ + Helper function for printing accuracy statistic + Parameters: + stats: (Dict[str, float]) - dictionary with aggregated accuracy metrics statistics, key is metric name, value is metric value + total_images (int) - number of evaluated images + total objects (int) + Returns: + None + """ + print("Boxes:") + mp, mr, map50, mean_ap = stats['metrics/precision(B)'], stats['metrics/recall(B)'], stats['metrics/mAP50(B)'], stats['metrics/mAP50-95(B)'] + # Print results + s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'Precision', 'Recall', 'mAP@.5', 'mAP@.5:.95') + print(s) + pf = '%20s' + '%12i' * 2 + '%12.3g' * 4 # print format + 
print(pf % ('all', total_images, total_objects, mp, mr, map50, mean_ap)) + if 'metrics/precision(M)' in stats: + s_mp, s_mr, s_map50, s_mean_ap = stats['metrics/precision(M)'], stats['metrics/recall(M)'], stats['metrics/mAP50(M)'], stats['metrics/mAP50-95(M)'] + # Print results + s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'Precision', 'Recall', 'mAP@.5', 'mAP@.5:.95') + print(s) + pf = '%20s' + '%12i' * 2 + '%12.3g' * 4 # print format + print(pf % ('all', total_images, total_objects, s_mp, s_mr, s_map50, s_mean_ap)) + +Configure Validator helper and create DataLoader +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The original model repository uses a ``Validator`` wrapper, which +represents the accuracy validation pipeline. It creates dataloader and +evaluation metrics and updates metrics on each data batch produced by +the dataloader. Besides that, it is responsible for data preprocessing +and results postprocessing. For class initialization, the configuration +should be provided. We will use the default setup, but it can be +replaced with some parameters overriding to test on custom data. The +model has connected the ``ValidatorClass`` method, which creates a +validator class instance. + +.. code:: ipython3 + + from ultralytics.yolo.utils import DEFAULT_CFG + from ultralytics.yolo.cfg import get_cfg + from ultralytics.yolo.data.utils import check_det_dataset + + args = get_cfg(cfg=DEFAULT_CFG) + args.data = str(CFG_PATH) + +.. 
code:: ipython3 + + seg_validator = seg_model.ValidatorClass(args=args) + seg_validator.data = check_det_dataset(args.data) + seg_data_loader = seg_validator.get_dataloader("datasets/coco/", 1) + + seg_validator.is_coco = True + seg_validator.class_map = ops.coco80_to_coco91_class() + seg_validator.names = seg_model.model.names + seg_validator.metrics.names = seg_validator.names + seg_validator.nc = seg_model.model.model[-1].nc + seg_validator.nm = 32 + seg_validator.process = ops.process_mask + seg_validator.plot_masks = [] + + +.. parsed-literal:: + + val: Scanning datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00\ **Note**: Model evaluation is time consuming +process and can take several minutes, depending on the hardware. For +reducing calculation time, we define ``num_samples`` parameter with +evaluation subset size, but in this case, accuracy can be noncomparable +with originally reported by the authors of the model, due to validation +subset difference. *To validate the models on the full dataset set +``NUM_TEST_SAMPLES = None``.* + +.. code:: ipython3 + + NUM_TEST_SAMPLES = 300 + +.. code:: ipython3 + + fp_seg_stats = test(seg_ov_model, core, seg_data_loader, seg_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00`__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) to optimize +YOLOv8. + +The optimization process contains the following steps: + +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. +3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` + function. + +Reuse validation dataloader in accuracy testing for quantization. 
For +that, it should be wrapped into the ``nncf.Dataset`` object, together with a +transformation function that extracts only the input tensors. + +.. code:: ipython3 + + import nncf # noqa: F811 + from typing import Dict + + + def transform_fn(data_item:Dict): + """ + Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. + Parameters: + data_item: Dict with data item produced by DataLoader during iteration + Returns: + input_tensor: Input data for quantization + """ + input_tensor = seg_validator.preprocess(data_item)['img'].numpy() + return input_tensor + + + quantization_dataset = nncf.Dataset(seg_data_loader, transform_fn) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +The ``nncf.quantize`` function provides an interface for model +quantization. It requires an instance of the OpenVINO Model and a +quantization dataset. Optionally, some additional parameters for +configuring the quantization process (number of samples for quantization, +preset, ignored scope, etc.) can be provided. The YOLOv8 model contains +non-ReLU activation functions, which require asymmetric quantization of +activations. To achieve a better result, we will use a ``mixed`` +quantization preset. It provides symmetric quantization of weights and +asymmetric quantization of activations. For more accurate results, we +should keep the operations in the postprocessing subgraph in floating +point precision, using the ``ignored_scope`` parameter. + + **Note**: Model post-training quantization is a time-consuming process. + Be patient, it can take several minutes depending on your hardware. + +..
code:: ipython3 + + ignored_scope = nncf.IgnoredScope( + types=["Multiply", "Subtract", "Sigmoid"], # ignore operations + names=[ + "/model.22/dfl/conv/Conv", # in the post-processing subgraph + "/model.22/Add", + "/model.22/Add_1", + "/model.22/Add_2", + "/model.22/Add_3", + "/model.22/Add_4", + "/model.22/Add_5", + "/model.22/Add_6", + "/model.22/Add_7", + "/model.22/Add_8", + "/model.22/Add_9", + "/model.22/Add_10" + ] + ) + + + # Detection model + quantized_seg_model = nncf.quantize( + seg_ov_model, + quantization_dataset, + preset=nncf.QuantizationPreset.MIXED, + ignored_scope=ignored_scope + ) + + +.. parsed-literal:: + + INFO:nncf:12 ignored nodes was found by name in the NNCFGraph + INFO:nncf:9 ignored nodes was found by types in the NNCFGraph + INFO:nncf:Not adding activation input quantizer for operation: 140 /model.22/Sigmoid + INFO:nncf:Not adding activation input quantizer for operation: 174 /model.22/dfl/conv/Conv + INFO:nncf:Not adding activation input quantizer for operation: 199 /model.22/Sub + INFO:nncf:Not adding activation input quantizer for operation: 200 /model.22/Add_10 + INFO:nncf:Not adding activation input quantizer for operation: 217 /model.22/Sub_1 + INFO:nncf:Not adding activation input quantizer for operation: 250 /model.22/Mul_5 + + +.. parsed-literal:: + + Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:38<00:00, 7.78it/s] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:03<00:00, 19.05it/s] + + +.. 
code:: ipython3 + + from openvino.runtime import serialize + + int8_model_seg_path = models_dir / f'{SEG_MODEL_NAME}_openvino_int8_model/{SEG_MODEL_NAME}.xml' + print(f"Quantized segmentation model will be saved to {int8_model_seg_path}") + serialize(quantized_seg_model, str(int8_model_seg_path)) + + +.. parsed-literal:: + + Quantized segmentation model will be saved to models/yolov8n-seg_openvino_int8_model/yolov8n-seg.xml + + +Validate Quantized model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``nncf.quantize`` returns the OpenVINO Model class instance, which is +suitable for loading on a device for making predictions. ``INT8`` model +input data and output result formats have no difference from the +floating point model representation. Therefore, we can reuse the same +``detect`` function defined above for getting the ``INT8`` model result +on the image. + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + if device.value != "CPU": + quantized_seg_model.reshape({0: [1, 3, 640, 640]}) + quantized_seg_compiled_model = core.compile_model(quantized_seg_model, device.value) + input_image = np.array(Image.open(IMAGE_PATH)) + detections = detect(input_image, quantized_seg_compiled_model)[0] + image_with_masks = draw_results(detections, input_image, label_map) + + Image.fromarray(image_with_masks) + + + + +.. 
image:: 230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.png + + + +Compare the Original and Quantized Models +----------------------------------------------------------------------------------- + +Compare performance of the Original and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Finally, use the OpenVINO `Benchmark +Tool `__ +to measure the inference performance of the ``FP32`` and ``INT8`` +models. + + **Note**: For more accurate performance, it is recommended to run + ``benchmark_app`` in a terminal/command prompt after closing other + applications. Run + ``benchmark_app -m <model_path> -d CPU -shape "<input_shape>"`` to + benchmark async inference on CPU on specific input data shape for one + minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + !benchmark_app -m $seg_model_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT.
+ [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 20.21 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [?,116,?] + [ INFO ] output1 (node: output1) : f32 / [...] / [?,32,8..,8..] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 13.52 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] + [ INFO ] output1 (node: output1) : f32 / [...] / [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 457.49 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: 
False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 42.16 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 1860 iterations + [ INFO ] Duration: 15069.12 ms + [ INFO ] Latency: + [ INFO ] Median: 92.86 ms + [ INFO ] Average: 96.95 ms + [ INFO ] Min: 53.68 ms + [ INFO ] Max: 181.23 ms + [ INFO ] Throughput: 123.43 FPS + + +.. code:: ipython3 + + !benchmark_app -m $int8_model_seg_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 31.10 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,21..] + [ INFO ] output1 (node: output1) : f32 / [...] / [1,32,8..,8..] 
+ [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 17.80 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] + [ INFO ] output1 (node: output1) : f32 / [...] / [1,32,160,160] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 679.71 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! 
+ [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 24.87 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 4416 iterations + [ INFO ] Duration: 15063.93 ms + [ INFO ] Latency: + [ INFO ] Median: 38.93 ms + [ INFO ] Average: 40.76 ms + [ INFO ] Min: 24.40 ms + [ INFO ] Max: 83.87 ms + [ INFO ] Throughput: 293.15 FPS + + +Validate quantized model accuracy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As we can see, there is no significant difference between ``INT8`` and +float model result in a single image test. To understand how +quantization influences model prediction precision, we can compare model +accuracy on a dataset. + +.. code:: ipython3 + + int8_seg_stats = test(quantized_seg_model, core, seg_data_loader, seg_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00`__ + +Preprocessing API enables making preprocessing a part of the model +reducing application code and dependency on additional image processing +libraries. The main advantage of Preprocessing API is that preprocessing +steps will be integrated into the execution graph and will be performed +on a selected device (CPU/GPU etc.) rather than always being executed on +CPU as part of an application. This will also improve selected device +utilization. For more information, refer to the overview of +`Preprocessing API +tutorial <118-optimize-preprocessing-with-output.html>`__. 
+To see, how it could be used with YOLOV8 object detection model , +please, see `Convert and Optimize YOLOv8 real-time object detection with +OpenVINO tutorial <./230-yolov8-object-detection.ipynb>`__ + +Live demo +--------------------------------------------------- + +The following code runs model inference on a video: + +.. code:: ipython3 + + import collections + import time + from IPython import display + + + def run_instance_segmentation(source=0, flip=False, use_popup=False, skip_first_frames=0, model=seg_model, device=device.value): + player = None + if device != "CPU": + model.reshape({0: [1, 3, 640, 640]}) + compiled_model = core.compile_model(model, device) + try: + # Create a video player to play with target fps. + player = VideoPlayer( + source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames + ) + # Start capturing. + player.start() + if use_popup: + title = "Press ESC to Exit" + cv2.namedWindow( + winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE + ) + + processing_times = collections.deque() + while True: + # Grab the frame. + frame = player.next() + if frame is None: + print("Source ended") + break + # If the largest frame dimension exceeds 1280 px, downscale the frame to improve the performance. + scale = 1280 / max(frame.shape) + if scale < 1: + frame = cv2.resize( + src=frame, + dsize=None, + fx=scale, + fy=scale, + interpolation=cv2.INTER_AREA, + ) + # Convert the frame to a numpy array used for inference and drawing. + input_image = np.array(frame) + + start_time = time.time() + # The model expects an RGB image, while the video is captured in BGR, so the channel order is reversed. + detections = detect(input_image[:, :, ::-1], compiled_model)[0] + stop_time = time.time() + + image_with_boxes = draw_results(detections, input_image, label_map) + frame = image_with_boxes + + processing_times.append(stop_time - start_time) + # Use processing times from last 200 frames. + if len(processing_times) > 200: + processing_times.popleft() + + _, f_width = frame.shape[:2] + # Mean processing time [ms]. 
+ processing_time = np.mean(processing_times) * 1000 + fps = 1000 / processing_time + cv2.putText( + img=frame, + text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", + org=(20, 40), + fontFace=cv2.FONT_HERSHEY_COMPLEX, + fontScale=f_width / 1000, + color=(0, 0, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + # Show the frame in a popup window; use this workaround if there is flickering in the notebook. + if use_popup: + cv2.imshow(winname=title, mat=frame) + key = cv2.waitKey(1) + # The ESC key has code 27. + if key == 27: + break + else: + # Encode numpy array to jpg. + _, encoded_img = cv2.imencode( + ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100] + ) + # Create an IPython image. + i = display.Image(data=encoded_img) + # Display the image in this notebook. + display.clear_output(wait=True) + display.display(i) + # Interrupted by the user (Ctrl-C). + except KeyboardInterrupt: + print("Interrupted") + # Any other runtime error is printed instead of being raised. + except RuntimeError as e: + print(e) + finally: + if player is not None: + # Stop capturing. + player.stop() + if use_popup: + cv2.destroyAllWindows() + +Run Live Object Detection and Segmentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a webcam as the video input. By default, the primary webcam is set +with \ ``source=0``. If you have multiple webcams, each one will be +assigned a consecutive number starting at 0. Set \ ``flip=True`` when +using a front-facing camera. Some web browsers, especially Mozilla +Firefox, may cause flickering. If you experience flickering, +set \ ``use_popup=True``. + + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + remote server (for example, in Binder or Google Colab service), the + webcam will not work. By default, the lower cell will run model + inference on a video file. If you want to try live inference on your + webcam set ``WEBCAM_INFERENCE = True`` + +.. 
code:: ipython3 + + WEBCAM_INFERENCE = False + + if WEBCAM_INFERENCE: + VIDEO_SOURCE = 0 # Webcam + else: + VIDEO_SOURCE = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4' + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + run_instance_segmentation(source=VIDEO_SOURCE, flip=True, use_popup=False, model=seg_ov_model, device=device.value) + + + +.. image:: 230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_60_0.png + + +.. parsed-literal:: + + Source ended + diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.jpg b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.jpg new file mode 100644 index 00000000000000..312a15b77bcc77 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc96d6ea30eb847e9bb4d21746a7023fb9c65bec9e541a9ee6a3285e29e303f +size 81079 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.png b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.png new file mode 100644 index 00000000000000..5cc6729e1d8fe3 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_11_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860af64994d31c78cca5b135d123b43fa6ce242e7c9a9d405af71f6378ca8371 +size 790288 diff --git 
a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.jpg b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.jpg new file mode 100644 index 00000000000000..f058efb47f66c1 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d24361c768d325a0e2312d6017b2a6009acd74016c474210321b4b4b9abcec +size 104433 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.png b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.png new file mode 100644 index 00000000000000..55b1207a49c2a8 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff90581c563622844bf83f58a2009a7f4912386a4715b6bed9ba1dfb723e784 +size 919053 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.jpg b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.jpg new file mode 100644 index 00000000000000..7a388b32651d87 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f48335ee8e7d5d68e2362ab739dadc1296bc249448bebdf7ea4a1cd6c4cad3 +size 103935 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.png 
b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.png new file mode 100644 index 00000000000000..a773f9f1906c23 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_44_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ae669f9481f2cda26667a0585a49e7a106b042f589c7f0c2254358a98062bc +size 918316 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_60_0.png b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_60_0.png new file mode 100644 index 00000000000000..7e0cf8c227a6dd --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/230-yolov8-instance-segmentation-with-output_60_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75690d9066aa42dec4d0c3615900d455b30b3e38b42e78f432fe3d0aa1e6be42 +size 495438 diff --git a/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/index.html b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/index.html new file mode 100644 index 00000000000000..3c7b8dfd83b309 --- /dev/null +++ b/docs/notebooks/230-yolov8-instance-segmentation-with-output_files/index.html @@ -0,0 +1,13 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-instance-segmentation-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-instance-segmentation-with-output_files/


../
+230-yolov8-instance-segmentation-with-output_11..> 31-Oct-2023 00:35               81079
+230-yolov8-instance-segmentation-with-output_11..> 31-Oct-2023 00:35              790288
+230-yolov8-instance-segmentation-with-output_22..> 31-Oct-2023 00:35              104433
+230-yolov8-instance-segmentation-with-output_22..> 31-Oct-2023 00:35              919053
+230-yolov8-instance-segmentation-with-output_44..> 31-Oct-2023 00:35              103935
+230-yolov8-instance-segmentation-with-output_44..> 31-Oct-2023 00:35              918316
+230-yolov8-instance-segmentation-with-output_60..> 31-Oct-2023 00:35              495438
+

+ diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output.rst b/docs/notebooks/230-yolov8-keypoint-detection-with-output.rst new file mode 100644 index 00000000000000..67e4cff60db1d8 --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output.rst @@ -0,0 +1,1361 @@ +Convert and Optimize YOLOv8 keypoint detection model with OpenVINO™ +=================================================================== + +Keypoint detection/Pose is a task that involves detecting specific +points in an image or video frame. These points are referred to as +keypoints and are used to track movement or pose estimation. YOLOv8 can +detect keypoints in an image or video frame with high accuracy and +speed. + +This tutorial demonstrates step-by-step instructions on how to run and +optimize `PyTorch YOLOv8 Pose +model `__ with OpenVINO. We +consider the steps required for keypoint detection scenario. + +The tutorial consists of the following steps: + +- Prepare the PyTorch model. +- Download and prepare a dataset. +- Validate the original model. +- Convert the PyTorch model to OpenVINO IR. +- Validate the converted model. +- Prepare and run optimization pipeline. +- Compare performance of the FP32 and quantized models. +- Compare accuracy of the FP32 and quantized models. 
+- Live demo + +**Table of contents:** + + +- `Get PyTorch model <#get-pytorch-model>`__ + + - `Prerequisites <#prerequisites>`__ + +- `Instantiate model <#instantiate-model>`__ + + - `Convert model to OpenVINO + IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Preprocessing <#preprocessing>`__ + - `Postprocessing <#postprocessing>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ + +- `Check model accuracy on the + dataset <#check-model-accuracy-on-the-dataset>`__ + + - `Download the validation + dataset <#download-the-validation-dataset>`__ + - `Define validation + function <#define-validation-function>`__ + - `Configure Validator helper and create + DataLoader <#configure-validator-helper-and-create-dataloader>`__ + +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ + +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ + + - `Compare performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ + - `Compare accuracy of the Original and Quantized + Models <#compare-accuracy-of-the-original-and-quantized-models>`__ + +- `Other ways to optimize + model <#other-ways-to-optimize-model>`__ +- `Live demo <#live-demo>`__ + + - `Run Keypoint Detection on + video <#run-keypoint-detection-on-video>`__ + +Get PyTorch model +----------------------------------------------------------- + +Generally, PyTorch models represent an instance of the +`torch.nn.Module `__ +class, initialized by a state dictionary with model weights. We will use +the YOLOv8 nano model (also known as ``yolov8n``) pre-trained on a COCO +dataset, which is available in this +`repo `__. 
Similar steps are +also applicable to other YOLOv8 models. Typical steps to obtain a +pre-trained model: 1. Create an instance of a model class. 2. Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. + +In this case, the creators of the model provide an API that enables +converting the YOLOv8 model to ONNX and then to OpenVINO IR. Therefore, +we do not need to do these steps manually. + +Prerequisites +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Install necessary packages. + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" "protobuf==3.20.*" "ultralytics==8.0.159" "onnx" + +Import required utility functions. The lower cell will download the +``notebook_utils`` Python module from GitHub. + +.. code:: ipython3 + + from pathlib import Path + + # Fetch the notebook utils script from the openvino_notebooks repo + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + + from notebook_utils import download_file, VideoPlayer + +Define utility functions for drawing results + +.. 
code:: ipython3 + + from typing import Tuple, Dict + import cv2 + import numpy as np + from PIL import Image + from ultralytics.utils.plotting import colors + + + def plot_one_box(box:np.ndarray, img:np.ndarray, color:Tuple[int, int, int] = None, keypoints:np.ndarray = None, label:str = None, line_thickness:int = 5): + """ + Helper function for drawing single bounding box on image + Parameters: + box (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2] + img (no.ndarray): input image + color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing box, if not specified will be selected randomly + keypoints (np.ndarray, *optional*, None): keypoints in format [x1, y1, s], x1, y1 - keypoint coordinates, s - the confidence scores, + if not provided, only box will be drawn + label (str, *optonal*, None): box label string, if not provided will not be provided as drowing result + line_thickness (int, *optional*, 5): thickness for box drawing lines + """ + # Plots one bounding box on image img + tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + if label: + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + if keypoints is not None: + kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] + skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], + [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] + limb_color = 
colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] + shape = img.shape[:2] + for i, k in enumerate(keypoints): + color_k = [int(x) for x in kpt_color[i]] + x_coord, y_coord = k[0], k[1] + if x_coord % shape[1] != 0 and y_coord % shape[0] != 0: + if len(k) == 3: + if k[2] < 0.5: + continue + cv2.circle(img, (int(x_coord), int(y_coord)), 5, color_k, -1, lineType=cv2.LINE_AA) + + ndim = keypoints.shape[-1] + for i, sk in enumerate(skeleton): + pos1 = (int(keypoints[(sk[0] - 1), 0]), int(keypoints[(sk[0] - 1), 1])) + pos2 = (int(keypoints[(sk[1] - 1), 0]), int(keypoints[(sk[1] - 1), 1])) + if ndim == 3: + conf1 = keypoints[(sk[0] - 1), 2] + conf2 = keypoints[(sk[1] - 1), 2] + if conf1 < 0.5 or conf2 < 0.5: + continue + if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0: + continue + if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0: + continue + cv2.line(img, pos1, pos2, [int(x) for x in limb_color[i]], thickness=2, lineType=cv2.LINE_AA) + + return img + + + def draw_results(results:Dict, source_image:np.ndarray, label_map:Dict): + """ + Helper function for drawing bounding boxes on image + Parameters: + image_res (np.ndarray): detection predictions in format [x1, y1, x2, y2, score, label_id] + source_image (np.ndarray): input image for drawing + label_map; (Dict[int, str]): label_id to class name mapping + """ + boxes = results["box"] + keypoints = results.get("kpt") + h, w = source_image.shape[:2] + for idx, (*xyxy, conf, lbl) in enumerate(boxes): + if conf < 0.4: + continue + label = f'{label_map[0]} {conf:.2f}' + kp = keypoints[idx] if keypoints is not None else None + source_image = plot_one_box(xyxy, source_image, keypoints=kp, label=label, color=colors(int(lbl)), line_thickness=1) + return source_image + +.. 
code:: ipython3 + + # Download a test sample + IMAGE_PATH = Path('./data/intel_rnb.jpg') + download_file( + url='https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg', + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent + ) + + +.. parsed-literal:: + + 'data/intel_rnb.jpg' already exists. + + + + +.. parsed-literal:: + + PosixPath('/home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/intel_rnb.jpg') + + + +Instantiate model +----------------------------------------------------------- + +To load the model, you need to specify a path to the model +checkpoint. It can be a local path or a name available on the models hub +(in this case the model checkpoint will be downloaded automatically). + +When making a prediction, the model accepts a path to an input image and returns +a list of Results class objects. Results contains boxes and keypoints. +It also contains utilities for processing results, for example, the +``plot()`` method for drawing. + +Let us consider the examples: + +.. code:: ipython3 + + models_dir = Path('./models') + models_dir.mkdir(exist_ok=True) + +.. code:: ipython3 + + from ultralytics import YOLO + + POSE_MODEL_NAME = "yolov8n-pose" + + pose_model = YOLO(models_dir / f'{POSE_MODEL_NAME}.pt') + label_map = pose_model.model.names + + res = pose_model(IMAGE_PATH) + Image.fromarray(res[0].plot()[:, :, ::-1]) + + +.. parsed-literal:: + + + image 1/1 /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/intel_rnb.jpg: 480x640 1 person, 52.6ms + Speed: 2.1ms preprocess, 52.6ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640) + + + + +.. image:: 230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.png + + + +Convert model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +YOLOv8 provides API for convenient model exporting to different formats +including OpenVINO IR. 
``model.export`` is responsible for model +conversion. We need to specify the format, and additionally, we can +preserve dynamic shapes in the model. + +.. code:: ipython3 + + # keypoint detection (pose) model + pose_model_path = models_dir / f"{POSE_MODEL_NAME}_openvino_model/{POSE_MODEL_NAME}.xml" + if not pose_model_path.exists(): + pose_model.export(format="openvino", dynamic=True, half=False) + +Verify model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To test model work, we create inference pipeline similar to +``model.predict`` method. The pipeline consists of preprocessing step, +inference of OpenVINO model and results post-processing to get results. + +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Model input is a tensor with the ``[-1, 3, -1, -1]`` shape in the +``N, C, H, W`` format, where \* ``N`` - number of images in batch (batch +size) \* ``C`` - image channels \* ``H`` - image height \* ``W`` - image +width + +The model expects images in RGB channels format and normalized in [0, 1] +range. Although the model supports dynamic input shape with preserving +input divisibility to 32, it is recommended to use static shapes, for +example, 640x640 for better efficiency. To resize images to fit model +size ``letterbox``, resize approach is used, where the aspect ratio of +width and height is preserved. + +To keep a specific shape, preprocessing automatically enables padding. + +.. code:: ipython3 + + from typing import Tuple + import torch + import numpy as np + + + def letterbox(img: np.ndarray, new_shape:Tuple[int, int] = (640, 640), color:Tuple[int, int, int] = (114, 114, 114), auto:bool = False, scale_fill:bool = False, scaleup:bool = False, stride:int = 32): + """ + Resize image and padding for detection. 
Takes image as input, + resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints + + Parameters: + img (np.ndarray): image for preprocessing + new_shape (Tuple(int, int)): image size after preprocessing in format [height, width] + color (Tuple(int, int, int)): color for filling padded area + auto (bool): use dynamic input size, only padding for stride constraints applied + scale_fill (bool): scale image to fill new_shape + scaleup (bool): allow upscaling the image if it is smaller than the desired input size, can affect model accuracy + stride (int): input padding stride + Returns: + img (np.ndarray): image after preprocessing + ratio (Tuple(float, float)): width and height scaling ratio + padding_size (Tuple(float, float)): width and height padding applied on each side (dw, dh) + + + """ + # Resize and pad image while meeting stride-multiple constraints + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scale_fill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, 
right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + + + def preprocess_image(img0: np.ndarray): + """ + Preprocess image according to YOLOv8 input requirements. + Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW. + + Parameters: + img0 (np.ndarray): image for preprocessing + Returns: + img (np.ndarray): image after preprocessing + """ + # letterbox resize with default parameters; only the resized image is kept + img = letterbox(img0)[0] + + # Convert HWC to CHW + img = img.transpose(2, 0, 1) + img = np.ascontiguousarray(img) + return img + + + def image_to_tensor(image:np.ndarray): + """ + Convert preprocessed image to model input tensor. + Takes image in np.array format, casts it to float32, scales values by 1/255 and adds a batch dimension if the input is 3-dimensional. + + Parameters: + image (np.ndarray): preprocessed image + Returns: + input_tensor (np.ndarray): input tensor with float32 values in [0, 1] range for uint8 input (NCHW format when a CHW image is passed) + """ + input_tensor = image.astype(np.float32) # uint8 to fp32 + input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0 + + # add batch dimension + if input_tensor.ndim == 3: + input_tensor = np.expand_dims(input_tensor, 0) + return input_tensor + +Postprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The model output contains detection boxes candidates, it is a tensor +with the ``[-1,56,-1]`` shape in the ``B,56,N`` format, where: + +- ``B`` - batch size +- ``N`` - number of detection boxes + +For getting the final prediction, we need to apply a non-maximum +suppression algorithm and rescale box coordinates to the original image +size. 
+ +After prediction detection box has the [``x``, ``y``, ``h``, ``w``, +``detection_precision``, ``class_id``, ``keypoint_1_x``, +``keypoint_1_y``, ``keypoint_1_score``, …, ``keypoint_17_x``, +``keypoint_17_y``, ``keypoint_17_score``] format, where: + +- (``x``, ``y``) - raw coordinates of box center +- ``h``, ``w`` - raw height and width of the box +- ``detection_precision`` - probability distribution over the classes +- ``class_id`` - in this case class could be only one, it is ``person`` +- (``keypoint_1_x``, ``keypoint_1_y``) - raw coordinates for one of 17 + keypoints +- ``keypoint_1_score`` - the confidence scores + +.. code:: ipython3 + + from ultralytics.utils import ops + + def postprocess( + pred_boxes:np.ndarray, + input_hw:Tuple[int, int], + orig_img:np.ndarray, + min_conf_threshold:float = 0.25, + nms_iou_threshold:float = 0.45, + agnosting_nms:bool = False, + max_detections:int = 80, + ): + """ + YOLOv8 model postprocessing function. Applies non-maximum suppression algorithm to detections and rescales boxes to original image size + Parameters: + pred_boxes (np.ndarray): model output prediction boxes + input_hw (Tuple[int, int]): height and width of the preprocessed input + orig_img (np.ndarray): image before preprocessing + min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering + nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS + agnosting_nms (bool, *optional*, False): apply class agnostic NMS approach or not + max_detections (int, *optional*, 80): maximum detections after NMS + Returns: + pred (List[Dict[str, np.ndarray]]): list of dictionary with box - detected boxes in format [x1, y1, x2, y2, score, label] and + kpt - 17 keypoints in format [x1, y1, score1] + """ + nms_kwargs = {"agnostic": agnosting_nms, "max_det":max_detections} + preds = ops.non_max_suppression( + torch.from_numpy(pred_boxes), + min_conf_threshold, + nms_iou_threshold, + nc=1, + **nms_kwargs + ) + + results = 
[] + + kpt_shape = [17, 3] + for i, pred in enumerate(preds): + shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape + pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() + pred_kpts = pred[:, 6:].view(len(pred), *kpt_shape) if len(pred) else pred[:, 6:] + pred_kpts = ops.scale_coords(input_hw, pred_kpts, shape) + results.append({"box": pred[:, :6].numpy(), 'kpt': pred_kpts.numpy()}) + + return results + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + import openvino as ov + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +Test on single image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now, once we have defined preprocessing and postprocessing steps, we are +ready to check model prediction. + +.. code:: ipython3 + + core = ov.Core() + pose_ov_model = core.read_model(pose_model_path) + if device.value != "CPU": + pose_ov_model.reshape({0: [1, 3, 640, 640]}) + pose_compiled_model = core.compile_model(pose_ov_model, device.value) + + + def detect(image:np.ndarray, model:ov.Model): + """ + OpenVINO YOLOv8 model inference function. Preprocess image, runs model inference and postprocess results using NMS. + Parameters: + image (np.ndarray): input image. + model (Model): OpenVINO compiled model. 
+ Returns: + detections (np.ndarray): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] and + kpt - 17 keypoints in format [x1, y1, score1] + """ + preprocessed_image = preprocess_image(image) + input_tensor = image_to_tensor(preprocessed_image) + result = model(input_tensor) + boxes = result[model.output(0)] + input_hw = input_tensor.shape[2:] + detections = postprocess(pred_boxes=boxes, input_hw=input_hw, orig_img=image) + return detections + + input_image = np.array(Image.open(IMAGE_PATH)) + detections = detect(input_image, pose_compiled_model)[0] + image_with_boxes = draw_results(detections, input_image, label_map) + + Image.fromarray(image_with_boxes) + + + + +.. image:: 230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.png + + + +Great! The result is the same, as produced by original models. + +Check model accuracy on the dataset +----------------------------------------------------------------------------- + +For comparing the optimized model result with the original, it is good +to know some measurable results in terms of model accuracy on the +validation dataset. + +Download the validation dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +YOLOv8 is pre-trained on the COCO dataset, so to evaluate the model +accuracy we need to download it. According to the instructions provided +in the YOLOv8 repo, we also need to download annotations in the format +used by the author of the model, for use with the original model +evaluation function. + + **Note**: The initial dataset download may take a few minutes to + complete. The download speed will vary depending on the quality of + your internet connection. + +.. 
code:: ipython3 + + from zipfile import ZipFile + + DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" + LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/8ebe94d1e928687feaa1fee6d5668987df5e43be/ultralytics/datasets/coco-pose.yaml" + + OUT_DIR = Path('./datasets') + + DATA_PATH = OUT_DIR / "val2017.zip" + LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip" + CFG_PATH = OUT_DIR / "coco-pose.yaml" + + download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent) + download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent) + download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent) + + if not (OUT_DIR / "coco/labels").exists(): + with ZipFile(LABELS_PATH , "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + with ZipFile(DATA_PATH , "r") as zip_ref: + zip_ref.extractall(OUT_DIR / 'coco/images') + + +.. parsed-literal:: + + 'datasets/val2017.zip' already exists. + 'datasets/coco2017labels-segments.zip' already exists. + + + +.. parsed-literal:: + + datasets/coco-pose.yaml: 0%| | 0.00/781 [00:00\ **Note**: Model evaluation is time consuming +process and can take several minutes, depending on the hardware. For +reducing calculation time, we define ``num_samples`` parameter with +evaluation subset size, but in this case, accuracy can be noncomparable +with originally reported by the authors of the model, due to validation +subset difference. *To validate the models on the full dataset set +``NUM_TEST_SAMPLES = None``.* + +.. code:: ipython3 + + NUM_TEST_SAMPLES = 300 + +.. code:: ipython3 + + fp_pose_stats = test(pose_ov_model, core, pose_data_loader, pose_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00`__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. 
We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) to optimize +YOLOv8. + +The optimization process contains the following steps: + +1. Create a Dataset for quantization. +2. Run ``nncf.quantize`` for getting an optimized model. +3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` + function. + +Reuse validation dataloader in accuracy testing for quantization. For +that, it should be wrapped into the ``nncf.Dataset`` object and define a +transformation function for getting only input tensors. + +.. code:: ipython3 + + import nncf # noqa: F811 + from typing import Dict + + + def transform_fn(data_item:Dict): + """ + Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. + Parameters: + data_item: Dict with data item produced by DataLoader during iteration + Returns: + input_tensor: Input data for quantization + """ + input_tensor = pose_validator.preprocess(data_item)['img'].numpy() + return input_tensor + + + quantization_dataset = nncf.Dataset(pose_data_loader, transform_fn) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +The ``nncf.quantize`` function provides an interface for model +quantization. It requires an instance of the OpenVINO Model and +quantization dataset. Optionally, some additional parameters for the +configuration quantization process (number of samples for quantization, +preset, ignored scope, etc.) can be provided. YOLOv8 model contains +non-ReLU activation functions, which require asymmetric quantization of +activations. To achieve a better result, we will use a ``mixed`` +quantization preset. It provides symmetric quantization of weights and +asymmetric quantization of activations. For more accurate results, we +should keep the operation in the postprocessing subgraph in floating +point precision, using the ``ignored_scope`` parameter. 
+ + **Note**: Model post-training quantization is time-consuming process. + Be patient, it can take several minutes depending on your hardware. + +.. code:: ipython3 + + ignored_scope = nncf.IgnoredScope( + types=["Multiply", "Subtract", "Sigmoid"], # ignore operations + names=[ + "/model.22/dfl/conv/Conv", # in the post-processing subgraph + "/model.22/Add", + "/model.22/Add_1", + "/model.22/Add_2", + "/model.22/Add_3", + "/model.22/Add_4", + "/model.22/Add_5", + "/model.22/Add_6", + "/model.22/Add_7", + "/model.22/Add_8", + "/model.22/Add_9", + "/model.22/Add_10" + ] + ) + + + # Detection model + quantized_pose_model = nncf.quantize( + pose_ov_model, + quantization_dataset, + preset=nncf.QuantizationPreset.MIXED, + ignored_scope=ignored_scope + ) + + +.. parsed-literal:: + + INFO:nncf:12 ignored nodes was found by name in the NNCFGraph + INFO:nncf:12 ignored nodes was found by types in the NNCFGraph + INFO:nncf:Not adding activation input quantizer for operation: 134 /model.22/Mul_6 + 145 /model.22/Add_12 + + INFO:nncf:Not adding activation input quantizer for operation: 135 /model.22/Sigmoid_1 + INFO:nncf:Not adding activation input quantizer for operation: 156 /model.22/Mul_7 + INFO:nncf:Not adding activation input quantizer for operation: 144 /model.22/Sigmoid + INFO:nncf:Not adding activation input quantizer for operation: 174 /model.22/dfl/conv/Conv + INFO:nncf:Not adding activation input quantizer for operation: 196 /model.22/Sub + INFO:nncf:Not adding activation input quantizer for operation: 197 /model.22/Add_10 + INFO:nncf:Not adding activation input quantizer for operation: 212 /model.22/Sub_1 + INFO:nncf:Not adding activation input quantizer for operation: 239 /model.22/Mul_5 + + +.. 
parsed-literal:: + + Statistics collection: 3%|███▉ | 8/300 [00:01<00:38, 7.55it/s] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 72/72 [00:03<00:00, 19.73it/s] + + +.. code:: ipython3 + + from openvino.runtime import serialize + int8_model_pose_path = models_dir / f'{POSE_MODEL_NAME}_openvino_int8_model/{POSE_MODEL_NAME}.xml' + print(f"Quantized keypoint detection model will be saved to {int8_model_pose_path}") + serialize(quantized_pose_model, str(int8_model_pose_path)) + + +.. parsed-literal:: + + Quantized keypoint detection model will be saved to models/yolov8n-pose_openvino_int8_model/yolov8n-pose.xml + + +Validate Quantized model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``nncf.quantize`` returns the OpenVINO Model class instance, which is +suitable for loading on a device for making predictions. ``INT8`` model +input data and output result formats have no difference from the +floating point model representation. Therefore, we can reuse the same +``detect`` function defined above for getting the ``INT8`` model result +on the image. + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + if device.value != "CPU": + quantized_pose_model.reshape({0: [1, 3, 640, 640]}) + quantized_pose_compiled_model = core.compile_model(quantized_pose_model, device.value) + input_image = np.array(Image.open(IMAGE_PATH)) + detections = detect(input_image, quantized_pose_compiled_model)[0] + image_with_boxes = draw_results(detections, input_image, label_map) + + Image.fromarray(image_with_boxes) + + + + +.. 
image:: 230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.png + + + +Compare the Original and Quantized Models +----------------------------------------------------------------------------------- + +Compare performance of the Original and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Finally, use the OpenVINO `Benchmark +Tool `__ +to measure the inference performance of the ``FP32`` and ``INT8`` +models. + + **Note**: For more accurate performance, it is recommended to run + ``benchmark_app`` in a terminal/command prompt after closing other + applications. Run + ``benchmark_app -m -d CPU -shape ""`` to + benchmark async inference on CPU on specific input data shape for one + minute. Change ``CPU`` to ``GPU`` to benchmark on GPU. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + # Inference FP32 model (OpenVINO IR) + !benchmark_app -m $pose_model_path -d $device.value -api async -shape "[1,3,640,640]" + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ WARNING ] Default duration 120 seconds is used for unknown device AUTO + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
+ [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 17.85 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [?,56,?] + [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 11.94 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,56,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 410.27 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 12 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. 
This input will be filled with random values! + [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 120000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 33.91 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 18420 iterations + [ INFO ] Duration: 120067.97 ms + [ INFO ] Latency: + [ INFO ] Median: 74.24 ms + [ INFO ] Average: 78.05 ms + [ INFO ] Min: 39.74 ms + [ INFO ] Max: 165.06 ms + [ INFO ] Throughput: 153.41 FPS + + +.. code:: ipython3 + + # Inference INT8 model (OpenVINO IR) + !benchmark_app -m $int8_model_pose_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 + + +.. parsed-literal:: + + [Step 1/11] Parsing and validating input arguments + [ INFO ] Parsing input parameters + [Step 2/11] Loading OpenVINO Runtime + [ INFO ] OpenVINO: + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] Device info: + [ INFO ] AUTO + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 + [ INFO ] + [ INFO ] + [Step 3/11] Setting device configuration + [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. + [Step 4/11] Reading model files + [ INFO ] Loading model files + [ INFO ] Read model took 29.51 ms + [ INFO ] Original model I/O parameters: + [ INFO ] Model inputs: + [ INFO ] images (node: images) : f32 / [...] / [1,3,?,?] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,56,21..] 
+ [Step 5/11] Resizing model to match image sizes and given batch + [ INFO ] Model batch size: 1 + [ INFO ] Reshaping model: 'images': [1,3,640,640] + [ INFO ] Reshape model took 16.46 ms + [Step 6/11] Configuring input of the model + [ INFO ] Model inputs: + [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] + [ INFO ] Model outputs: + [ INFO ] output0 (node: output0) : f32 / [...] / [1,56,8400] + [Step 7/11] Loading the model to the device + [ INFO ] Compile model took 732.13 ms + [Step 8/11] Querying optimal runtime parameters + [ INFO ] Model: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 18 + [ INFO ] MULTI_DEVICE_PRIORITIES: CPU + [ INFO ] CPU: + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 18 + [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 18 + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT + [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False + [Step 9/11] Creating infer requests and preparing input tensors + [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! 
+ [ INFO ] Fill input 'images' with random values + [Step 10/11] Measuring performance (Start inference asynchronously, 18 inference requests, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 26.46 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 6426 iterations + [ INFO ] Duration: 15072.05 ms + [ INFO ] Latency: + [ INFO ] Median: 40.12 ms + [ INFO ] Average: 42.00 ms + [ INFO ] Min: 27.49 ms + [ INFO ] Max: 121.32 ms + [ INFO ] Throughput: 426.35 FPS + + +Compare accuracy of the Original and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As we can see, there is no significant difference between ``INT8`` and +float model result in a single image test. To understand how +quantization influences model prediction precision, we can compare model +accuracy on a dataset. + +.. code:: ipython3 + + int8_pose_stats = test(quantized_pose_model, core, pose_data_loader, pose_validator, num_samples=NUM_TEST_SAMPLES) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00`__ + +Preprocessing API enables making preprocessing a part of the model +reducing application code and dependency on additional image processing +libraries. The main advantage of Preprocessing API is that preprocessing +steps will be integrated into the execution graph and will be performed +on a selected device (CPU/GPU etc.) rather than always being executed on +CPU as part of an application. This will also improve selected device +utilization. For more information, refer to the overview of +`Preprocessing API +tutorial <118-optimize-preprocessing-with-output.html>`__. 
+To see how it can be used with the YOLOv8 object detection model, +please see `Convert and Optimize YOLOv8 real-time object detection with +OpenVINO tutorial <./230-yolov8-object-detection.ipynb>`__ + +Live demo +--------------------------------------------------- + +The following code runs model inference on a video: + +.. code:: ipython3 + + import collections + import time + from IPython import display + + + def run_keypoint_detection(source=0, flip=False, use_popup=False, skip_first_frames=0, model=pose_model, device=device.value): + player = None + if device != "CPU": + model.reshape({0: [1, 3, 640, 640]}) + compiled_model = core.compile_model(model, device) + try: + # Create a video player to play with target fps. + player = VideoPlayer( + source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames + ) + # Start capturing. + player.start() + if use_popup: + title = "Press ESC to Exit" + cv2.namedWindow( + winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE + ) + + processing_times = collections.deque() + while True: + # Grab the frame. + frame = player.next() + if frame is None: + print("Source ended") + break + # If the frame is larger than full HD, reduce size to improve the performance. + scale = 1280 / max(frame.shape) + if scale < 1: + frame = cv2.resize( + src=frame, + dsize=None, + fx=scale, + fy=scale, + interpolation=cv2.INTER_AREA, + ) + # Get the results. + input_image = np.array(frame) + + start_time = time.time() + # model expects RGB image, while video capturing in BGR + detections = detect(input_image[:, :, ::-1], compiled_model)[0] + stop_time = time.time() + + image_with_boxes = draw_results(detections, input_image, label_map) + frame = image_with_boxes + + processing_times.append(stop_time - start_time) + # Use processing times from last 200 frames. + if len(processing_times) > 200: + processing_times.popleft() + + _, f_width = frame.shape[:2] + # Mean processing time [ms]. 
+ processing_time = np.mean(processing_times) * 1000 + fps = 1000 / processing_time + cv2.putText( + img=frame, + text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", + org=(20, 40), + fontFace=cv2.FONT_HERSHEY_COMPLEX, + fontScale=f_width / 1000, + color=(0, 0, 255), + thickness=1, + lineType=cv2.LINE_AA, + ) + # Use this workaround if there is flickering. + if use_popup: + cv2.imshow(winname=title, mat=frame) + key = cv2.waitKey(1) + # escape = 27 + if key == 27: + break + else: + # Encode numpy array to jpg. + _, encoded_img = cv2.imencode( + ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100] + ) + # Create an IPython image. + i = display.Image(data=encoded_img) + # Display the image in this notebook. + display.clear_output(wait=True) + display.display(i) + # ctrl-c + except KeyboardInterrupt: + print("Interrupted") + # any different error + except RuntimeError as e: + print(e) + finally: + if player is not None: + # Stop capturing. + player.stop() + if use_popup: + cv2.destroyAllWindows() + +Run Keypoint Detection on video +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + VIDEO_SOURCE = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4' + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + run_keypoint_detection(source=VIDEO_SOURCE, flip=True, use_popup=False, model=pose_ov_model, device=device.value) + + + +.. image:: 230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_62_0.png + + +.. 
parsed-literal:: + + Source ended + diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.jpg b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.jpg new file mode 100644 index 00000000000000..8416ee4928dfe7 --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1c9d780c6229982db4e0488e80876cfbd839774c29e0520f2982d2d73b5af2 +size 58622 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.png b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.png new file mode 100644 index 00000000000000..23025daded4ec5 --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_11_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65197279f340bc0c28da77a9f7e38b48dc658e67510cee2785ea2666d1230c24 +size 581068 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.jpg b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.jpg new file mode 100644 index 00000000000000..84e6299859fdff --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b1a72d1576ff64ad3a39ce2cd2a6ba3ec52064658f0b95a959d7d400c7b0f24 +size 58280 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.png 
b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.png new file mode 100644 index 00000000000000..1ef752a814c6e9 --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aad0af4692c4f8bca8f3f9743d13e1d47458ab2021b055d2a2a9cee6e5ee95b +size 584143 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.jpg b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.jpg new file mode 100644 index 00000000000000..ed7fc33c8f74b5 --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e372bab673bb6fc868de2090d47aa8168704c96505e5b3088f7456ad857736d +size 58058 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.png b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.png new file mode 100644 index 00000000000000..3ea38c71af7cef --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_46_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac27f8c1d426e82197bb9ef05813969879300612ecefd669bafd83312b1fda2 +size 584051 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_62_0.png b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_62_0.png new file mode 100644 index 00000000000000..34cbe3211461c0 --- /dev/null +++ 
b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/230-yolov8-keypoint-detection-with-output_62_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91473c690b959bd3c1d99007c05511389f82e3c449eb61e4ef4988aa3bc5f31 +size 490879 diff --git a/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/index.html b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/index.html new file mode 100644 index 00000000000000..f6787514b14ade --- /dev/null +++ b/docs/notebooks/230-yolov8-keypoint-detection-with-output_files/index.html @@ -0,0 +1,13 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-keypoint-detection-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-keypoint-detection-with-output_files/


../
+230-yolov8-keypoint-detection-with-output_11_1.jpg 31-Oct-2023 00:35               58622
+230-yolov8-keypoint-detection-with-output_11_1.png 31-Oct-2023 00:35              581068
+230-yolov8-keypoint-detection-with-output_22_0.jpg 31-Oct-2023 00:35               58280
+230-yolov8-keypoint-detection-with-output_22_0.png 31-Oct-2023 00:35              584143
+230-yolov8-keypoint-detection-with-output_46_0.jpg 31-Oct-2023 00:35               58058
+230-yolov8-keypoint-detection-with-output_46_0.png 31-Oct-2023 00:35              584051
+230-yolov8-keypoint-detection-with-output_62_0.png 31-Oct-2023 00:35              490879
+

+ diff --git a/docs/notebooks/230-yolov8-optimization-with-output.rst b/docs/notebooks/230-yolov8-object-detection-with-output.rst similarity index 55% rename from docs/notebooks/230-yolov8-optimization-with-output.rst rename to docs/notebooks/230-yolov8-object-detection-with-output.rst index 54ba88729f8191..8a7e28c20834ca 100644 --- a/docs/notebooks/230-yolov8-optimization-with-output.rst +++ b/docs/notebooks/230-yolov8-object-detection-with-output.rst @@ -1,98 +1,90 @@ -Convert and Optimize YOLOv8 with OpenVINO™ -========================================== +Convert and Optimize YOLOv8 real-time object detection with OpenVINO™ +===================================================================== - - -The YOLOv8 algorithm developed by Ultralytics is a cutting-edge, -state-of-the-art (SOTA) model that is designed to be fast, accurate, and -easy to use, making it an excellent choice for a wide range of object -detection, image segmentation, and image classification tasks. - -YOLO stands for “You Only Look Once”, it is a popular family of -real-time object detection algorithms. The original YOLO object detector -was first released in 2016. Since then, different versions and variants -of YOLO have been proposed, each providing a significant increase in -performance and efficiency. YOLOv8 builds upon the success of previous -YOLO versions and introduces new features and improvements to further -boost performance and flexibility. More details about its realization -can be found in the original model -`repository `__. - -Real-time object detection and instance segmentation are often used as -key components in computer vision systems. Applications that use -real-time object detection models include video analytics, robotics, -autonomous vehicles, multi-object tracking and object counting, medical -image analysis, and many others. +Real-time object detection is often used as a key component in computer +vision systems. 
Applications that use real-time object detection models +include video analytics, robotics, autonomous vehicles, multi-object +tracking and object counting, medical image analysis, and many others. This tutorial demonstrates step-by-step instructions on how to run and optimize PyTorch YOLOv8 with OpenVINO. We consider the steps required -for object detection and instance segmentation scenarios. +for the object detection scenario. The tutorial consists of the following steps: -- Prepare the PyTorch model. -- Download and prepare a dataset. -- Validate the original model. -- Convert the PyTorch model to OpenVINO IR. -- Validate the converted model. -- Prepare and run optimization pipeline. -- Compare performance of the FP32 and quantized models. -- Compare accuracy of the FP32 and quantized models. +- Prepare the PyTorch model. +- Download and prepare a dataset. +- Validate the original model. +- Convert the PyTorch model to OpenVINO IR. +- Validate the converted model. +- Prepare and run optimization pipeline. +- Compare performance of the FP32 and quantized models. +- Compare accuracy of the FP32 and quantized models. +- Other optimization possibilities with OpenVINO API +- Live demo + +**Table of contents:** + -.. 
_top: +- `Get PyTorch model <#get-pytorch-model>`__ -**Table of contents**: + - `Prerequisites <#prerequisites>`__ -- `Get Pytorch model <#get-pytorch-model>`__ -- `Prerequisites <#prerequisites>`__ -- `Instantiate model <#instantiate-model>`__ +- `Instantiate model <#instantiate-model>`__ - - `Object detection <#object-detection>`__ - - `Instance Segmentation: <#instance-segmentation>`__ - - `Convert model to OpenVINO IR <#convert-model-to-openvino-ir>`__ - - `Verify model inference <#verify-model-inference>`__ - - `Preprocessing <#preprocessing>`__ - - `Postprocessing <#postprocessing>`__ - - `Select inference device <#select-inference-device>`__ - - `Test on single image <#test-on-single-image>`__ - - `Check model accuracy on the dataset <#check-model-accuracy-on-the-dataset>`__ + - `Convert model to OpenVINO + IR <#convert-model-to-openvino-ir>`__ + - `Verify model inference <#verify-model-inference>`__ + - `Preprocessing <#preprocessing>`__ + - `Postprocessing <#postprocessing>`__ + - `Select inference device <#select-inference-device>`__ + - `Test on single image <#test-on-single-image>`__ - - `Download the validation dataset <#download-the-validation-dataset>`__ - - `Define validation function <#define-validation-function>`__ - - `Configure Validator helper and create DataLoader <#configure-validator-helper-and-create-dataloader>`__ +- `Check model accuracy on the + dataset <#check-model-accuracy-on-the-dataset>`__ - - `Optimize model using NNCF Post-training Quantization API <#optimize-model-using-nncf-post-training-quantization-api>`__ - - `Validate Quantized model inference <#validate-quantized-model-inference>`__ + - `Download the validation + dataset <#download-the-validation-dataset>`__ + - `Define validation + function <#define-validation-function>`__ + - `Configure Validator helper and create + DataLoader <#configure-validator-helper-and-create-dataloader>`__ - - `Object detection: <#object-detection>`__ - - `Instance segmentation: 
<#instance-segmentation>`__ +- `Optimize model using NNCF Post-training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ - - `Compare Performance of the Original and Quantized Models <#compare-performance-of-the-original-and-quantized-models>`__ + - `Validate Quantized model + inference <#validate-quantized-model-inference>`__ - - `Compare performance object detection models <#compare-performance-object-detection-models>`__ - - `Instance segmentation <#instance-segmentation>`__ +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ - - `Validate quantized model accuracy <#validate-quantized-model-accuracy>`__ - - `Object detection <#object-detection>`__ - - `Instance segmentation <#instance-segmentation>`__ + - `Compare performance object detection + models <#compare-performance-object-detection-models>`__ + - `Validate quantized model + accuracy <#validate-quantized-model-accuracy>`__ -- `Next steps <#next-steps>`__ -- `Async inference pipeline <#async-inference-pipeline>`__ -- `Integration preprocessing to model <#integration-preprocessing-to-model>`__ +- `Next steps <#next-steps>`__ - - `Initialize PrePostProcessing API <#initialize-prepostprocessing-api>`__ - - `Define input data format <#define-input-data-format>`__ - - `Describe preprocessing steps <#describe-preprocessing-steps>`__ - - `Integrating Steps into a Model <#integrating-steps-into-a-model>`__ + - `Async inference pipeline <#async-inference-pipeline>`__ + - `Integration preprocessing to + model <#integration-preprocessing-to-model>`__ -- `Live demo <#live-demo>`__ -- `Run <#run>`__ + - `Initialize PrePostProcessing + API <#initialize-prepostprocessing-api>`__ + - `Define input data + format <#define-input-data-format>`__ + - `Describe preprocessing + steps <#describe-preprocessing-steps>`__ + - `Integrating Steps into a + Model <#integrating-steps-into-a-model>`__ - - `Run Live Object Detection and Segmentation 
<#run-live-object-detection-and-segmentation>`__ +- `Live demo <#live-demo>`__ -Get Pytorch model `⇑ <#top>`__ -############################################################################################################################### + - `Run Live Object Detection <#run-live-object-detection>`__ +Get PyTorch model +----------------------------------------------------------- Generally, PyTorch models represent an instance of the `torch.nn.Module `__ @@ -101,28 +93,24 @@ the YOLOv8 nano model (also known as ``yolov8n``) pre-trained on a COCO dataset, which is available in this `repo `__. Similar steps are also applicable to other YOLOv8 models. Typical steps to obtain a -pre-trained model: - -1. Create an instance of a model class. -2. Load a checkpoint state dict, which contains the pre-trained model - weights. -3. Turn the model to evaluation for switching some operations to - inference mode. +pre-trained model: 1. Create an instance of a model class. 2. Load a +checkpoint state dict, which contains the pre-trained model weights. 3. +Turn the model to evaluation for switching some operations to inference +mode. In this case, the creators of the model provide an API that enables converting the YOLOv8 model to ONNX and then to OpenVINO IR. Therefore, we do not need to do these steps manually. -Prerequisites `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Prerequisites +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Install necessary packages. .. code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" "nncf>=2.5.0" - !pip install -q "ultralytics==8.0.43" onnx + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" + %pip install -q "ultralytics==8.0.43" onnx Import required utility functions. The lower cell will download the ``notebook_utils`` Python module from GitHub. 
@@ -147,18 +135,18 @@ Define utility functions for drawing results from typing import Tuple, Dict import cv2 import numpy as np - from PIL import Image from ultralytics.yolo.utils.plotting import colors - def plot_one_box(box:np.ndarray, img:np.ndarray, color:Tuple[int, int, int] = None, mask:np.ndarray = None, label:str = None, line_thickness:int = 5): + def plot_one_box(box:np.ndarray, img:np.ndarray, + color:Tuple[int, int, int] = None, + label:str = None, line_thickness:int = 5): """ Helper function for drawing single bounding box on image Parameters: x (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2] img (no.ndarray): input image color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing box, if not specified will be selected randomly - mask (np.ndarray, *optional*, None): instance segmentation mask polygon in format [N, 2], where N - number of points in contour, if not provided, only box will be drawn label (str, *optonal*, None): box label string, if not provided will not be provided as drowing result line_thickness (int, *optional*, 5): thickness for box drawing lines """ @@ -173,11 +161,7 @@ Define utility functions for drawing results c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) - if mask is not None: - image_with_mask = img.copy() - mask - cv2.fillPoly(image_with_mask, pts=[mask.astype(int)], color=color) - img = cv2.addWeighted(img, 0.5, image_with_mask, 0.5, 1) + return img @@ -189,15 +173,12 @@ Define utility functions for drawing results source_image (np.ndarray): input image for drawing label_map; (Dict[int, str]): label_id to class name mapping Returns: - + Image with boxes """ boxes = results["det"] - masks = results.get("segment") - h, w = source_image.shape[:2] for idx, (*xyxy, conf, lbl) in enumerate(boxes): label = f'{label_map[int(lbl)]} 
{conf:.2f}' - mask = masks[idx] if masks is not None else None - source_image = plot_one_box(xyxy, source_image, mask=mask, label=label, color=colors(int(lbl)), line_thickness=1) + source_image = plot_one_box(xyxy, source_image, label=label, color=colors(int(lbl)), line_thickness=1) return source_image .. code:: ipython3 @@ -224,21 +205,20 @@ Define utility functions for drawing results -Instantiate model `⇑ <#top>`__ -############################################################################################################################### - +Instantiate model +----------------------------------------------------------- -There are several models available in the original repository, targeted -for different tasks. For loading the model, required to specify a path -to the model checkpoint. It can be some local path or name available on -models hub (in this case model checkpoint will be downloaded -automatically). +There are `several +models `__ available in the +original repository, targeted for different tasks. For loading the +model, required to specify a path to the model checkpoint. It can be +some local path or name available on models hub (in this case model +checkpoint will be downloaded automatically). Making prediction, the model accepts a path to input image and returns list with Results class object. Results contains boxes for object -detection model and boxes and masks for segmentation model. Also it -contains utilities for processing results, for example, ``plot()`` -method for drawing. +detection model. Also it contains utilities for processing results, for +example, ``plot()`` method for drawing. Let us consider the examples: @@ -247,12 +227,9 @@ Let us consider the examples: models_dir = Path('./models') models_dir.mkdir(exist_ok=True) -Object detection `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - .. 
code:: ipython3 + from PIL import Image from ultralytics import YOLO DET_MODEL_NAME = "yolov8n" @@ -266,50 +243,25 @@ Object detection `⇑ <#top>`__ .. parsed-literal:: - Ultralytics YOLOv8.0.43 🚀 Python-3.8.10 torch-1.13.1+cpu CPU + 2023-10-05 19:15:51.230030: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-05 19:15:51.269549: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-05 19:15:51.909328: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Ultralytics YOLOv8.0.43 🚀 Python-3.8.10 torch-2.0.1+cpu CPU YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs - image 1/1 /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 1 dog, 43.6ms - Speed: 0.5ms preprocess, 43.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640) - - - - -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_13_1.png - - - -Instance Segmentation: `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. code:: ipython3 - - SEG_MODEL_NAME = "yolov8n-seg" - - seg_model = YOLO(models_dir / f'{SEG_MODEL_NAME}.pt') - res = seg_model(IMAGE_PATH) - Image.fromarray(res[0].plot()[:, :, ::-1]) - - -.. 
parsed-literal:: - - Ultralytics YOLOv8.0.43 🚀 Python-3.8.10 torch-1.13.1+cpu CPU - YOLOv8n-seg summary (fused): 195 layers, 3404320 parameters, 0 gradients, 12.6 GFLOPs - - image 1/1 /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/coco_bike.jpg: 480x640 1 bicycle, 2 cars, 1 dog, 43.2ms - Speed: 0.5ms preprocess, 43.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640) + image 1/1 /home/ea/work/openvino_notebooks/notebooks/230-yolov8-optimization/data/coco_bike.jpg: 480x640 2 bicycles, 2 cars, 1 dog, 48.7ms + Speed: 2.6ms preprocess, 48.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640) -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_15_1.png +.. image:: 230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.png -Convert model to OpenVINO IR `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Convert model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ YOLOv8 provides API for convenient model exporting to different formats including OpenVINO IR. ``model.export`` is responsible for model @@ -323,35 +275,20 @@ preserve dynamic shapes in the model. if not det_model_path.exists(): det_model.export(format="openvino", dynamic=True, half=False) -.. 
code:: ipython3 - - # instance segmentation model - seg_model_path = models_dir / f"{SEG_MODEL_NAME}_openvino_model/{SEG_MODEL_NAME}.xml" - if not seg_model_path.exists(): - seg_model.export(format="openvino", dynamic=True, half=False) - -Verify model inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Verify model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To test model work, we create inference pipeline similar to ``model.predict`` method. The pipeline consists of preprocessing step, inference of OpenVINO model and results post-processing to get results. -The main difference in models for object detection and instance -segmentation is postprocessing part. Input specification and -preprocessing are common for both cases. - -Preprocessing `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model input is a tensor with the ``[-1, 3, -1, -1]`` shape in the -``N, C, H, W`` format, where - -- ``N`` - number of images in batch (batch size) -- ``C`` - image channels -- ``H`` - image height -- ``W`` - image width +``N, C, H, W`` format, where \* ``N`` - number of images in batch (batch +size) \* ``C`` - image channels \* ``H`` - image height \* ``W`` - image +width The model expects images in RGB channels format and normalized in [0, 1] range. Although the model supports dynamic input shape with preserving @@ -459,9 +396,8 @@ To keep a specific shape, preprocessing automatically enables padding. 
input_tensor = np.expand_dims(input_tensor, 0) return input_tensor -Postprocessing `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Postprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model output contains detection boxes candidates, it is a tensor with the ``[-1,84,-1]`` shape in the ``B,84,N`` format, where: @@ -469,35 +405,20 @@ with the ``[-1,84,-1]`` shape in the ``B,84,N`` format, where: - ``B`` - batch size - ``N`` - number of detection boxes -Detection box has the [``x``, ``y``, ``h``, ``w``, ``class_no_1``, …, -``class_no_80``] format, where: - -- (``x``, ``y``) - raw coordinates of box center -- ``h``, ``w`` - raw height and width of the box -- ``class_no_1``, …, ``class_no_80`` - probability distribution over - the classes. - For getting the final prediction, we need to apply a non-maximum suppression algorithm and rescale box coordinates to the original image size. -The instance segmentation model, additionally, has an output that -contains proto mask candidates for instance segmentation. It should be -decoded by using box coordinates. It is a tensor with the -``[-1 32, -1, -1]`` shape in the ``B,C H,W`` format, where: +Finally, detection box has the [``x``, ``y``, ``h``, ``w``, +``class_no_1``, …, ``class_no_80``] format, where: -- ``B`` - batch size -- ``C`` - number of candidates -- ``H`` - mask height -- ``W`` - mask width +- (``x``, ``y``) - raw coordinates of box center +- ``h``, ``w`` - raw height and width of the box +- ``class_no_1``, …, ``class_no_80`` - probability distribution over + the classes. .. code:: ipython3 - try: - scale_segments = ops.scale_segments - except AttributeError: - scale_segments = ops.scale_coords - def postprocess( pred_boxes:np.ndarray, input_hw:Tuple[int, int], @@ -506,8 +427,6 @@ decoded by using box coordinates. 
It is a tensor with the nms_iou_threshold:float = 0.7, agnosting_nms:bool = False, max_detections:int = 300, - pred_masks:np.ndarray = None, - retina_mask:bool = False ): """ YOLOv8 model postprocessing function. Applied non maximum supression algorithm to detections and rescale boxes to original image size @@ -519,14 +438,10 @@ decoded by using box coordinates. It is a tensor with the nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not max_detections (int, *optional*, 300): maximum detections after NMS - pred_masks (np.ndarray, *optional*, None): model ooutput prediction masks, if not provided only boxes will be postprocessed - retina_mask (bool, *optional*, False): retina mask postprocessing instead of native decoding Returns: - pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] and segment - segmentation polygons for each element in batch + pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] """ nms_kwargs = {"agnostic": agnosting_nms, "max_det":max_detections} - # if pred_masks is not None: - # nms_kwargs["nm"] = 32 preds = ops.non_max_suppression( torch.from_numpy(pred_boxes), min_conf_threshold, @@ -534,41 +449,29 @@ decoded by using box coordinates. 
It is a tensor with the nc=80, **nms_kwargs ) - results = [] - proto = torch.from_numpy(pred_masks) if pred_masks is not None else None + results = [] for i, pred in enumerate(preds): shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape if not len(pred): results.append({"det": [], "segment": []}) continue - if proto is None: - pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() - results.append({"det": pred}) - continue - if retina_mask: - pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() - masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2]) # HWC - segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)] - else: - masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], input_hw, upsample=True) - pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() - segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)] - results.append({"det": pred[:, :6].numpy(), "segment": segments}) + pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round() + results.append({"det": pred}) + return results -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -588,27 +491,23 @@ Select device from dropdown list for running inference using OpenVINO: -Test on single image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Test on single image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, once we have defined preprocessing and postprocessing steps, we are -ready to check model prediction. - -First, object detection: +ready to check model prediction for object detection. .. code:: ipython3 - from openvino.runtime import Core, Model + core = ov.Core() - core = Core() det_ov_model = core.read_model(det_model_path) if device.value != "CPU": det_ov_model.reshape({0: [1, 3, 640, 640]}) det_compiled_model = core.compile_model(det_ov_model, device.value) - def detect(image:np.ndarray, model:Model): + def detect(image:np.ndarray, model:ov.Model): """ OpenVINO YOLOv8 model inference function. Preprocess image, runs model inference and postprocess results using NMS. 
Parameters: @@ -617,19 +516,14 @@ First, object detection: Returns: detections (np.ndarray): detected boxes in format [x1, y1, x2, y2, score, label] """ - num_outputs = len(model.outputs) preprocessed_image = preprocess_image(image) input_tensor = image_to_tensor(preprocessed_image) result = model(input_tensor) boxes = result[model.output(0)] - masks = None - if num_outputs > 1: - masks = result[model.output(1)] input_hw = input_tensor.shape[2:] - detections = postprocess(pred_boxes=boxes, input_hw=input_hw, orig_img=image, pred_masks=masks) + detections = postprocess(pred_boxes=boxes, input_hw=input_hw, orig_img=image) return detections - input_image = np.array(Image.open(IMAGE_PATH)) detections = detect(input_image, det_compiled_model)[0] image_with_boxes = draw_results(detections, input_image, label_map) @@ -639,47 +533,19 @@ First, object detection: -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_27_0.png - - - -Then, instance segmentation: - -.. code:: ipython3 - - seg_ov_model = core.read_model(seg_model_path) - if device.value != "CPU": - seg_ov_model.reshape({0: [1, 3, 640, 640]}) - seg_compiled_model = core.compile_model(seg_ov_model, device.value) - - - input_image = np.array(Image.open(IMAGE_PATH)) - detections = detect(input_image, seg_compiled_model)[0] - image_with_masks = draw_results(detections, input_image, label_map) - - Image.fromarray(image_with_boxes) - Image.fromarray(image_with_masks) - - - - -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_29_0.png +.. image:: 230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.png -Great! The result is the same, as produced by original models. 
- -Check model accuracy on the dataset `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Check model accuracy on the dataset +----------------------------------------------------------------------------- For comparing the optimized model result with the original, it is good to know some measurable results in terms of model accuracy on the validation dataset. -Download the validation dataset `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Download the validation dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ YOLOv8 is pre-trained on the COCO dataset, so to evaluate the model accuracy we need to download it. According to the instructions provided @@ -687,20 +553,17 @@ in the YOLOv8 repo, we also need to download annotations in the format used by the author of the model, for use with the original model evaluation function. -.. note:: - - The initial dataset download may take a few minutes to + **Note**: The initial dataset download may take a few minutes to complete. The download speed will vary depending on the quality of your internet connection. - .. code:: ipython3 from zipfile import ZipFile DATA_URL = "http://images.cocodataset.org/zips/val2017.zip" LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip" - CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml" + CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/8ebe94d1e928687feaa1fee6d5668987df5e43be/ultralytics/datasets/coco.yaml" OUT_DIR = Path('./datasets') @@ -731,9 +594,8 @@ evaluation function. 
datasets/coco.yaml: 0%| | 0.00/1.25k [00:00`__ -------------------------------------------------------------------------------------------------------------------------------- - +Define validation function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -741,13 +603,13 @@ Define validation function `⇑ <#top>`__ from ultralytics.yolo.utils.metrics import ConfusionMatrix - def test(model:Model, core:Core, data_loader:torch.utils.data.DataLoader, validator, num_samples:int = None): + def test(model:ov.Model, core:ov.Core, data_loader:torch.utils.data.DataLoader, validator, num_samples:int = None): """ OpenVINO YOLOv8 model accuracy validation function. Runs model validation on dataset and returns metrics Parameters: model (Model): OpenVINO model data_loader (torch.utils.data.DataLoader): dataset loader - validato: instalce of validator class + validator: instance of validator class num_samples (int, *optional*, None): validate model only on specified number samples, if provided Returns: stats: (Dict[str, float]) - dictionary with aggregated accuracy metrics statistics, key is metric name, value is metric value @@ -758,17 +620,13 @@ Define validation function `⇑ <#top>`__ validator.batch_i = 1 validator.confusion_matrix = ConfusionMatrix(nc=validator.nc) model.reshape({0: [1, 3, -1, -1]}) - num_outputs = len(model.outputs) compiled_model = core.compile_model(model) for batch_i, batch in enumerate(tqdm(data_loader, total=num_samples)): if num_samples is not None and batch_i == num_samples: break batch = validator.preprocess(batch) results = compiled_model(batch["img"]) - if num_outputs == 1: - preds = torch.from_numpy(results[compiled_model.output(0)]) - else: - preds = [torch.from_numpy(results[compiled_model.output(0)]), torch.from_numpy(results[compiled_model.output(1)])] + preds = torch.from_numpy(results[compiled_model.output(0)]) preds = validator.postprocess(preds) validator.update_metrics(preds, batch) stats = 
validator.get_stats() @@ -800,9 +658,8 @@ Define validation function `⇑ <#top>`__ pf = '%20s' + '%12i' * 2 + '%12.3g' * 4 # print format print(pf % ('all', total_images, total_objects, s_mp, s_mr, s_map50, s_mean_ap)) -Configure Validator helper and create DataLoader `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Configure Validator helper and create DataLoader +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The original model repository uses a ``Validator`` wrapper, which represents the accuracy validation pipeline. It creates dataloader and @@ -846,41 +703,14 @@ validator class instance. det_validator.metrics.names = det_validator.names det_validator.nc = det_model.model.model[-1].nc -.. code:: ipython3 - - seg_validator = seg_model.ValidatorClass(args=args) - seg_validator.data = check_det_dataset(args.data) - seg_data_loader = seg_validator.get_dataloader("datasets/coco/", 1) - - seg_validator.is_coco = True - seg_validator.class_map = ops.coco80_to_coco91_class() - seg_validator.names = seg_model.model.names - seg_validator.metrics.names = seg_validator.names - seg_validator.nc = seg_model.model.model[-1].nc - seg_validator.nm = 32 - seg_validator.process = ops.process_mask - seg_validator.plot_masks = [] - - -.. parsed-literal:: - - val: Scanning datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00\ **Note**: Model evaluation is time consuming +process and can take several minutes, depending on the hardware. For +reducing calculation time, we define ``num_samples`` parameter with +evaluation subset size, but in this case, accuracy can be noncomparable +with originally reported by the authors of the model, due to validation +subset difference. *To validate the models on the full dataset set +``NUM_TEST_SAMPLES = None``.* .. 
code:: ipython3 @@ -909,31 +739,6 @@ To validate the models on the full dataset set all 300 2145 0.594 0.543 0.579 0.417 -.. code:: ipython3 - - fp_seg_stats = test(seg_ov_model, core, seg_data_loader, seg_validator, num_samples=NUM_TEST_SAMPLES) - - - -.. parsed-literal:: - - 0%| | 0/300 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Optimize model using NNCF Post-training Quantization API +-------------------------------------------------------------------------------------------------- `NNCF `__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -967,8 +771,7 @@ The optimization process contains the following steps: Reuse validation dataloader in accuracy testing for quantization. For that, it should be wrapped into the ``nncf.Dataset`` object and define a -transformation function for getting only input tensors. As preprocessing -for both models is the same, we can reuse one dataset for both models. +transformation function for getting only input tensors. .. code:: ipython3 @@ -991,14 +794,6 @@ for both models is the same, we can reuse one dataset for both models. quantization_dataset = nncf.Dataset(det_data_loader, transform_fn) -.. parsed-literal:: - - 2023-07-14 18:41:29.274964: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-14 18:41:29.313487: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-07-14 18:41:29.989212: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -1016,12 +811,9 @@ asymmetric quantization of activations. For more accurate results, we should keep the operation in the postprocessing subgraph in floating point precision, using the ``ignored_scope`` parameter. -.. note:: - - Model post-training quantization is time-consuming process. + **Note**: Model post-training quantization is time-consuming process. Be patient, it can take several minutes depending on your hardware. - .. code:: ipython3 ignored_scope = nncf.IgnoredScope( @@ -1060,15 +852,14 @@ point precision, using the ``ignored_scope`` parameter. INFO:nncf:Not adding activation input quantizer for operation: 156 /model.22/dfl/conv/Conv INFO:nncf:Not adding activation input quantizer for operation: 178 /model.22/Sub INFO:nncf:Not adding activation input quantizer for operation: 179 /model.22/Add_10 - INFO:nncf:Not adding activation input quantizer for operation: 205 /model.22/Div_1 INFO:nncf:Not adding activation input quantizer for operation: 193 /model.22/Sub_1 INFO:nncf:Not adding activation input quantizer for operation: 218 /model.22/Mul_5 .. 
parsed-literal:: - Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:34<00:00, 8.79it/s] - Biases correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 22.46it/s] + Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:31<00:00, 9.54it/s] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:03<00:00, 18.94it/s] .. code:: ipython3 @@ -1084,52 +875,8 @@ point precision, using the ``ignored_scope`` parameter. Quantized detection model will be saved to models/yolov8n_openvino_int8_model/yolov8n.xml -.. code:: ipython3 - - # Instance segmentation model - - quantized_seg_model = nncf.quantize( - seg_ov_model, - quantization_dataset, - preset=nncf.QuantizationPreset.MIXED, - ignored_scope=ignored_scope - ) - - -.. 
parsed-literal:: - - INFO:nncf:12 ignored nodes was found by name in the NNCFGraph - INFO:nncf:9 ignored nodes was found by types in the NNCFGraph - INFO:nncf:Not adding activation input quantizer for operation: 140 /model.22/Sigmoid - INFO:nncf:Not adding activation input quantizer for operation: 174 /model.22/dfl/conv/Conv - INFO:nncf:Not adding activation input quantizer for operation: 199 /model.22/Sub - INFO:nncf:Not adding activation input quantizer for operation: 200 /model.22/Add_10 - INFO:nncf:Not adding activation input quantizer for operation: 233 /model.22/Div_1 - INFO:nncf:Not adding activation input quantizer for operation: 217 /model.22/Sub_1 - INFO:nncf:Not adding activation input quantizer for operation: 250 /model.22/Mul_5 - - -.. parsed-literal:: - - Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:40<00:00, 7.45it/s] - Biases correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:03<00:00, 23.13it/s] - - -.. code:: ipython3 - - int8_model_seg_path = models_dir / f'{SEG_MODEL_NAME}_openvino_int8_model/{SEG_MODEL_NAME}.xml' - print(f"Quantized segmentation model will be saved to {int8_model_seg_path}") - serialize(quantized_seg_model, str(int8_model_seg_path)) - - -.. 
parsed-literal:: - - Quantized segmentation model will be saved to models/yolov8n-seg_openvino_int8_model/yolov8n-seg.xml - - -Validate Quantized model inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Validate Quantized model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``nncf.quantize`` returns the OpenVINO Model class instance, which is suitable for loading on a device for making predictions. ``INT8`` model @@ -1151,10 +898,6 @@ on the image. -Object detection: `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - .. code:: ipython3 if device.value != "CPU": @@ -1169,43 +912,22 @@ Object detection: `⇑ <#top>`__ -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_59_0.png - - - -Instance segmentation: `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. code:: ipython3 - - if device.value != "CPU": - quantized_seg_model.reshape({0: [1, 3, 640, 640]}) - quantized_seg_compiled_model = core.compile_model(quantized_seg_model, device.value) - input_image = np.array(Image.open(IMAGE_PATH)) - detections = detect(input_image, quantized_seg_compiled_model)[0] - image_with_masks = draw_results(detections, input_image, label_map) - - Image.fromarray(image_with_masks) - - - +.. image:: 230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.png -.. 
image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_61_0.png +Compare the Original and Quantized Models +----------------------------------------------------------------------------------- -Compare Performance of the Original and Quantized Models `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare performance object detection models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Finally, use the OpenVINO `Benchmark -Tool `__ +Tool `__ to measure the inference performance of the ``FP32`` and ``INT8`` models. -.. note:: - - For more accurate performance, it is recommended to run + **Note**: For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run ``benchmark_app -m -d CPU -shape ""`` to @@ -1214,11 +936,6 @@ models. ``benchmark_app --help`` to see an overview of all command-line options. - -Compare performance object detection models `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - .. code:: ipython3 device @@ -1245,18 +962,18 @@ Compare performance object detection models `⇑ <#top>`__ [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 
2023.2.0-12690-0ee0b4d9561 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 16.88 ms + [ INFO ] Read model took 16.72 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] @@ -1265,53 +982,56 @@ Compare performance object detection models `⇑ <#top>`__ [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 11.45 ms + [ INFO ] Reshape model took 11.83 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] output0 (node: output0) : f32 / [...] / [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 410.99 ms + [ INFO ] Compile model took 424.62 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 12 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] 
ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 12 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 30.16 ms + [ INFO ] First inference took 34.66 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 19752 iterations - [ INFO ] Duration: 120070.55 ms + [ INFO ] Count: 15024 iterations + [ INFO ] Duration: 120272.02 ms [ INFO ] Latency: - [ INFO ] Median: 71.27 ms - [ INFO ] Average: 72.76 ms - [ INFO ] Min: 47.53 ms - [ INFO ] Max: 164.37 ms - [ INFO ] Throughput: 164.50 FPS + [ INFO ] Median: 77.25 ms + [ INFO ] Average: 95.83 ms + [ INFO ] Min: 60.23 ms + [ INFO ] Max: 270.56 ms + [ INFO ] Throughput: 124.92 FPS .. code:: ipython3 @@ -1326,18 +1046,18 @@ Compare performance object detection models `⇑ <#top>`__ [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 
2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.2.0-12690-0ee0b4d9561 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 27.47 ms + [ INFO ] Read model took 73.93 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [1,3,?,?] @@ -1346,236 +1066,66 @@ Compare performance object detection models `⇑ <#top>`__ [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 14.87 ms + [ INFO ] Reshape model took 56.37 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] [ INFO ] Model outputs: [ INFO ] output0 (node: output0) : f32 / [...] 
/ [1,84,8400] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 681.89 ms + [ INFO ] Compile model took 1742.92 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 18 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 18 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] NUM_STREAMS: 18 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 18 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] - [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! - [ INFO ] Fill input 'images' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 18 inference requests, limits: 15000 ms duration) - [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 20.61 ms - [Step 11/11] Dumping statistics report - [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6282 iterations - [ INFO ] Duration: 15065.20 ms - [ INFO ] Latency: - [ INFO ] Median: 41.71 ms - [ INFO ] Average: 42.98 ms - [ INFO ] Min: 25.38 ms - [ INFO ] Max: 118.34 ms - [ INFO ] Throughput: 416.99 FPS - - -Instance segmentation `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. code:: ipython3 - - !benchmark_app -m $seg_model_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 - - -.. parsed-literal:: - - [Step 1/11] Parsing and validating input arguments - [ INFO ] Parsing input parameters - [Step 2/11] Loading OpenVINO Runtime - [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 - [ INFO ] - [ INFO ] Device info: - [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 - [ INFO ] - [ INFO ] - [Step 3/11] Setting device configuration - [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. - [Step 4/11] Reading model files - [ INFO ] Loading model files - [ INFO ] Read model took 18.86 ms - [ INFO ] Original model I/O parameters: - [ INFO ] Model inputs: - [ INFO ] images (node: images) : f32 / [...] / [?,3,?,?] - [ INFO ] Model outputs: - [ INFO ] output0 (node: output0) : f32 / [...] / [?,116,?] - [ INFO ] output1 (node: output1) : f32 / [...] / [?,32,8..,8..] 
- [Step 5/11] Resizing model to match image sizes and given batch - [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 13.15 ms - [Step 6/11] Configuring input of the model - [ INFO ] Model inputs: - [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] - [ INFO ] Model outputs: - [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] - [ INFO ] output1 (node: output1) : f32 / [...] / [1,32,160,160] - [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 420.45 ms - [Step 8/11] Querying optimal runtime parameters - [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE [ INFO ] MODEL_PRIORITY: Priority.MEDIUM - [ INFO ] MULTI_DEVICE_PRIORITIES: CPU - [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 12 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT - [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! [ INFO ] Fill input 'images' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) - [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 39.79 ms - [Step 11/11] Dumping statistics report - [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 1920 iterations - [ INFO ] Duration: 15131.06 ms - [ INFO ] Latency: - [ INFO ] Median: 92.12 ms - [ INFO ] Average: 94.20 ms - [ INFO ] Min: 55.80 ms - [ INFO ] Max: 154.59 ms - [ INFO ] Throughput: 126.89 FPS - - -.. code:: ipython3 - - !benchmark_app -m $int8_model_seg_path -d $device.value -api async -shape "[1,3,640,640]" -t 15 - - -.. parsed-literal:: - - [Step 1/11] Parsing and validating input arguments - [ INFO ] Parsing input parameters - [Step 2/11] Loading OpenVINO Runtime - [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 - [ INFO ] - [ INFO ] Device info: - [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 - [ INFO ] - [ INFO ] - [Step 3/11] Setting device configuration - [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. - [Step 4/11] Reading model files - [ INFO ] Loading model files - [ INFO ] Read model took 31.53 ms - [ INFO ] Original model I/O parameters: - [ INFO ] Model inputs: - [ INFO ] images (node: images) : f32 / [...] / [1,3,?,?] - [ INFO ] Model outputs: - [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,21..] - [ INFO ] output1 (node: output1) : f32 / [...] / [1,32,8..,8..] - [Step 5/11] Resizing model to match image sizes and given batch - [ INFO ] Model batch size: 1 - [ INFO ] Reshaping model: 'images': [1,3,640,640] - [ INFO ] Reshape model took 16.37 ms - [Step 6/11] Configuring input of the model - [ INFO ] Model inputs: - [ INFO ] images (node: images) : u8 / [N,C,H,W] / [1,3,640,640] - [ INFO ] Model outputs: - [ INFO ] output0 (node: output0) : f32 / [...] / [1,116,8400] - [ INFO ] output1 (node: output1) : f32 / [...] 
/ [1,32,160,160] - [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 667.41 ms - [Step 8/11] Querying optimal runtime parameters - [ INFO ] Model: - [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM - [ INFO ] MULTI_DEVICE_PRIORITIES: CPU - [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 12 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO - [ INFO ] NETWORK_NAME: torch_jit - [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT - [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] - [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'images'!. This input will be filled with random values! - [ INFO ] Fill input 'images' with random values - [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) + [Step 10/11] Measuring performance (Start inference asynchronously, 18 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 26.03 ms + [ INFO ] First inference took 58.19 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 4404 iterations - [ INFO ] Duration: 15067.64 ms + [ INFO ] Count: 3150 iterations + [ INFO ] Duration: 15116.07 ms [ INFO ] Latency: - [ INFO ] Median: 39.77 ms - [ INFO ] Average: 40.86 ms - [ INFO ] Min: 26.84 ms - [ INFO ] Max: 106.87 ms - [ INFO ] Throughput: 292.28 FPS - + [ INFO ] Median: 79.96 ms + [ INFO ] Average: 85.97 ms + [ INFO ] Min: 56.29 ms + [ INFO ] Max: 154.33 ms + [ INFO ] Throughput: 208.39 FPS -Validate quantized model accuracy `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Validate quantized model accuracy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As we can see, there is no significant difference between ``INT8`` and float model result in a single image test. To understand how quantization influences model prediction precision, we can compare model accuracy on a dataset. -Object detection `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - .. code:: ipython3 int8_det_stats = test(quantized_det_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES) @@ -1608,69 +1158,27 @@ Object detection `⇑ <#top>`__ all 300 2145 0.623 0.517 0.572 0.406 -Instance segmentation `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. code:: ipython3 - - int8_seg_stats = test(quantized_seg_model, core, seg_data_loader, seg_validator, num_samples=NUM_TEST_SAMPLES) - - - -.. 
parsed-literal:: - - 0%| | 0/300 [00:00`__ -############################################################################################################################### +Next steps +---------------------------------------------------- -This section contains suggestions on how to -additionally improve the performance of your application using OpenVINO. +This section contains suggestions on how to additionally improve the +performance of your application using OpenVINO. -Async inference pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Async inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The key advantage of the Async -API is that when a device is busy with inference, the application can -perform other tasks in parallel (for example, populating inputs or -scheduling other requests) rather than wait for the current inference to -complete first. To understand how to perform async inference using -openvino, refer to `Async API +The key advantage of the Async API is that when a device is busy with +inference, the application can perform other tasks in parallel (for +example, populating inputs or scheduling other requests) rather than +wait for the current inference to complete first. To understand how to +perform async inference using openvino, refer to `Async API tutorial <115-async-api-with-output.html>`__ -Integration preprocessing to model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Integration preprocessing to model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Preprocessing API enables making preprocessing a part of the model reducing application code and dependency on additional image processing @@ -1680,21 +1188,18 @@ on a selected device (CPU/GPU etc.) 
rather than always being executed on CPU as part of an application. This will improve selected device utilization. -For more information, refer to the overview of `Preprocessing API `__ . +For more information, refer to the overview of `Preprocessing +API `__. For example, we can integrate converting input data layout and normalization defined in ``image_to_tensor`` function. -The integration process consists of the following steps: - -1. Initialize a PrePostProcessing object. -2. Define the input data format. -3. Describe preprocessing steps. -4. Integrating Steps into a Model. - -Initialize PrePostProcessing API `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- +The integration process consists of the following steps: 1. Initialize a +PrePostProcessing object. 2. Define the input data format. 3. Describe +preprocessing steps. 4. Integrating Steps into a Model. +Initialize PrePostProcessing API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``openvino.preprocess.PrePostProcessor`` class enables specifying preprocessing and postprocessing steps for a model. @@ -1705,44 +1210,39 @@ preprocessing and postprocessing steps for a model. ppp = PrePostProcessor(quantized_det_model) -Define input data format `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- +Define input data format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To address particular input of -a model/preprocessor, the ``input(input_id)`` method, where ``input_id`` -is a positional index or input tensor name for input in -``model.inputs``, if a model has a single input, ``input_id`` can be -omitted. After reading the image from the disc, it contains U8 pixels in -the ``[0, 255]`` range and is stored in the ``NHWC`` layout. 
To perform -a preprocessing conversion, we should provide this to the tensor -description. +To address a particular input of a model/preprocessor, use the +``input(input_id)`` method, where ``input_id`` is a positional index or +input tensor name for input in ``model.inputs``. If a model has a single +input, ``input_id`` can be omitted. After reading the image from the +disk, it contains U8 pixels in the ``[0, 255]`` range and is stored in +the ``NHWC`` layout. To perform a preprocessing conversion, we should +provide this to the tensor description. .. code:: ipython3 - from openvino.runtime import Type, Layout - - ppp.input(0).tensor().set_shape([1, 640, 640, 3]).set_element_type(Type.u8).set_layout(Layout('NHWC')) + ppp.input(0).tensor().set_shape([1, 640, 640, 3]).set_element_type(ov.Type.u8).set_layout(ov.Layout('NHWC')) pass To perform layout conversion, we also should provide information about layout expected by model -Describe preprocessing steps `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Describe preprocessing steps +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Our preprocessing function contains the following steps: - -- Convert the data type from ``U8`` to ``FP32``. -- Convert the data layout from ``NHWC`` to ``NCHW`` format. -- Normalize each pixel by dividing on scale factor 255. +Our preprocessing function contains the following steps: \* Convert the +data type from ``U8`` to ``FP32``. \* Convert the data layout from +``NHWC`` to ``NCHW`` format. \* Normalize each pixel by dividing by a +scale factor of 255. ``ppp.input(input_id).preprocess()`` is used for defining a sequence of preprocessing steps: ..
code:: ipython3 - ppp.input(0).preprocess().convert_element_type(Type.f32).convert_layout(Layout('NCHW')).scale([255., 255., 255.]) + ppp.input(0).preprocess().convert_element_type(ov.Type.f32).convert_layout(ov.Layout('NCHW')).scale([255., 255., 255.]) print(ppp) @@ -1759,9 +1259,8 @@ preprocessing steps: -Integrating Steps into a Model `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Integrating Steps into a Model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Once the preprocessing steps have been finished, the model can be finally built. Additionally, we can save a completed model to OpenVINO @@ -1777,7 +1276,7 @@ device. Now, we can skip these preprocessing steps in detect function: .. code:: ipython3 - def detect_without_preprocess(image:np.ndarray, model:Model): + def detect_without_preprocess(image:np.ndarray, model:ov.Model): """ OpenVINO YOLOv8 model with integrated preprocessing inference function. Preprocess image, runs model inference and postprocess results using NMS. Parameters: @@ -1805,13 +1304,12 @@ device. Now, we can skip these preprocessing steps in detect function: -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_91_0.png - +.. image:: 230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.png -Live demo `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Live demo +--------------------------------------------------- The following code runs model inference on a video: @@ -1823,7 +1321,7 @@ The following code runs model inference on a video: # Main processing function to run object detection. 
- def run_object_detection(source=0, flip=False, use_popup=False, skip_first_frames=0, model=det_model, device="AUTO"): + def run_object_detection(source=0, flip=False, use_popup=False, skip_first_frames=0, model=det_model, device=device.value): player = None if device != "CPU": model.reshape({0: [1, 3, 640, 640]}) @@ -1918,13 +1416,8 @@ The following code runs model inference on a video: if use_popup: cv2.destroyAllWindows() -Run `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -Run Live Object Detection and Segmentation `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Run Live Object Detection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use a webcam as the video input. By default, the primary webcam is set with \ ``source=0``. If you have multiple webcams, each one will be @@ -1933,16 +1426,13 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set \ ``use_popup=True``. -.. note:: - - To use this notebook with a webcam, you need to run the + **NOTE**: To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. If you run the notebook on a remote server (for example, in Binder or Google Colab service), the webcam will not work. By default, the lower cell will run model inference on a video file. If you want to try live inference on your webcam set ``WEBCAM_INFERENCE = True`` - Run the object detection: .. code:: ipython3 @@ -1973,23 +1463,7 @@ Run the object detection: -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_97_0.png - - -.. parsed-literal:: - - Source ended - - -Run instance segmentation: - -.. 
code:: ipython3 - - run_object_detection(source=VIDEO_SOURCE, flip=True, use_popup=False, model=seg_ov_model, device=device.value) - - - -.. image:: 230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_99_0.png +.. image:: 230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_74_0.png .. parsed-literal:: diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.jpg b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.jpg new file mode 100644 index 00000000000000..4e94755f08dc6c --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b98a5fef7380a09eda6ceeb961edca0e13fbcad4c3bed0e7de10c2c999f959 +size 110998 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.png b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.png new file mode 100644 index 00000000000000..d90aada7f55604 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_11_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b5ed3953a58c552aa19fb89e3748ae1b71ac2a7d6a4fad4d6b625316d92da1 +size 910454 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.jpg b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.jpg new file mode 100644 index 00000000000000..54446c3246150e --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a33a9db3664a7c515e8fe0b4cbb3d76ffd5caa1aae461c9a2f59de83337a4b69 +size 110022 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.png b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.png new file mode 100644 index 00000000000000..450c7756c1bb40 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002a762c093757329b1c7053bd3947253b012b23039c8dfdbd39a9652883ce5c +size 929067 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.jpg b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.jpg new file mode 100644 index 00000000000000..90588b4a194694 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932e582498ae12492125875a331a07b8cca4f381bc305ca6865c40b7040c0e64 +size 110900 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.png b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.png new file mode 100644 index 00000000000000..5c788d5c3908f0 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_45_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fae72a171d89c60b002cf544eb585b041e5b1e6bc0973287e85ffef16c63e3f +size 929911 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.jpg 
b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.jpg new file mode 100644 index 00000000000000..90588b4a194694 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932e582498ae12492125875a331a07b8cca4f381bc305ca6865c40b7040c0e64 +size 110900 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.png b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.png new file mode 100644 index 00000000000000..5c788d5c3908f0 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_68_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fae72a171d89c60b002cf544eb585b041e5b1e6bc0973287e85ffef16c63e3f +size 929911 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_74_0.png b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_74_0.png new file mode 100644 index 00000000000000..7d15baa9717e59 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/230-yolov8-object-detection-with-output_74_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34aba75212ffc8c00f004c94b5fb1579f4cd3090e977b25749333a0b22af2edc +size 491904 diff --git a/docs/notebooks/230-yolov8-object-detection-with-output_files/index.html b/docs/notebooks/230-yolov8-object-detection-with-output_files/index.html new file mode 100644 index 00000000000000..7caea3730dfc26 --- /dev/null +++ b/docs/notebooks/230-yolov8-object-detection-with-output_files/index.html @@ -0,0 +1,15 @@ + +Index of 
/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-object-detection-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/230-yolov8-object-detection-with-output_files/


../
+230-yolov8-object-detection-with-output_11_1.jpg   31-Oct-2023 00:35              110998
+230-yolov8-object-detection-with-output_11_1.png   31-Oct-2023 00:35              910454
+230-yolov8-object-detection-with-output_22_0.jpg   31-Oct-2023 00:35              110022
+230-yolov8-object-detection-with-output_22_0.png   31-Oct-2023 00:35              929067
+230-yolov8-object-detection-with-output_45_0.jpg   31-Oct-2023 00:35              110900
+230-yolov8-object-detection-with-output_45_0.png   31-Oct-2023 00:35              929911
+230-yolov8-object-detection-with-output_68_0.jpg   31-Oct-2023 00:35              110900
+230-yolov8-object-detection-with-output_68_0.png   31-Oct-2023 00:35              929911
+230-yolov8-object-detection-with-output_74_0.png   31-Oct-2023 00:35              491904
+

+ diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_13_1.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_13_1.png deleted file mode 100644 index 618bad4dc42ce0..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_13_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b5eb1fedadba67b60f9fb1667b4e40e336ccfbfa71e4fed305f0b87470cb78b -size 909775 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_15_1.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_15_1.png deleted file mode 100644 index be71ac654812c2..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_15_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c01f6059215cb2ce6b7d9708b6d26bea27ec1f4eeff776b2223acc7715595eb2 -size 733379 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_27_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_27_0.png deleted file mode 100644 index 612876d409d02f..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_27_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:888620bbf31706418626981052e021fdf577e74049767d97e994fbea672a7b6f -size 931247 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_29_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_29_0.png deleted file mode 100644 index eb580adb85806e..00000000000000 --- 
a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_29_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5532598f2bcbf2e0991b88e4ed13f59acf3a9a137fbb48815d3e7f6531907ceb -size 913676 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_59_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_59_0.png deleted file mode 100644 index 975c9b09939147..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_59_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fc131c6290d40ee9eaa328a07b56bd84951b2d8d4699a2e6577fe706c66062d -size 930875 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_61_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_61_0.png deleted file mode 100644 index eadb7c4d7f5350..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_61_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2222119ec8c50cca18983fd246ef71351ce36524102cfa6fadb1b387304e9264 -size 912345 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_91_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_91_0.png deleted file mode 100644 index 975c9b09939147..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_91_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fc131c6290d40ee9eaa328a07b56bd84951b2d8d4699a2e6577fe706c66062d -size 930875 diff --git 
a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_97_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_97_0.png deleted file mode 100644 index d86fabc033aae7..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_97_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b19e014c1ee03adc8fedc0932c528157c6c82f1766ba7570ee20ad704bee5784 -size 492170 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_99_0.png b/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_99_0.png deleted file mode 100644 index 72b77ba095fc5b..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/230-yolov8-optimization-with-output_99_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87bfc5fa32aff4adefd85a6a3e47170fb94ae8d7e66168024aa3f623d583a35e -size 497983 diff --git a/docs/notebooks/230-yolov8-optimization-with-output_files/index.html b/docs/notebooks/230-yolov8-optimization-with-output_files/index.html deleted file mode 100644 index 01c2ac260a0eb5..00000000000000 --- a/docs/notebooks/230-yolov8-optimization-with-output_files/index.html +++ /dev/null @@ -1,15 +0,0 @@ - -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/230-yolov8-optimization-with-output_files/ - -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/230-yolov8-optimization-with-output_files/


../
-230-yolov8-optimization-with-output_13_1.png       16-Aug-2023 01:31              909775
-230-yolov8-optimization-with-output_15_1.png       16-Aug-2023 01:31              733379
-230-yolov8-optimization-with-output_27_0.png       16-Aug-2023 01:31              931247
-230-yolov8-optimization-with-output_29_0.png       16-Aug-2023 01:31              913676
-230-yolov8-optimization-with-output_59_0.png       16-Aug-2023 01:31              930875
-230-yolov8-optimization-with-output_61_0.png       16-Aug-2023 01:31              912345
-230-yolov8-optimization-with-output_91_0.png       16-Aug-2023 01:31              930875
-230-yolov8-optimization-with-output_97_0.png       16-Aug-2023 01:31              492170
-230-yolov8-optimization-with-output_99_0.png       16-Aug-2023 01:31              497983
-

- diff --git a/docs/notebooks/231-instruct-pix2pix-image-editing-with-output.rst b/docs/notebooks/231-instruct-pix2pix-image-editing-with-output.rst index 308a358d1c51fc..9412662ee2384a 100644 --- a/docs/notebooks/231-instruct-pix2pix-image-editing-with-output.rst +++ b/docs/notebooks/231-instruct-pix2pix-image-editing-with-output.rst @@ -1,8 +1,6 @@ Image Editing with InstructPix2Pix and OpenVINO =============================================== - - The InstructPix2Pix is a conditional diffusion model that edits images based on written instructions provided by the user. Generative image editing models traditionally target a single editing task like style @@ -25,92 +23,50 @@ model using OpenVINO. Notebook contains the following steps: -1. Convert PyTorch models to ONNX format. -2. Convert ONNX models to OpenVINO IR format, using model conversion +1. Convert PyTorch models to OpenVINO IR format, using Model Conversion API. -3. Run InstructPix2Pix pipeline with OpenVINO. +2. Run InstructPix2Pix pipeline with OpenVINO. +3. Optimize InstructPix2Pix pipeline with + `NNCF `__ quantization. +4. Compare results of original and optimized pipelines. +**Table of contents:** -.. 
_top: -**Table of contents**: +- `Prerequisites <#prerequisites>`__ +- `Create Pytorch Models + pipeline <#create-pytorch-models-pipeline>`__ +- `Convert Models to OpenVINO + IR <#convert-models-to-openvino-ir>`__ -- `Prerequisites <#prerequisites>`__ -- `Create Pytorch Models pipeline <#create-pytorch-models-pipeline>`__ -- `Convert Models to OpenVINO IR <#convert-models-to-openvino-ir>`__ + - `Text Encoder <#text-encoder>`__ + - `VAE <#vae>`__ + - `Unet <#unet>`__ - - `Text Encoder <#text-encoder>`__ - - `VAE <#vae>`__ - - `Unet <#unet>`__ +- `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ +- `Quantization <#quantization>`__ -- `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ + - `Prepare calibration + dataset <#prepare-calibration-dataset>`__ + - `Run quantization <#run-quantization>`__ + - `Compare inference time of the FP16 and INT8 + models <#compare-inference-time-of-the-fp-and-int-models>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Interactive demo with + Gradio <#interactive-demo-with-gradio>`__ +Prerequisites +------------------------------------------------------- Install necessary packages .. code:: ipython3 - !pip install "transformers>=4.25.1" accelerate - !pip install "git+https://github.com/huggingface/diffusers.git" - - -.. 
parsed-literal:: - - Requirement already satisfied: transformers>=4.25.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (4.25.1) - Requirement already satisfied: accelerate in /home/ea/work/notebooks_env/lib/python3.8/site-packages (0.13.2) - Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (0.11.1) - Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (0.13.2) - Requirement already satisfied: filelock in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (3.9.0) - Requirement already satisfied: regex!=2019.12.17 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (2022.10.31) - Requirement already satisfied: packaging>=20.0 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (23.0) - Requirement already satisfied: numpy>=1.17 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (1.23.4) - Requirement already satisfied: pyyaml>=5.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (6.0) - Requirement already satisfied: requests in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (2.28.2) - Requirement already satisfied: tqdm>=4.27 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from transformers>=4.25.1) (4.64.1) - Requirement already satisfied: torch>=1.4.0 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from accelerate) (1.13.1+cpu) - Requirement already satisfied: psutil in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from accelerate) (5.9.4) - Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.10.0->transformers>=4.25.1) 
(4.4.0) - Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->transformers>=4.25.1) (1.26.14) - Requirement already satisfied: idna<4,>=2.5 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->transformers>=4.25.1) (3.4) - Requirement already satisfied: charset-normalizer<4,>=2 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->transformers>=4.25.1) (2.1.1) - Requirement already satisfied: certifi>=2017.4.17 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->transformers>=4.25.1) (2022.12.7) - - [notice] A new release of pip available: 22.3.1 -> 23.0 - [notice] To update, run: pip install --upgrade pip - Collecting git+https://github.com/huggingface/diffusers.git - Cloning https://github.com/huggingface/diffusers.git to /tmp/pip-req-build-tj6ekfd9 - Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers.git /tmp/pip-req-build-tj6ekfd9 - Resolved https://github.com/huggingface/diffusers.git to commit 1e5eaca754bce676ce9142cab7ccaaee78df4696 - Installing build dependencies ... done - Getting requirements to build wheel ... done - Preparing metadata (pyproject.toml) ... 
done - Requirement already satisfied: huggingface-hub>=0.10.0 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (0.11.1) - Requirement already satisfied: regex!=2019.12.17 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (2022.10.31) - Requirement already satisfied: numpy in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (1.23.4) - Requirement already satisfied: filelock in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (3.9.0) - Requirement already satisfied: importlib-metadata in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (4.13.0) - Requirement already satisfied: Pillow in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (9.4.0) - Requirement already satisfied: requests in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from diffusers==0.14.0.dev0) (2.28.2) - Requirement already satisfied: pyyaml>=5.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from huggingface-hub>=0.10.0->diffusers==0.14.0.dev0) (6.0) - Requirement already satisfied: tqdm in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from huggingface-hub>=0.10.0->diffusers==0.14.0.dev0) (4.64.1) - Requirement already satisfied: packaging>=20.9 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from huggingface-hub>=0.10.0->diffusers==0.14.0.dev0) (23.0) - Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from huggingface-hub>=0.10.0->diffusers==0.14.0.dev0) (4.4.0) - Requirement already satisfied: zipp>=0.5 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from importlib-metadata->diffusers==0.14.0.dev0) (3.11.0) - Requirement already satisfied: idna<4,>=2.5 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->diffusers==0.14.0.dev0) 
(3.4) - Requirement already satisfied: certifi>=2017.4.17 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->diffusers==0.14.0.dev0) (2022.12.7) - Requirement already satisfied: charset-normalizer<4,>=2 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->diffusers==0.14.0.dev0) (2.1.1) - Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ea/work/notebooks_env/lib/python3.8/site-packages (from requests->diffusers==0.14.0.dev0) (1.26.14) - - [notice] A new release of pip available: 22.3.1 -> 23.0 - [notice] To update, run: pip install --upgrade pip - - -Create Pytorch Models pipeline `⇑ <#top>`__ -############################################################################################################################### + %pip install -q "transformers>=4.25.1" accelerate gradio datasets diffusers + %pip install -q "openvino>=2023.1.0" +Create Pytorch Models pipeline +------------------------------------------------------------------------ ``StableDiffusionInstructPix2PixPipeline`` is an end-to-end inference pipeline that you can use to edit images from text instructions with @@ -119,9 +75,7 @@ just a few lines of code provided as part First, we load the pre-trained weights of all components of the model. -.. note:: - - Initially, model loading can take some time due to + **NOTE**: Initially, model loading can take some time due to downloading the weights. Also, the download speed depends on your internet connection. @@ -141,31 +95,15 @@ First, we load the pre-trained weights of all components of the model. del pipe +Convert Models to OpenVINO IR +----------------------------------------------------------------------- - -.. parsed-literal:: - - Fetching 15 files: 0%| | 0/15 [00:00`__ -############################################################################################################################### - - -OpenVINO supports PyTorch through export to the ONNX format. 
We will use -``torch.onnx.export`` function for obtaining an ONNX model. For more -information, refer to the `PyTorch -documentation `__. We need to -provide a model object, input data for model tracing and a path for -saving the model. Optionally, we can provide target onnx opset for -conversion and other parameters specified in the documentation (for -example, input and output names or dynamic shapes). - -While ONNX models are directly supported by OpenVINO™ runtime, it can be -useful to convert them to OpenVINO Intermediate Representation (IR) -format to take the advantage of advanced OpenVINO optimization tools and -features. We will use OpenVINO Model Optimizer to convert the model to -IR format and compress weights to the ``FP16`` format. +OpenVINO supports PyTorch models using `Model Conversion +API `__ +to convert the model to IR format. ``ov.convert_model`` function accepts +PyTorch model object and example input and then converts it to +``ov.Model`` class instance that ready to use for loading on device or +can be saved on disk using ``ov.save_model``. The InstructPix2Pix model is based on Stable Diffusion, a large-scale text-to-image latent diffusion model. You can find more details about @@ -183,9 +121,8 @@ The model consists of three important parts: Let us convert each part. -Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text-encoder is responsible for transforming the input prompt, for example, “a photo of an astronaut riding a horse” into an embedding @@ -198,64 +135,58 @@ indexes of tokens from text processed by tokenizer and padded to maximum length accepted by the model. Model outputs are two tensors: ``last_hidden_state`` - hidden state from the last MultiHeadAttention layer in the model and ``pooler_out`` - pooled output for whole model -hidden states. 
You will use ``opset_version=14``, since model contains -``triu`` operation, supported in ONNX only starting from this opset. +hidden states. .. code:: ipython3 from pathlib import Path - from openvino.tools import mo - from openvino.runtime import serialize, Core + import openvino as ov + import gc - core = Core() + core = ov.Core() - TEXT_ENCODER_ONNX_PATH = Path('text_encoder.onnx') - TEXT_ENCODER_OV_PATH = TEXT_ENCODER_ONNX_PATH.with_suffix('.xml') + TEXT_ENCODER_OV_PATH = Path("text_encoder.xml") + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() - def convert_encoder_onnx(text_encoder, onnx_path: Path): + + def convert_encoder(text_encoder: torch.nn.Module, ir_path:Path): """ - Convert Text Encoder model to ONNX. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Convert Text Encoder model.
+ Function accepts text encoder model, and prepares example inputs for conversion, Parameters: - text_encoder: InstrcutPix2Pix text_encoder model - onnx_path (Path): File for storing onnx model + text_encoder (torch.nn.Module): text_encoder model from Stable Diffusion pipeline + ir_path (Path): File for storing model Returns: None """ - if not onnx_path.exists(): - # switch model to inference mode - text_encoder.eval() - input_ids = torch.ones((1, 77), dtype=torch.long) - - # disable gradients calculation for reducing memory consumption - with torch.no_grad(): - # infer model, just to make sure that it works - text_encoder(input_ids) - # export model to ONNX format - torch.onnx.export( - text_encoder, # model instance - input_ids, # inputs for model tracing - onnx_path, # output file for saving result - # model input name for onnx representation - input_names=['input_ids'], - # model output names for onnx representation - output_names=['last_hidden_state', 'pooler_out'], - opset_version=14 # onnx opset version for export - ) - print('Text Encoder successfully converted to ONNX') - + input_ids = torch.ones((1, 77), dtype=torch.long) + # switch model to inference mode + text_encoder.eval() + + # disable gradients calculation for reducing memory consumption + with torch.no_grad(): + # Export model to IR format + ov_model = ov.convert_model(text_encoder, example_input=input_ids, input=[(1,77),]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f'Text Encoder successfully converted to IR and saved to {ir_path}') + if not TEXT_ENCODER_OV_PATH.exists(): - convert_encoder_onnx(text_encoder, TEXT_ENCODER_ONNX_PATH) - text_encoder = mo.convert_model( - TEXT_ENCODER_ONNX_PATH, compress_to_fp16=True) - serialize(text_encoder, str(TEXT_ENCODER_OV_PATH)) - print('Text Encoder successfully converted to IR') + convert_encoder(text_encoder, TEXT_ENCODER_OV_PATH) else: print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH}") del 
text_encoder + gc.collect() .. parsed-literal:: @@ -263,10 +194,17 @@ hidden states. You will use ``opset_version=14``, since model contains Text encoder will be loaded from text_encoder.xml -VAE `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. parsed-literal:: + + 32 + + + +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The VAE model consists of two parts: an encoder and a decoder. - The encoder is used to convert the image into a low dimensional @@ -283,18 +221,16 @@ into two independent models. .. code:: ipython3 - VAE_ENCODER_ONNX_PATH = Path('vae_encoder.onnx') - VAE_ENCODER_OV_PATH = VAE_ENCODER_ONNX_PATH.with_suffix('.xml') - + VAE_ENCODER_OV_PATH = Path("vae_encoder.xml") - def convert_vae_encoder_onnx(vae, onnx_path: Path): + def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path): """ - Convert VAE model to ONNX, then IR format. - Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE model for encoding to IR format. + Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, Parameters: - vae: InstrcutPix2Pix VAE model - onnx_path (Path): File for storing onnx model + vae (torch.nn.Module): VAE model from StableDiffusio pipeline + ir_path (Path): File for storing model Returns: None """ @@ -304,47 +240,33 @@ into two independent models. 
self.vae = vae def forward(self, image): - return self.vae.encode(image).latent_dist.mode() - - if not onnx_path.exists(): - vae_encoder = VAEEncoderWrapper(vae) - vae_encoder.eval() - image = torch.zeros((1, 3, 512, 512)) - with torch.no_grad(): - torch.onnx.export(vae_encoder, image, onnx_path, input_names=[ - 'image'], output_names=['image_latent']) - print('VAE encoder successfully converted to ONNX') + return self.vae.encode(x=image)["latent_dist"].sample() + vae_encoder = VAEEncoderWrapper(vae) + vae_encoder.eval() + image = torch.zeros((1, 3, 512, 512)) + with torch.no_grad(): + ov_model = ov.convert_model(vae_encoder, example_input=image, input=[((1,3,512,512),)]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f'VAE encoder successfully converted to IR and saved to {ir_path}') if not VAE_ENCODER_OV_PATH.exists(): - convert_vae_encoder_onnx(vae, VAE_ENCODER_ONNX_PATH) - vae_encoder = mo.convert_model(VAE_ENCODER_ONNX_PATH, compress_to_fp16=True) - serialize(vae_encoder, str(VAE_ENCODER_OV_PATH)) - print('VAE encoder successfully converted to IR') - del vae_encoder + convert_vae_encoder(vae, VAE_ENCODER_OV_PATH) else: print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH}") - - -.. parsed-literal:: - - VAE encoder will be loaded from vae_encoder.xml - - -.. code:: ipython3 - - VAE_DECODER_ONNX_PATH = Path('vae_decoder.onnx') - VAE_DECODER_OV_PATH = VAE_DECODER_ONNX_PATH.with_suffix('.xml') + VAE_DECODER_OV_PATH = Path('vae_decoder.xml') - def convert_vae_decoder_onnx(vae, onnx_path: Path): + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): """ - Convert VAE model to ONNX, then IR format. - Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE model for decoding to IR format. 
+ Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, Parameters: - vae: InstrcutPix2Pix VAE model - onnx_path (Path): File for storing onnx model + vae (torch.nn.Module): VAE model from StableDiffusion pipeline + ir_path (Path): File for storing model Returns: None """ @@ -355,38 +277,45 @@ into two independent models. def forward(self, latents): return self.vae.decode(latents) + + vae_decoder = VAEDecoderWrapper(vae) + latents = torch.zeros((1, 4, 64, 64)) - if not onnx_path.exists(): - vae_decoder = VAEDecoderWrapper(vae) - latents = torch.zeros((1, 4, 64, 64)) - - vae_decoder.eval() - with torch.no_grad(): - torch.onnx.export(vae_decoder, latents, onnx_path, input_names=[ - 'latents'], output_names=['sample']) - print('VAE decoder successfully converted to ONNX') + vae_decoder.eval() + with torch.no_grad(): + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=[((1,4,64,64),)]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f'VAE decoder successfully converted to IR and saved to {ir_path}') if not VAE_DECODER_OV_PATH.exists(): - convert_vae_decoder_onnx(vae, VAE_DECODER_ONNX_PATH) - vae_decoder = mo.convert_model(VAE_DECODER_ONNX_PATH, compress_to_fp16=True) - print('VAE decoder successfully converted to IR') - serialize(vae_decoder, str(VAE_DECODER_OV_PATH)) - del vae_decoder + convert_vae_decoder(vae, VAE_DECODER_OV_PATH) else: print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") + del vae + gc.collect() .. parsed-literal:: - VAE decoder successfully converted to IR + VAE encoder will be loaded from vae_encoder.xml + VAE decoder will be loaded from vae_decoder.xml + + + + +..
parsed-literal:: + 0 -Unet `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Unet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The Unet model has three inputs: - ``scaled_latent_model_input`` - the latent image sample from previous @@ -401,59 +330,70 @@ Model predicts the ``sample`` state for the next step. import numpy as np - UNET_ONNX_PATH = Path('unet/unet.onnx') - UNET_OV_PATH = UNET_ONNX_PATH.parents[1] / 'unet.xml' + UNET_OV_PATH = Path("unet.xml") + + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64 + } - def convert_unet_onnx(unet, onnx_path: Path): + def convert_unet(unet:torch.nn.Module, ir_path:Path): """ - Convert Unet model to ONNX, then IR format. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Convert U-net model to IR format. + Function accepts unet model, prepares example inputs for conversion, Parameters: - unet: InstrcutPix2Pix unet model - onnx_path (Path): File for storing onnx model + unet (StableDiffusionPipeline): unet from Stable Diffusion pipeline + ir_path (Path): File for storing model Returns: None """ - if not onnx_path.exists(): - # prepare inputs - latents_shape = (3, 8, 512 // 8, 512 // 8) - latents = torch.randn(latents_shape) - t = torch.from_numpy(np.array(1, dtype=float)) - encoder_hidden_state = torch.randn((3,77,768)) - - # if the model size > 2Gb, it will be represented as ONNX with external data files and we will store it in a separate directory to avoid having a lot of files in current directory - onnx_path.parent.mkdir(exist_ok=True, parents=True) - with torch.no_grad(): - torch.onnx.export( - unet, - (latents, t, encoder_hidden_state), str(onnx_path), - input_names=['scaled_latent_model_input', - 'timestep', 'text_embeddings'], - output_names=['sample'] - ) - print('Unet successfully converted to ONNX') + # prepare inputs + encoder_hidden_state = 
torch.ones((3, 77, 768)) + latents_shape = (3, 8, 512 // 8, 512 // 8) + latents = torch.randn(latents_shape) + t = torch.from_numpy(np.array(1, dtype=float)) + dummy_inputs = (latents, t, encoder_hidden_state) + input_info = [] + for input_tensor in dummy_inputs: + shape = ov.PartialShape(tuple(input_tensor.shape)) + element_type = dtype_mapping[input_tensor.dtype] + input_info.append((shape, element_type)) + + unet.eval() + with torch.no_grad(): + ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f'Unet successfully converted to IR and saved to {ir_path}') if not UNET_OV_PATH.exists(): - convert_unet_onnx(unet, UNET_ONNX_PATH) - unet = mo.convert_model(UNET_ONNX_PATH, compress_to_fp16=True) - serialize(unet, str(UNET_OV_PATH)) - print('Unet successfully converted to IR') + convert_unet(unet, UNET_OV_PATH) + gc.collect() else: - print(f"Unet successfully loaded from {UNET_OV_PATH}") + print(f"Unet will be loaded from {UNET_OV_PATH}") del unet + gc.collect() .. parsed-literal:: - Unet successfully loaded from unet.xml + Unet will be loaded from unet.xml -Prepare Inference Pipeline `⇑ <#top>`__ -############################################################################################################################### +.. parsed-literal:: + + 0 + + + +Prepare Inference Pipeline +-------------------------------------------------------------------- + Putting it all together, let us now take a closer look at how the model inference works by illustrating the logical flow. @@ -480,8 +420,7 @@ decoder part of the variational auto encoder. .. 
code:: ipython3 - from diffusers.pipeline_utils import DiffusionPipeline - from openvino.runtime import Model, Core + from diffusers import DiffusionPipeline from transformers import CLIPTokenizer from typing import Union, List, Optional, Tuple import PIL @@ -560,12 +499,12 @@ decoder part of the variational auto encoder. self, tokenizer: CLIPTokenizer, scheduler: EulerAncestralDiscreteScheduler, - core: Core, - text_encoder: Model, - vae_encoder: Model, - unet: Model, - vae_decoder: Model, - device:str = "AUTO" + core: ov.Core, + text_encoder: ov.Model, + vae_encoder: ov.Model, + unet: ov.Model, + vae_decoder: ov.Model, + device: str = "AUTO" ): super().__init__() self.tokenizer = tokenizer @@ -574,7 +513,7 @@ decoder part of the variational auto encoder. self.load_models(core, device, text_encoder, vae_encoder, unet, vae_decoder) - def load_models(self, core: Core, device: str, text_encoder: Model, vae_encoder: Model, unet: Model, vae_decoder: Model): + def load_models(self, core: ov.Core, device: str, text_encoder: ov.Model, vae_encoder: ov.Model, unet: ov.Model, vae_decoder: ov.Model): """ Function for loading models on device using OpenVINO @@ -590,11 +529,13 @@ decoder part of the variational auto encoder. 
""" self.text_encoder = core.compile_model(text_encoder, device) self.text_encoder_out = self.text_encoder.output(0) - self.vae_encoder = core.compile_model(vae_encoder, device) + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device != "CPU" else {} + self.vae_encoder = core.compile_model(vae_encoder, device, ov_config) self.vae_encoder_out = self.vae_encoder.output(0) - self.unet = core.compile_model(unet, device) + # We have to register UNet in config to be able to change it externally to collect calibration data + self.register_to_config(unet=core.compile_model(unet, device)) self.unet_out = self.unet.output(0) - self.vae_decoder = core.compile_model(vae_decoder) + self.vae_decoder = core.compile_model(vae_decoder, device, ov_config) self.vae_decoder_out = self.vae_decoder.output(0) def __call__( @@ -899,18 +840,18 @@ decoder part of the variational auto encoder. import matplotlib.pyplot as plt - def visualize_results(orig_img:PIL.Image.Image, processed_img:PIL.Image.Image, prompt:str): + def visualize_results(orig_img:PIL.Image.Image, processed_img:PIL.Image.Image, img1_title:str, img2_title:str): """ Helper function for results visualization Parameters: orig_img (PIL.Image.Image): original image processed_img (PIL.Image.Image): processed image after editing - prompt (str): text instruction used for editing + img1_title (str): title for the image on the left + img2_title (str): title for the image on the right Returns: fig (matplotlib.pyplot.Figure): matplotlib generated figure contains drawing result """ - orig_title = "Original image" im_w, im_h = orig_img.size is_horizontal = im_h <= im_w figsize = (20, 30) if is_horizontal else (30, 20) @@ -925,8 +866,8 @@ decoder part of the variational auto encoder. 
a.grid(False) list_axes[0].imshow(np.array(orig_img)) list_axes[1].imshow(np.array(processed_img)) - list_axes[0].set_title(orig_title, fontsize=20) - list_axes[1].set_title(f"Prompt: {prompt}", fontsize=20) + list_axes[0].set_title(img1_title, fontsize=20) + list_axes[1].set_title(img2_title, fontsize=20) fig.subplots_adjust(wspace=0.0 if is_horizontal else 0.01 , hspace=0.01 if is_horizontal else 0.0) fig.tight_layout() fig.savefig("result.png", bbox_inches='tight') @@ -949,6 +890,15 @@ can provide device selecting one from available in dropdown list. device + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + .. code:: ipython3 from transformers import CLIPTokenizer @@ -958,6 +908,13 @@ can provide device selecting one from available in dropdown list. ov_pipe = OVInstructPix2PixPipeline(tokenizer, scheduler, core, TEXT_ENCODER_OV_PATH, VAE_ENCODER_OV_PATH, UNET_OV_PATH, VAE_DECODER_OV_PATH, device=device.value) + +.. parsed-literal:: + + /home/ltalamanova/env_ci/lib/python3.8/site-packages/diffusers/configuration_utils.py:134: FutureWarning: Accessing config attribute `unet` directly via 'OVInstructPix2PixPipeline' object attribute is deprecated. Please access 'unet' over 'OVInstructPix2PixPipeline's config object instead, e.g. 'scheduler.config.unet'. + deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) + + Now, you are ready to define editing instructions and an image for running the inference pipeline. You can find example results generated by the model on this @@ -965,12 +922,9 @@ by the model on this need inspiration. Optionally, you can also change the random generator seed for latent state initialization and number of steps. -.. note:: - - Consider increasing ``steps`` to get more precise results. + **Note**: Consider increasing ``steps`` to get more precise results. A suggested value is ``100``, but it will take more time to process. - .. 
code:: ipython3 style = {'description_width': 'initial'} @@ -993,10 +947,9 @@ seed for latent state initialization and number of steps. VBox(children=(Text(value=' Make it in galaxy', description='your text'), IntSlider(value=42, description='see… -.. note:: - - Diffusion process can take some time, depending on what hardware you select. + **Note**: Diffusion process can take some time, depending on what + hardware you select. .. code:: ipython3 @@ -1036,11 +989,351 @@ generation. .. code:: ipython3 - fig = visualize_results(image, processed_image[0], text_prompt.value) + fig = visualize_results(image, processed_image[0], img1_title="Original image", img2_title=f"Prompt: {text_prompt.value}") -.. image:: 231-instruct-pix2pix-image-editing-with-output_files/231-instruct-pix2pix-image-editing-with-output_25_0.png +.. image:: 231-instruct-pix2pix-image-editing-with-output_files/231-instruct-pix2pix-image-editing-with-output_24_0.png Nice. As you can see, the picture has quite a high definition 🔥. + +Quantization +------------------------------------------------------- + +`NNCF `__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +According to ``InstructPix2Pix`` pipeline structure, UNet used for +iterative denoising of input. It means that model runs in the cycle +repeating inference on each diffusion step, while other parts of +pipeline take part only once. That is why computation cost and speed of +UNet denoising becomes the critical path in the pipeline. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize()`` to obtain quantized model. +3. Save the ``INT8`` model using ``openvino.save_model()`` function. 
+ +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + to_quantize = widgets.Checkbox( + value=True, + description='Quantization', + disabled=False, + ) + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + +.. code:: ipython3 + + import sys + sys.path.append("../utils") + + %load_ext skip_kernel_extension + +Prepare calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We use a portion of +`fusing/instructpix2pix-1000-samples `__ +dataset from Hugging Face as calibration data. To collect intermediate +model inputs for calibration we should customize ``CompiledModel``. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import datasets + from tqdm.notebook import tqdm + from transformers import Pipeline + from typing import Any, Dict, List + + class CompiledModelDecorator(ov.CompiledModel): + def __init__(self, compiled_model, prob: float, data_cache: List[Any] = None): + super().__init__(compiled_model) + self.data_cache = data_cache if data_cache else [] + self.prob = np.clip(prob, 0, 1) + + def __call__(self, *args, **kwargs): + if np.random.rand() >= self.prob: + self.data_cache.append(*args) + return super().__call__(*args, **kwargs) + + def collect_calibration_data(pix2pix_pipeline: Pipeline, subset_size: int) -> List[Dict]: + original_unet = pix2pix_pipeline.unet + pix2pix_pipeline.unet = CompiledModelDecorator(original_unet, prob=0.3) + dataset = datasets.load_dataset("fusing/instructpix2pix-1000-samples", split="train", streaming=True).shuffle(seed=42) + pix2pix_pipeline.set_progress_bar_config(disable=True) + + # Run inference for data collection + pbar = tqdm(total=subset_size) + diff = 0 + for batch in dataset: + prompt = batch["edit_prompt"] + image = batch["input_image"].convert("RGB") + _ 
= pix2pix_pipeline(prompt, image) + collected_subset_size = len(pix2pix_pipeline.unet.data_cache) + if collected_subset_size >= subset_size: + pbar.update(subset_size - pbar.n) + break + pbar.update(collected_subset_size - diff) + diff = collected_subset_size + + calibration_dataset = pix2pix_pipeline.unet.data_cache + pix2pix_pipeline.set_progress_bar_config(disable=False) + pix2pix_pipeline.unet = original_unet + return calibration_dataset + +.. code:: ipython3 + + %%skip not $to_quantize.value + + UNET_INT8_OV_PATH = Path("unet_int8.xml") + if not UNET_INT8_OV_PATH.exists(): + subset_size = 300 + unet_calibration_data = collect_calibration_data(ov_pipe, subset_size=subset_size) + + +.. parsed-literal:: + + /home/ltalamanova/env_ci/lib/python3.8/site-packages/diffusers/configuration_utils.py:134: FutureWarning: Accessing config attribute `unet` directly via 'OVInstructPix2PixPipeline' object attribute is deprecated. Please access 'unet' over 'OVInstructPix2PixPipeline's config object instead, e.g. 'scheduler.config.unet'. + deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) + + + +.. parsed-literal:: + + 0%| | 0/300 [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/231-instruct-pix2pix-image-editing-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/231-instruct-pix2pix-image-editing-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/231-instruct-pix2pix-image-editing-with-output_files/


../
-231-instruct-pix2pix-image-editing-with-output_..> 16-Aug-2023 01:31             2122470
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/231-instruct-pix2pix-image-editing-with-output_files/


../
+231-instruct-pix2pix-image-editing-with-output_..> 31-Oct-2023 00:35             3699096
+231-instruct-pix2pix-image-editing-with-output_..> 31-Oct-2023 00:35             3646410
 

diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output.rst b/docs/notebooks/232-clip-language-saliency-map-with-output.rst index c7bd680d28289d..75c61fbd0e19d6 100644 --- a/docs/notebooks/232-clip-language-saliency-map-with-output.rst +++ b/docs/notebooks/232-clip-language-saliency-map-with-output.rst @@ -77,31 +77,46 @@ used to build the saliency map. Here is how it can be done: ``crop`` is closer to the ``query``, and it should be a red region on the saliency map. If negative, it should be blue. 5. Update the corresponding region on the ``saliency map``. -6. Repeat steps 2-5 multiple times (``n_iters``). +6. Repeat steps 2-5 multiple times (``n_iters``). + +**Table of contents:** +--- + +- `Initial Implementation with Transformers and + Pytorch <#initial-implementation-with-transformers-and-pytorch>`__ +- `Separate Text and Visual + Processing <#separate-text-and-visual-processing>`__ +- `Convert to OpenVINO™ Intermediate Representation (IR) + Format <#convert-to-openvino-intermediate-representation-ir-format>`__ +- `Inference with OpenVINO™ <#inference-with-openvino>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Accelerate Inference with + AsyncInferQueue <#accelerate-inference-with-asyncinferqueue>`__ +- `Pack the Pipeline into a + Function <#pack-the-pipeline-into-a-function>`__ +- `Interactive demo with + Gradio <#interactive-demo-with-gradio>`__ +- `What To Do Next <#what-to-do-next>`__ .. |image0| image:: https://user-images.githubusercontent.com/29454499/218967961-9858efd5-fff2-4eb0-bde9-60852f4b31cb.JPG .. |image1| image:: https://openaiassets.blob.core.windows.net/$web/clip/draft/20210104b/overview-a.svg -Initial Implementation with Transformers and Pytorch ----------------------------------------------------- +Initial Implementation with Transformers and Pytorch +---------------------------------------------------------------------------------------------- .. 
code:: ipython3 # Install requirements - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q onnx transformers torch - - -.. parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + %pip install -q "openvino>=2023.1.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers torch gradio .. code:: ipython3 from pathlib import Path - from typing import Tuple, Union + from typing import Tuple, Union, Optional + from urllib.request import urlretrieve from matplotlib import colors import matplotlib.pyplot as plt @@ -112,6 +127,15 @@ Initial Implementation with Transformers and Pytorch from PIL import Image from transformers import CLIPModel, CLIPProcessor + +.. parsed-literal:: + + 2023-09-12 14:10:49.435909: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-12 14:10:49.470573: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-12 14:10:50.130215: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + To get the CLIP model, you will use the ``transformers`` library and the official ``openai/clip-vit-base-patch16`` from OpenAI. You can use any CLIP model from the HuggingFace Hub by simply replacing a model @@ -130,15 +154,6 @@ steps. model = CLIPModel.from_pretrained(model_checkpoint).eval() processor = CLIPProcessor.from_pretrained(model_checkpoint) - -.. parsed-literal:: - - 2023-07-18 23:28:44.655634: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-18 23:28:44.687925: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-18 23:28:45.260957: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - Let us write helper functions first. You will generate crop coordinates and size with ``get_random_crop_params``, and get the actual crop with ``get_crop_image``. To update the saliency map with the calculated @@ -181,13 +196,16 @@ formula above. ) -> Union[np.ndarray, torch.Tensor]: return one @ other.T / (np.linalg.norm(one) * np.linalg.norm(other)) -Parameters to be defined: - ``n_iters`` - number of times the procedure -will be repeated. Larger is better, but will require more time to -inference - ``min_crop_size`` - minimum size of the crop window. 
A -smaller size will increase the resolution of the saliency map but may -require more iterations - ``query`` - text that will be used to query -the image - ``image`` - the actual image that will be queried. You will -download the image from a link +Parameters to be defined: + +- ``n_iters`` - number of times the procedure will be repeated. Larger + is better, but will require more time to inference +- ``min_crop_size`` - minimum size of the crop window. A smaller size + will increase the resolution of the saliency map but may require more + iterations +- ``query`` - text that will be used to query the image +- ``image`` - the actual image that will be queried. You will download + the image from a link The image at the beginning was acquired with ``n_iters=2000`` and ``min_crop_size=50``. You will start with the lower number of inferences @@ -200,8 +218,9 @@ parameters at the end, when you get an optimized model. min_crop_size = 50 query = "Who developed the Theory of General Relativity?" - image_url = "https://pbs.twimg.com/media/Ee_aDODUMAAEBjW?format=jpg&name=small" - image = Image.open(requests.get(image_url, stream=True).raw) + image_path = Path("example.jpg") + urlretrieve("https://www.storypick.com/wp-content/uploads/2016/01/AE-2.jpg", image_path) + image = Image.open(image_path) im_tensor = np.array(image) x_dim, y_dim = image.size @@ -280,8 +299,8 @@ Let us overlay the saliency map on the image: .. 
code:: ipython3 - def plot_saliency_map(image_tensor: np.array, saliency_map: np.array, query: str) -> None: - plt.figure(dpi=150) + def plot_saliency_map(image_tensor: np.ndarray, saliency_map: np.ndarray, query: Optional[str]) -> None: + fig = plt.figure(dpi=150) plt.imshow(image_tensor) plt.imshow( saliency_map, @@ -289,20 +308,22 @@ Let us overlay the saliency map on the image: cmap="jet", alpha=0.5, # make saliency map trasparent to see original picture ) - plt.title(f'Query: "{query}"') + if query: + plt.title(f'Query: "{query}"') plt.axis("off") - plt.show() + return fig + - plot_saliency_map(im_tensor, saliency_map, query) + plot_saliency_map(im_tensor, saliency_map, query); .. image:: 232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_17_0.png -Separate Text and Visual Processing ------------------------------------ +Separate Text and Visual Processing +----------------------------------------------------------------------------- The code above is functional, but there are some repeated computations that can be avoided. The text embedding can be computed once because it @@ -333,7 +354,7 @@ obtain embeddings for the cropped images. similarity = cosine_similarity(text_embeds, image_embeds).item() - initial_similarity update_saliency_map(saliency_map, similarity, x, y, crop_size) - plot_saliency_map(im_tensor, saliency_map, query) + plot_saliency_map(im_tensor, saliency_map, query); @@ -349,8 +370,8 @@ obtain embeddings for the cropped images. The result might be slightly different because you use random crops to build a saliency map. -Convert to OpenVINO™ Intermediate Representation (IR) Format ------------------------------------------------------------- +Convert to OpenVINO™ Intermediate Representation (IR) Format +------------------------------------------------------------------------------------------------------ The process of building a saliency map can be quite time-consuming. 
To speed it up, you will use OpenVINO. OpenVINO is an inference framework @@ -358,93 +379,39 @@ designed to run pre-trained neural networks efficiently. One way to use it is to convert a model from its original framework representation to an OpenVINO Intermediate Representation (IR) format and then load it for inference. The model currently uses PyTorch. To get an IR, you need to -first convert the PyTorch model to the ONNX format. It can be done with -the ``torch.onnx.export`` function. See the `PyTorch -documentation `__ for more -information on ONNX conversion. +use the Model Conversion API. The ``ov.convert_model`` function accepts a PyTorch +model object and an example input and converts it to an OpenVINO Model +instance that is ready to be loaded on a device using ``ov.compile_model`` or can +be saved on disk using ``ov.save_model``. To separate the model into text and +image parts, we replace the forward method with ``get_text_features`` and +``get_image_features`` methods respectively. Internally, PyTorch +conversion to OpenVINO involves TorchScript tracing. For achieving +better conversion results, we need to guarantee that the model can be +successfully traced. The ``model.config.torchscript = True`` setting +prepares HuggingFace models for TorchScript tracing. More +details about that can be found in HuggingFace Transformers +`documentation `__ ..
code:: ipython3 - model_name = model_checkpoint.split('/')[-1] - - onnx_model_path = Path("onnx") / f"{model_name}.onnx" - onnx_model_path.parent.mkdir(exist_ok=True) - - torch.onnx.export( - model, # model is being run - dict(inputs), - onnx_model_path, # where to save the model - opset_version=14, # the ONNX version to export the model to - input_names=["input_ids", "pixel_values", "attention_mask"], # the model's input names - output_names=["logits_per_image", "logits_per_text", "text_embeds", "image_embeds"], # the model's output names - dynamic_axes={ # variable length axes - "input_ids": {0: "batch", 1: "sequence"}, - "pixel_values": {0: "batch", 1: "num_channels", 2: "height", 3: "width"}, - "attention_mask": {0: "batch", 1: "sequence"}, - "logits_per_image": {0: "batch"}, - "logits_per_text": {0: "batch"}, - "text_embeds": {0: "batch"}, - "image_embeds": {0: "batch"} - } - ) - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-453/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:286: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-453/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:326: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-453/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:294: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-453/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:303: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attention_mask.size() != (bsz, 1, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-453/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:5408: UserWarning: Exporting aten::index operator of advanced indexing in opset 14 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results. - warnings.warn( - - -Currently, you can load an ONNX file to OpenVINO and serialize the -resulting model into an IR. This may not be optimal for your use case. -The CLIP model contains two separate parts: the image encoder and the -text encoder. You can split the CLIP into two models and call them -separately. - -To convert the model to IR, you can use `Model Optimizer -(MO) `__. 
-When you convert a model to the OpenVINO format, Model Optimizer enables -specifying the inputs and outputs you want to use. During the -conversion, it will trim the remaining parts of the model. Therefore, -when you pass the text inputs and outputs, the MO will “extract” only -the text part of the model. - -You already know the required outputs: ``text_embeds`` and -``image_embeds``. The input for the image is ``pixel_values``, and the -remaining ``input_ids`` and ``attention_mask`` correspond to the text. -You also make the image input of the model static because there is no -variation in the input size after preprocessing. - -.. code:: ipython3 - - from openvino.runtime import serialize - from openvino.tools import mo + import openvino as ov + model_name = model_checkpoint.split("/")[-1] - text_ov_model = mo.convert_model( - onnx_model_path, - compress_to_fp16=True, - input="input_ids,attention_mask", - output="text_embeds", + model.config.torchscript = True + model.forward = model.get_text_features + text_ov_model = ov.convert_model( + model, + example_input={"input_ids": inputs.input_ids, "attention_mask": inputs.attention_mask} ) # get image size after preprocessing from the processor crops_info = processor.image_processor.crop_size.values() if hasattr(processor, "image_processor") else processor.feature_extractor.crop_size.values() - processed_image_height_width = ",".join(map(str, crops_info)) - image_ov_model = mo.convert_model( - onnx_model_path, - compress_to_fp16=True, - input="pixel_values", - input_shape=f"[1,3,{processed_image_height_width}]", - output="image_embeds", + model.forward = model.get_image_features + image_ov_model = ov.convert_model( + model, + example_input={"pixel_values": inputs.pixel_values}, + input=[1,3, *crops_info], ) ov_dir = Path("ir") @@ -453,23 +420,55 @@ variation in the input size after preprocessing. 
image_model_path = ov_dir / f"{model_name}_image.xml" # write resulting models on disk - serialize(text_ov_model, str(text_model_path)) - serialize(image_ov_model, str(image_model_path)) + ov.save_model(text_ov_model, text_model_path) + ov.save_model(image_ov_model, image_model_path) + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. .. parsed-literal:: + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + + +.. 
parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:287: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:295: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:304: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, tgt_len, src_len): + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:327: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): Now, you have two separate models for text and images, stored on disk and ready to be loaded and inferred with OpenVINO™. 
-Inference with OpenVINO™ ------------------------- +Inference with OpenVINO™ +------------------------------------------------------------------ 1. Create an instance of the ``Core`` object that will handle any interaction with OpenVINO runtime for you. @@ -480,17 +479,15 @@ Inference with OpenVINO™ .. code:: ipython3 - from openvino.runtime import Core - - core = Core() + core = ov.Core() text_model = core.read_model(text_model_path) image_model = core.read_model(image_model_path) -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -510,7 +507,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') @@ -536,8 +533,8 @@ the inference process is mostly similar. ) image_inputs = text_inputs.pop("pixel_values") - text_embeds = text_model(text_inputs)[text_model.output()] - image_embeds = image_model(image_inputs)[image_model.output()] + text_embeds = text_model(text_inputs)[0] + image_embeds = image_model(image_inputs)[0] initial_similarity = cosine_similarity(text_embeds, image_embeds) saliency_map = np.zeros((y_dim, x_dim)) @@ -552,7 +549,7 @@ the inference process is mostly similar. similarity = cosine_similarity(text_embeds, image_embeds) - initial_similarity update_saliency_map(saliency_map, similarity, x, y, crop_size) - plot_saliency_map(im_tensor, saliency_map, query) + plot_saliency_map(im_tensor, saliency_map, query); @@ -562,11 +559,11 @@ the inference process is mostly similar. -.. image:: 232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_31_1.png +.. 
image:: 232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_29_1.png -Accelerate Inference with ``AsyncInferQueue`` ---------------------------------------------- +Accelerate Inference with ``AsyncInferQueue`` +--------------------------------------------------------------------------------------- Up until now, the pipeline was synchronous, which means that the data preparation, model input population, model inference, and output @@ -597,7 +594,6 @@ performance hint. .. code:: ipython3 from typing import Dict, Any - from openvino.runtime import AsyncInferQueue, InferRequest image_model = core.read_model(image_model_path) @@ -622,9 +618,11 @@ performance hint. saliency_map = np.zeros((y_dim, x_dim)) Your callback should do the same thing that you did after inference in -the sync mode: - Pull the image embeddings from an inference request. - -Compute cosine similarity between text and image embeddings. - Update -saliency map based. +the sync mode: + +- Pull the image embeddings from an inference request. +- Compute cosine similarity between text and image embeddings. +- Update saliency map based. If you do not change the progress bar, it will show the progress of pushing data to the inference queue. To track the actual progress, you @@ -634,7 +632,7 @@ should pass a progress bar object and call ``update`` method after .. code:: ipython3 def completion_callback( - infer_request: InferRequest, # inferente result + infer_request: ov.InferRequest, # inferente result user_data: Dict[str, Any], # data that you passed along with input pixel values ) -> None: pbar = user_data.pop("pbar") @@ -648,38 +646,40 @@ should pass a progress bar object and call ``update`` method after pbar.update(1) # update the progress bar - infer_queue = AsyncInferQueue(image_model) + infer_queue = ov.AsyncInferQueue(image_model) infer_queue.set_callback(completion_callback) .. 
code:: ipython3 - with tqdm.notebook.tqdm(total=n_iters) as pbar: - for _ in range(n_iters): - x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) - im_crop = get_cropped_image(im_tensor, x, y, crop_size) - - image_inputs = processor(images=[im_crop], return_tensors="np") + def infer(im_tensor, x_dim, y_dim, text_embeds, image_embeds, initial_similarity, saliency_map, query, n_iters, min_crop_size, _tqdm=tqdm.notebook.tqdm, include_query=True): + with _tqdm(total=n_iters) as pbar: + for _ in range(n_iters): + x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) + im_crop = get_cropped_image(im_tensor, x, y, crop_size) + + image_inputs = processor(images=[im_crop], return_tensors="np") + + # push data to the queue + infer_queue.start_async( + # pass inference data as usual + image_inputs.pixel_values, + # the data that will be passed to the callback after the inference complete + { + "text_embeds": text_embeds, + "saliency_map": saliency_map, + "initial_similarity": initial_similarity, + "x": x, + "y": y, + "crop_size": crop_size, + "pbar": pbar, + } + ) - # push data to the queue - infer_queue.start_async( - # pass inference data as usual - image_inputs.pixel_values, - # the data that will be passed to the callback after the inference complete - { - "text_embeds": text_embeds, - "saliency_map": saliency_map, - "initial_similarity": initial_similarity, - "x": x, - "y": y, - "crop_size": crop_size, - "pbar": pbar, - } - ) + # after you pushed all data to the queue you wait until all callbacks finished + infer_queue.wait_all() - # after you pushed all data to the queue you wait until all callbacks finished - infer_queue.wait_all() - - plot_saliency_map(im_tensor, saliency_map, query) + return plot_saliency_map(im_tensor, saliency_map, query if include_query else None) + infer(im_tensor, x_dim, y_dim, text_embeds, image_embeds, initial_similarity, saliency_map, query, n_iters, min_crop_size, _tqdm=tqdm.notebook.tqdm, 
include_query=True); @@ -689,11 +689,11 @@ should pass a progress bar object and call ``update`` method after -.. image:: 232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_37_1.png +.. image:: 232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_35_1.png -Pack the Pipeline into a Function ---------------------------------- +Pack the Pipeline into a Function +--------------------------------------------------------------------------- Let us wrap all code in the function and add a user interface to it. @@ -702,7 +702,7 @@ Let us wrap all code in the function and add a user interface to it. import ipywidgets as widgets - def build_saliency_map(image: Image, query: str, n_iters: int = n_iters, min_crop_size=min_crop_size): + def build_saliency_map(image: Image, query: str, n_iters: int = n_iters, min_crop_size=min_crop_size, _tqdm=tqdm.notebook.tqdm, include_query=True): x_dim, y_dim = image.size im_tensor = np.array(image) @@ -717,27 +717,7 @@ Let us wrap all code in the function and add a user interface to it. 
initial_similarity = cosine_similarity(text_embeds, image_embeds) saliency_map = np.zeros((y_dim, x_dim)) - with tqdm.notebook.tqdm(total=n_iters) as pbar: - for _ in range(n_iters): - x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) - im_crop = get_cropped_image(im_tensor, x, y, crop_size) - - image_inputs = processor(images=[im_crop], return_tensors="np") - infer_queue.start_async( - image_inputs.pixel_values, - { - "text_embeds": text_embeds, - "saliency_map": saliency_map, - "initial_similarity": initial_similarity, - "x": x, - "y": y, - "crop_size": crop_size, - "pbar": pbar, - } - ) - infer_queue.wait_all() - - plot_saliency_map(im_tensor, saliency_map, query) + return infer(im_tensor, x_dim, y_dim, text_embeds, image_embeds, initial_similarity, saliency_map, query, n_iters, min_crop_size, _tqdm=_tqdm, include_query=include_query) The first version will enable passing a link to the image, as you have done so far in the notebook. @@ -820,19 +800,72 @@ The second version will enable loading the image from your computer. interactive(children=(FileUpload(value=(), accept='image/*', description='Image file'), Text(value='', continu… -What To Do Next ---------------- +Interactive demo with Gradio +---------------------------------------------------------------------- + +.. 
code:: ipython3 + + import gradio as gr + + + def _process(image, query, n_iters, min_crop_size, _=gr.Progress(track_tqdm=True)): + saliency_map = build_saliency_map(image, query, n_iters, min_crop_size, _tqdm=tqdm.tqdm, include_query=False) + + return saliency_map + + + demo = gr.Interface( + _process, + [ + gr.Image(label="Image", type="pil"), + gr.Textbox(label="Query"), + gr.Slider(1, 10000, n_iters, label="Number of iterations"), + gr.Slider(1, 200, min_crop_size, label="Minimum crop size"), + ], + gr.Plot(label="Result"), + examples=[[image_path, query]], + ) + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ + +What To Do Next +--------------------------------------------------------- Now that you have a convenient interface and accelerated inference, you -can explore the CLIP capabilities further. For example: - Can CLIP read? -Can it detect text regions in general and specific words on the image? - -Which famous people and places does CLIP know? - Can CLIP identify -places on a map? Or planets, stars, and constellations? - Explore -different CLIP models from HuggingFace Hub: just change the -``model_checkpoint`` at the beginning of the notebook. - Add batch -processing to the pipeline: modify ``get_random_crop_params``, -``get_cropped_image`` and ``update_saliency_map`` functions to process -multiple crop images at once and accelerate the pipeline even more. - -Optimize models with -`NNCF `__ -to get further acceleration. +can explore the CLIP capabilities further. For example: + +- Can CLIP read? Can it detect text regions in general and specific + words on the image? +- Which famous people and places does CLIP know? +- Can CLIP identify places on a map? Or planets, stars, and + constellations? +- Explore different CLIP models from HuggingFace Hub: just change the + ``model_checkpoint`` at the beginning of the notebook. +- Add batch processing to the pipeline: modify + ``get_random_crop_params``, ``get_cropped_image`` and + ``update_saliency_map`` functions to process multiple crop images at + once and accelerate the pipeline even more. +- Optimize models with + `NNCF `__ + to get further acceleration. 
You can find example how to quantize + CLIP model in `this + notebook <../228-clip-zero-shot-image-classification>`__ diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_15_0.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_15_0.png index d9334001b5c35a..bac75bcf983091 100644 --- a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_15_0.png +++ b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_15_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de55a51d782774cc789cdf9d8759541f9e5aabef78730786641c2affc8bfb09e -size 73946 +oid sha256:80b23dd6a69c615e3d374ecfce473bbbfa1491c0e538a391d1c2ca88ea53ca37 +size 74041 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_17_0.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_17_0.png index 69ac5c8055b673..4beb6c5c300ecb 100644 --- a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_17_0.png +++ b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_17_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:369238063ffcca3d69de022cae0f47ef5dab6c9440f15a60cbcf325ba4543e9e -size 499941 +oid sha256:754dcfb3e248c5ab823db8785e22d9c59a3571a0b6ccd6d028dab59446345797 +size 473017 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_19_1.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_19_1.png index 286025078a3401..8d966d74bee754 100644 --- 
a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_19_1.png +++ b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_19_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38cfca02bb2ce94e34e3b1e8302a4deb274696b21b3c144c502f9019441530e5 -size 502742 +oid sha256:2bf4b05549342476c8b2e37070ff4e8e6b141ae8d3c20c1ac2b18babbf1208d4 +size 472958 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_29_1.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_29_1.png new file mode 100644 index 00000000000000..ce4a0fb00a9f06 --- /dev/null +++ b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_29_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a1f5350d5312a5a71439c08b6fdab7fb2cd48ca31945e5b96e500ca33ec02a +size 475731 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_31_1.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_31_1.png deleted file mode 100644 index 53305025624bbe..00000000000000 --- a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_31_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dfd7bb983b20aa47ced3e0c311860da50090522d81d2e0f75fb92cd09e1648cb -size 501301 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_35_1.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_35_1.png new file mode 100644 index 00000000000000..dbaf3943e91332 --- /dev/null +++ 
b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_35_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef15ba5b5e6ade63354a852c28352cdcb5bce50f0014ac67d00a909fc2704723 +size 471008 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_37_1.png b/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_37_1.png deleted file mode 100644 index 9544a8a17ec38d..00000000000000 --- a/docs/notebooks/232-clip-language-saliency-map-with-output_files/232-clip-language-saliency-map-with-output_37_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2683fdc809e1aaa072f65543a725c245870345c895b17cd430bac2c55e3f2409 -size 496940 diff --git a/docs/notebooks/232-clip-language-saliency-map-with-output_files/index.html b/docs/notebooks/232-clip-language-saliency-map-with-output_files/index.html new file mode 100644 index 00000000000000..12311594f67477 --- /dev/null +++ b/docs/notebooks/232-clip-language-saliency-map-with-output_files/index.html @@ -0,0 +1,11 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/232-clip-language-saliency-map-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/232-clip-language-saliency-map-with-output_files/


../
+232-clip-language-saliency-map-with-output_15_0..> 31-Oct-2023 00:35               74041
+232-clip-language-saliency-map-with-output_17_0..> 31-Oct-2023 00:35              473017
+232-clip-language-saliency-map-with-output_19_1..> 31-Oct-2023 00:35              472958
+232-clip-language-saliency-map-with-output_29_1..> 31-Oct-2023 00:35              475731
+232-clip-language-saliency-map-with-output_35_1..> 31-Oct-2023 00:35              471008
+

+ diff --git a/docs/notebooks/233-blip-convert-with-output.rst b/docs/notebooks/233-blip-convert-with-output.rst new file mode 100644 index 00000000000000..2061f204b64aec --- /dev/null +++ b/docs/notebooks/233-blip-convert-with-output.rst @@ -0,0 +1,680 @@ +Visual Question Answering and Image Captioning using BLIP and OpenVINO +====================================================================== + +Humans perceive the world through vision and language. A longtime goal +of AI is to build intelligent agents that can understand the world +through vision and language inputs to communicate with humans through +natural language. In order to achieve this goal, vision-language +pre-training has emerged as an effective approach, where deep neural +network models are pre-trained on large scale image-text datasets to +improve performance on downstream vision-language tasks, such as +image-text retrieval, image captioning, and visual question answering. + +`BLIP `__ is a language-image +pre-training framework for unified vision-language understanding and +generation. BLIP achieves state-of-the-art results on a wide range of +vision-language tasks. This tutorial demonstrates how to use BLIP for +visual question answering and image captioning. + +The tutorial consists of the following parts: + +1. Instantiate a BLIP model. +2. Convert the BLIP model to OpenVINO IR. +3. Run visual question answering and image captioning with OpenVINO. 
+ +**Table of contents:** + + +- `Background <#background>`__ + + - `Image Captioning <#image-captioning>`__ + - `Visual Question Answering <#visual-question-answering>`__ + +- `Instantiate Model <#instantiate-model>`__ +- `Convert Models to OpenVINO + IR <#convert-models-to-openvino-ir>`__ + + - `Vision Model <#vision-model>`__ + - `Text Encoder <#text-encoder>`__ + - `Text Decoder <#text-decoder>`__ + +- `Run OpenVINO Model <#run-openvino-model>`__ + + - `Prepare Inference + Pipeline <#prepare-inference-pipeline>`__ + - `Select inference device <#select-inference-device>`__ + - `Image Captioning <#image-captioning>`__ + - `Question Answering <#question-answering>`__ + +- `Interactive demo <#interactive-demo>`__ +- `Next steps <#next-steps>`__ + +Background +---------------------------------------------------- + +Visual language processing is a branch of artificial intelligence that +focuses on creating algorithms designed to enable computers to more +accurately understand images and their content. + +Popular tasks include: + +- **Text to Image Retrieval** - a semantic task that aims to find the + most relevant image for a given text description. +- **Image Captioning** - a semantic task that aims to provide a text + description for image content. +- **Visual Question Answering** - a semantic task that aims to answer + questions based on image content. + +As shown in the diagram below, these three tasks differ in the input +provided to the AI system. For text-to-image retrieval, you have a +predefined gallery of images for search and a user-requested text +description (query). Image captioning can be represented as a particular +case of visual question answering, where you have a predefined question +“What is in the picture?” and various images provided by a user. For +visual question answering, both the text-based question and image +context are variables requested by a user. + +|image0| + +This notebook does not focus on Text to Image retrieval. 
Instead, it +considers Image Captioning and Visual Question Answering. + +Image Captioning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Image Captioning is the task of describing the content of an image in +words. This task lies at the intersection of computer vision and natural +language processing. Most image captioning systems use an +encoder-decoder framework, where an input image is encoded into an +intermediate representation of the information in the image, and then +decoded into a descriptive text sequence. + +|image1| + +Visual Question Answering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Visual Question Answering (VQA) is the task of answering text-based +questions about image content. + +|image2| + +For a better understanding of how VQA works, let us consider a +traditional NLP task like Question Answering, which aims to retrieve the +answer to a question from a given text input. Typically, a question +answering pipeline consists of three steps: + +|image3| + +1. Question analysis - analysis of provided question in natural language + form to understand the object in the question and additional context. + For example, if you have a question like “How many bridges in + Paris?”, question words *“how many”* gives a hint that the answer is + more likely to be a number, *“bridges”* is the target object of the + question and *" in Paris"* serves as additional context for the + search. +2. Build query for search - use analyzed results to formalize query for + finding the most relevant information. +3. Perform a search in the knowledge base - send the query to a + knowledge base, typically provided text documents or databases serve + as a source of knowledge. + +|image4| + +The difference between text-based question answering and visual question +answering is that an image is used as context and the knowledge base. 
+ +|image5| + +Answering arbitrary questions about images is a complex problem because +it requires involving a lot of computer vision sub-tasks. In the table +below, you can find an example of questions and the required computer +vision skills to find answers. + ++-----------------------------+----------------------------------------+ +| Computer vision task | Question examples | ++=============================+========================================+ +| Object recognition | What is shown in the picture? What is | +| | it? | ++-----------------------------+----------------------------------------+ +| Object detection | Is there any object (dog, man, book) | +| | in the image? Where is … located? | ++-----------------------------+----------------------------------------+ +| Object and image attribute | What color is an umbrella? Does this | +| recognition | man wear glasses? Is there color in | +| | the image? | ++-----------------------------+----------------------------------------+ +| Scene recognition | Is it rainy? What celebration is | +| | pictured? | ++-----------------------------+----------------------------------------+ +| Object counting | How many players are there on the | +| | football field? How many steps are | +| | there on the stairs? | ++-----------------------------+----------------------------------------+ +| Activity recognition | Is the baby crying? What is the woman | +| | cooking? What are they doing? | ++-----------------------------+----------------------------------------+ +| Spatial relationships among | What is located between the sofa and | +| objects | the armchair? What is in the bottom | +| | left corner? | ++-----------------------------+----------------------------------------+ +| Commonsense reasoning | Does she have 100% vision? Does this | +| | person have children? | ++-----------------------------+----------------------------------------+ +| Knowledge-based reasoning | Is it a vegetarian pizza? 
| ++-----------------------------+----------------------------------------+ +| Text recognition | What is the title of the book? What is | +| | shown on the screen? | ++-----------------------------+----------------------------------------+ + +There are a lot of applications for visual question answering: + +- Aid Visually Impaired Persons: VQA models can be used to reduce + barriers for visually impaired people by helping them get information + about images from the web and the real world. +- Education: VQA models can be used to improve visitor experiences at + museums by enabling observers to directly ask questions they are + interested in or to bring more interactivity to schoolbooks for + children interested in acquiring specific knowledge. +- E-commerce: VQA models can retrieve information about products using + photos from online stores. +- Independent expert assessment: VQA models can provide objective + assessments in sports competitions, medical diagnosis, and forensic + examination. + +.. |image0| image:: https://user-images.githubusercontent.com/29454499/221755717-a5b51b7e-523c-461f-b30c-4edbfaf9a134.png +.. |image1| image:: https://user-images.githubusercontent.com/29454499/221640847-1868117c-aac0-4806-99a4-34f218e98bb8.png +.. |image2| image:: https://user-images.githubusercontent.com/29454499/221641984-3c6d8b2f-dd0d-4302-a4d8-0f8564fca772.png +.. |image3| image:: https://user-images.githubusercontent.com/29454499/221760881-378f1ea8-eadc-4610-aff0-69ecabf62fff.png +.. |image4| image:: https://user-images.githubusercontent.com/29454499/222094861-3cafdf9f-d700-4741-b6c5-fb09c1a4da9a.png +.. |image5| image:: https://user-images.githubusercontent.com/29454499/222095118-3d5826e4-2662-4d1c-abf2-a515f23d6d6a.png + +Instantiate Model +----------------------------------------------------------- + +The BLIP model was proposed in the `BLIP: Bootstrapping Language-Image +Pre-training for Unified Vision-Language Understanding and +Generation `__ paper. + +.. 
figure:: https://github.com/salesforce/BLIP/raw/main/BLIP.gif + :alt: blip.gif + + blip.gif + +To pre-train a unified vision-language model with both understanding and +generation capabilities, BLIP introduces a multimodal mixture of an +encoder-decoder and a multi-task model which can operate in one of the +three modes: + +- **Unimodal encoders**, which separately encode images and text. The + image encoder is a vision transformer. The text encoder is the same + as BERT. +- **Image-grounded text encoder**, which injects visual information by + inserting a cross-attention layer between the self-attention layer + and the feed-forward network for each transformer block of the text + encoder. +- **Image-grounded text decoder**, which replaces the bi-directional + self-attention layers in the text encoder with causal self-attention + layers. + +More details about the model can be found in the `research +paper `__, `Salesforce +blog `__, +`GitHub repo `__ and `Hugging Face +model +documentation `__. + +In this tutorial, you will use the +`blip-vqa-base `__ +model available for download from `Hugging +Face `__. The same actions are also applicable +to other similar models from the BLIP family. Although this model class +is designed to perform question answering, its components can also be +reused for image captioning. + +To start working with the model, you need to instantiate the +``BlipForQuestionAnswering`` class, using ``from_pretrained`` method. +``BlipProcessor`` is a helper class for preparing input data for both +text and vision modalities and postprocessing of generation results. + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision + %pip install -q "transformers >= 4.26.0" gradio "openvino>=2023.1.0" matplotlib + +.. 
code:: ipython3 + + import sys + import time + from PIL import Image + from transformers import BlipProcessor, BlipForQuestionAnswering + + sys.path.append("../utils") + from notebook_utils import download_file + + # get model and processor + processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base") + model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base") + + # setup test input: download and read image, prepare question + img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg' + download_file(img_url, "demo.jpg") + raw_image = Image.open("demo.jpg").convert('RGB') + question = "how many dogs are in the picture?" + # preprocess input data + inputs = processor(raw_image, question, return_tensors="pt") + + start = time.perf_counter() + # perform generation + out = model.generate(**inputs) + end = time.perf_counter() - start + + # postprocess result + answer = processor.decode(out[0], skip_special_tokens=True) + + +.. parsed-literal:: + + 2023-10-27 13:39:08.110243: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-27 13:39:08.267533: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-27 13:39:09.184395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11080). 
Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.) + return torch._C._cuda_getDeviceCount() > 0 + + +.. parsed-literal:: + + 'demo.jpg' already exists. + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/generation/utils.py:1260: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation. + warnings.warn( + + +.. code:: ipython3 + + print(f"Processing time: {end:.4f} s") + + +.. parsed-literal:: + + Processing time: 0.5272 s + + +.. code:: ipython3 + + from utils import visualize_results + + fig = visualize_results(raw_image, answer, question) + + + +.. image:: 233-blip-convert-with-output_files/233-blip-convert-with-output_7_0.png + + +Convert Models to OpenVINO IR +----------------------------------------------------------------------- + +Starting from OpenVINO 2023.0 release, OpenVINO supports direct PyTorch +models conversion to OpenVINO Intermediate Representation (IR) format to +take advantage of advanced OpenVINO optimization tools and features. +You need to provide a model object and input data for model tracing to +OpenVINO Model Conversion API. The ``ov.convert_model`` function converts a +PyTorch model instance to an ``ov.Model`` object that can be used for +compilation on device or saved on disk using ``ov.save_model`` in +compressed FP16 format. + +The model consists of three parts: + +- vision_model - an encoder for image representation. +- text_encoder - an encoder for input query, used for question + answering and text-to-image retrieval only. +- text_decoder - a decoder for output answer. 
+ +To be able to perform multiple tasks, using the same model components, +you should convert each part independently. + +Vision Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The vision model accepts float input tensors with the [1,3,384,384] +shape, containing RGB image pixel values normalized in the [0,1] range. + +.. code:: ipython3 + + import torch + from pathlib import Path + import openvino as ov + + VISION_MODEL_OV = Path("blip_vision_model.xml") + vision_model = model.vision_model + vision_model.eval() + + # check that model works and save it outputs for reusage as text encoder input + with torch.no_grad(): + vision_outputs = vision_model(inputs["pixel_values"]) + + # if openvino model does not exist, convert it to IR + if not VISION_MODEL_OV.exists(): + + # export pytorch model to ov.Model + with torch.no_grad(): + ov_vision_model = ov.convert_model(vision_model, example_input=inputs["pixel_values"]) + # save model on disk for next usages + ov.save_model(ov_vision_model, VISION_MODEL_OV) + print(f"Vision model successfuly converted and saved to {VISION_MODEL_OV}") + else: + print(f"Vision model will be loaded from {VISION_MODEL_OV}") + + +.. parsed-literal:: + + Vision model will be loaded from blip_vision_model.xml + + +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The text encoder is used by visual question answering tasks to build a +question embedding representation. It takes ``input_ids`` with a +tokenized question and output image embeddings obtained from the vision +model and attention masks for them. + +.. 
code:: ipython3 + + TEXT_ENCODER_OV = Path("blip_text_encoder.xml") + + + text_encoder = model.text_encoder + text_encoder.eval() + + # if openvino model does not exist, convert it to IR + if not TEXT_ENCODER_OV.exists(): + # prepare example inputs + image_embeds = vision_outputs[0] + image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long) + input_dict = {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"], "encoder_hidden_states": image_embeds, "encoder_attention_mask": image_attention_mask} + # export PyTorch model + with torch.no_grad(): + ov_text_encoder = ov.convert_model(text_encoder, example_input=input_dict) + # save model on disk for next usages + ov.save_model(ov_text_encoder, TEXT_ENCODER_OV) + print(f"Text encoder successfuly converted and saved to {TEXT_ENCODER_OV}") + else: + print(f"Text encoder will be loaded from {TEXT_ENCODER_OV}") + + +.. parsed-literal:: + + Text encoder will be loaded from blip_text_encoder.xml + + +Text Decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The text decoder is responsible for generating the sequence of tokens to +represent model output (answer to question or caption), using an image +(and question, if required) representation. The generation approach is +based on the assumption that the probability distribution of a word +sequence can be decomposed into the product of conditional next word +distributions. In other words, model predicts the next token in the loop +guided by previously generated tokens until the stop-condition will be +not reached (generated sequence of maximum length or end of string token +obtained). The way the next token will be selected over predicted +probabilities is driven by the selected decoding methodology. You can +find more information about the most popular decoding methods in this +`blog `__. The entry point +for the generation process for models from the Hugging Face Transformers +library is the ``generate`` method. 
You can find more information about +its parameters and configuration in the +`documentation `__. +To preserve flexibility in the selection of decoding methodology, you will +convert only model inference for one step. + +To optimize the generation process and use memory more efficiently, the +``use_cache=True`` option is enabled. Since the output side is +auto-regressive, an output token hidden state remains the same once +computed for every further generation step. Therefore, recomputing it +every time you want to generate a new token seems wasteful. With the +cache, the model saves the hidden state once it has been computed. The +model only computes the one for the most recently generated output token +at each time step, re-using the saved ones for hidden tokens. This +reduces the generation complexity from O(n^3) to O(n^2) for a +transformer model. More details about how it works can be found in this +`article `__. +With this option, the model gets the previous step’s hidden states as +input and additionally provides hidden states for the current step as +output. Initially, you have no previous step hidden states, so the first +step does not require you to provide them, but we should initialize them +with default values. In PyTorch, past hidden state outputs are represented +as a list of pairs (hidden state for key, hidden state for value) for +each transformer layer in the model. The OpenVINO model does not support +nested outputs, so they will be flattened. + +Similar to ``text_encoder``, ``text_decoder`` can work with input +sequences of different lengths and requires preserving dynamic input +shapes. + +.. 
code:: ipython3 + + text_decoder = model.text_decoder + text_decoder.eval() + + TEXT_DECODER_OV = Path("blip_text_decoder_with_past.xml") + + # prepare example inputs + input_ids = torch.tensor([[30522]]) # begin of sequence token id + attention_mask = torch.tensor([[1]]) # attention mask for input_ids + encoder_hidden_states = torch.rand((1, 10, 768)) # encoder last hidden state from text_encoder + encoder_attention_mask = torch.ones((1, 10), dtype=torch.long) # attention mask for encoder hidden states + + input_dict = {"input_ids": input_ids, "attention_mask": attention_mask, "encoder_hidden_states": encoder_hidden_states, "encoder_attention_mask": encoder_attention_mask} + text_decoder_outs = text_decoder(**input_dict) + # extend input dictionary with hidden states from previous step + input_dict["past_key_values"] = text_decoder_outs["past_key_values"] + + text_decoder.config.torchscript = True + if not TEXT_DECODER_OV.exists(): + # export PyTorch model + with torch.no_grad(): + ov_text_decoder = ov.convert_model(text_decoder, example_input=input_dict) + # save model on disk for next usages + ov.save_model(ov_text_decoder, TEXT_DECODER_OV) + print(f"Text decoder successfuly converted and saved to {TEXT_DECODER_OV}") + else: + print(f"Text decoder will be loaded from {TEXT_DECODER_OV}") + + +.. parsed-literal:: + + Text decoder will be loaded from blip_text_decoder_with_past.xml + + +Run OpenVINO Model +------------------------------------------------------------ + +Prepare Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As discussed before, the model consists of several blocks which can be +reused for building pipelines for different tasks. In the diagram below, +you can see how image captioning works: + +|image21| + +The visual model accepts the image preprocessed by ``BlipProcessor`` as +input and produces image embeddings, which are directly passed to the +text decoder for generation caption tokens. 
When generation is finished, +the output sequence of tokens is provided to ``BlipProcessor`` for decoding +to text using a tokenizer. + +The pipeline for question answering looks similar, but with additional +question processing. In this case, image embeddings and question +tokenized by ``BlipProcessor`` are provided to the text encoder and then +multimodal question embedding is passed to the text decoder for +performing generation of answers. + +|image31| + +The next step is implementing both pipelines using OpenVINO models. + +.. |image21| image:: https://user-images.githubusercontent.com/29454499/221865836-a56da06e-196d-449c-a5dc-4136da6ab5d5.png +.. |image31| image:: https://user-images.githubusercontent.com/29454499/221868167-d0081add-d9f3-4591-80e7-4753c88c1d0a.png + +.. code:: ipython3 + + # create OpenVINO Core object instance + core = ov.Core() + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select a device from the dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + # load models on device + ov_vision_model = core.compile_model(VISION_MODEL_OV, device.value) + ov_text_encoder = core.compile_model(TEXT_ENCODER_OV, device.value) + ov_text_decoder_with_past = core.compile_model(TEXT_DECODER_OV, device.value) + +.. 
code:: ipython3 + + from functools import partial + from blip_model import text_decoder_forward + + text_decoder.forward = partial(text_decoder_forward, ov_text_decoder_with_past=ov_text_decoder_with_past) + +The model helper class has two methods for generation: +**generate_answer** - used for visual question answering, +**generate_caption** - used for caption generation. For initialization, +model class accepts compiled OpenVINO models for the text encoder, +vision model and text decoder, and also configuration for generation and +initial token for decoder work. + +.. code:: ipython3 + + from blip_model import OVBlipModel + + ov_model = OVBlipModel(model.config, model.decoder_start_token_id, ov_vision_model, ov_text_encoder, text_decoder) + out = ov_model.generate_answer(**inputs, max_length=20) + +Now, the model is ready for generation. + +Image Captioning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + out = ov_model.generate_caption(inputs["pixel_values"], max_length=20) + caption = processor.decode(out[0], skip_special_tokens=True) + fig = visualize_results(raw_image, caption) + + + +.. image:: 233-blip-convert-with-output_files/233-blip-convert-with-output_25_0.png + + +Question Answering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + start = time.perf_counter() + out = ov_model.generate_answer(**inputs, max_length=20) + end = time.perf_counter() - start + answer = processor.decode(out[0], skip_special_tokens=True) + fig = visualize_results(raw_image, answer, question) + + + +.. image:: 233-blip-convert-with-output_files/233-blip-convert-with-output_27_0.png + + +.. code:: ipython3 + + print(f"Processing time: {end:.4f}") + + +.. parsed-literal:: + + Processing time: 0.1617 + + +Interactive demo +---------------------------------------------------------- + +.. 
code:: ipython3 + + import gradio as gr + + + def generate_answer(img, question): + if img is None: + raise gr.Error("Please upload an image or choose one from the examples list") + start = time.perf_counter() + inputs = processor(img, question, return_tensors="pt") + output = ( + ov_model.generate_answer(**inputs, max_length=20) + if len(question) + else ov_model.generate_caption(inputs["pixel_values"], max_length=20) + ) + answer = processor.decode(output[0], skip_special_tokens=True) + elapsed = time.perf_counter() - start + html = f"

Processing time: {elapsed:.4f}

" + return answer, html + + + demo = gr.Interface( + generate_answer, + [ + gr.Image(label="Image"), + gr.Textbox( + label="Question", + info="If this field is empty, an image caption will be generated", + ), + ], + [gr.Text(label="Answer"), gr.HTML()], + examples=[["demo.jpg", ""], ["demo.jpg", question]], + allow_flagging="never" + ) + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + +Next steps +---------------------------------------------------- + +Open the `233-blip-optimize <233-blip-optimize.ipynb>`__ notebook to +quantize vision and text encoder models with the Post-training +Quantization API of NNCF and compress weights of the text decoder. Then +compare the converted and optimized OpenVINO models. diff --git a/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_25_0.png b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_25_0.png new file mode 100644 index 00000000000000..549f0ae3a89396 --- /dev/null +++ b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_25_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a856ae0104bb590a299847d1e240ef6ed95045eeb5a3df5d6ea6cb296ef331 +size 206940 diff --git a/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_27_0.png b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_27_0.png new file mode 100644 index 00000000000000..a5092e34c09426 --- /dev/null +++ b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_27_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926b7c82f0a4c32a8b4a7d57b927a63b2819491e95c0a9c662009ad5226d9409 +size 210551 diff --git 
a/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_7_0.png b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_7_0.png new file mode 100644 index 00000000000000..a5092e34c09426 --- /dev/null +++ b/docs/notebooks/233-blip-convert-with-output_files/233-blip-convert-with-output_7_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926b7c82f0a4c32a8b4a7d57b927a63b2819491e95c0a9c662009ad5226d9409 +size 210551 diff --git a/docs/notebooks/233-blip-convert-with-output_files/index.html b/docs/notebooks/233-blip-convert-with-output_files/index.html new file mode 100644 index 00000000000000..1bbb317e912229 --- /dev/null +++ b/docs/notebooks/233-blip-convert-with-output_files/index.html @@ -0,0 +1,9 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/233-blip-convert-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/233-blip-convert-with-output_files/


../
+233-blip-convert-with-output_25_0.png              31-Oct-2023 00:35              206940
+233-blip-convert-with-output_27_0.png              31-Oct-2023 00:35              210551
+233-blip-convert-with-output_7_0.png               31-Oct-2023 00:35              210551
+

+ + diff --git a/docs/notebooks/233-blip-optimize-with-output.rst b/docs/notebooks/233-blip-optimize-with-output.rst new file mode 100644 index 00000000000000..1f38fe4a212663 --- /dev/null +++ b/docs/notebooks/233-blip-optimize-with-output.rst @@ -0,0 +1,447 @@ +Post-Training Quantization and Weights Compression of Salesforce BLIP model with NNCF +===================================================================================== + +The goal of this tutorial is to demonstrate how to speed up the model by +applying 8-bit post-training quantization and data-free int8 weight +compression from `NNCF `__ +(Neural Network Compression Framework) to OpenVINO IR models and infer +the optimized BLIP model via OpenVINO™ Toolkit. The optimization process +contains the following steps: + +1. Download and preprocess dataset for quantization. +2. Quantize the converted vision and text encoder OpenVINO models from + `notebook <233-blip-convert.ipynb>`__ with NNCF. +3. Compress weights of the OpenVINO text decoder model from + `notebook <233-blip-convert.ipynb>`__ with NNCF. +4. Check the model result using the same input data from the + `notebook <233-blip-convert.ipynb>`__. +5. Compare model size of converted and optimized models. +6. Compare performance of converted and optimized models. + +.. + + **NOTE**: you should run + `233-blip-convert <233-blip-convert.ipynb>`__ notebook first to + generate OpenVINO IR models that are used for optimization. 
+ +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Quantize <#quantize>`__ + + - `Prepare dataset <#prepare-dataset>`__ + - `Quantize Vision Model <#quantize-vision-model>`__ + - `Quantize Text Encoder <#quantize-text-encoder>`__ + +- `Compress text decoder weights <#compress-weights>`__ +- `Run optimized OpenVINO + model <#run-optimized-openvino-model>`__ + + - `Image Captioning <#image-captioning>`__ + - `Question Answering <#question-answering>`__ + - `Compare file sizes <#compare-file-sizes>`__ + - `Compare inference time of the FP16 and optimized + models <#compare-inference-time-of-the-fp-and-optimized-models>`__ + +Prerequisites +------------------------------------------------------- + +.. code:: ipython3 + + %pip install -q datasets + +.. code:: ipython3 + + from pathlib import Path + + VISION_MODEL_OV = Path("blip_vision_model.xml") + TEXT_ENCODER_OV = Path("blip_text_encoder.xml") + TEXT_DECODER_OV = Path("blip_text_decoder_with_past.xml") + + if not (VISION_MODEL_OV.exists() and TEXT_ENCODER_OV.exists() and TEXT_DECODER_OV.exists()): + raise RuntimeError('This notebook should be run after 233-blip-convert notebook') + +.. code:: ipython3 + + from transformers import BlipProcessor + + processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base") + +Quantize +-------------------------------------------------- + +`NNCF `__ enables +post-training quantization by adding the quantization layers into the +model graph and then using a subset of the training dataset to +initialize the parameters of these additional quantization layers. The +framework is designed so that modifications to your original training +code are minor. + +The optimization process contains the following steps: + +1. Create a dataset for quantization. +2. Run ``nncf.quantize`` to get a quantized model from the pre-trained + ``FP16`` model. +3. Serialize the ``INT8`` model using ``openvino.save_model`` function. + +.. 
+ + **NOTE**: Quantization is time and memory consuming operation. + Running quantization code below may take some time. + +Prepare dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `VQAv2 `__ is a dataset containing +open-ended questions about images. These questions require an +understanding of vision, language and commonsense knowledge to answer. + +.. code:: ipython3 + + import numpy as np + from datasets import load_dataset + from tqdm.notebook import tqdm + + def preprocess_batch(batch, vision_model, inputs_info): + """ + Preprocesses a dataset batch by loading and transforming image and text data. + VQAv2 dataset contains multiple questions to image. + To reduce dataset preparation time we will store preprocessed images in `inputs_info`. + """ + image_id = batch["image_id"] + if image_id in inputs_info: + inputs = processor(text=batch['question'], return_tensors="np") + pixel_values = inputs_info[image_id]["pixel_values"] + encoder_hidden_states = inputs_info[image_id]["encoder_hidden_states"] + else: + inputs = processor(images=batch["image"], text=batch["question"], return_tensors="np") + pixel_values = inputs["pixel_values"] + encoder_hidden_states = vision_model(pixel_values)[vision_model.output(0)] + inputs_info[image_id] = { + "pixel_values": pixel_values, + "encoder_hidden_states": encoder_hidden_states, + "text_encoder_inputs": [] + } + + text_encoder_inputs = { + "input_ids": inputs["input_ids"], + "attention_mask": inputs["attention_mask"] + } + inputs_info[image_id]["text_encoder_inputs"].append(text_encoder_inputs) + + + def prepare_input_data(dataloader, vision_model, opt_init_steps): + """ + Store calibration subset in List to reduce quantization time. 
+ """ + inputs_info = {} + for batch in tqdm(dataloader, total=opt_init_steps, desc="Prepare calibration data"): + preprocess_batch(batch, vision_model, inputs_info) + + calibration_subset = [] + for image_id in inputs_info: + pixel_values = inputs_info[image_id]["pixel_values"] + encoder_hidden_states = inputs_info[image_id]["encoder_hidden_states"] + encoder_attention_mask = np.ones(encoder_hidden_states.shape[:-1], dtype=int) + for text_encoder_inputs in inputs_info[image_id]["text_encoder_inputs"]: + text_encoder_inputs["encoder_hidden_states"] = encoder_hidden_states + text_encoder_inputs["encoder_attention_mask"] = encoder_attention_mask + blip_inputs = { + "vision_model_inputs": {"pixel_values": pixel_values}, + "text_encoder_inputs": text_encoder_inputs, + } + calibration_subset.append(blip_inputs) + return calibration_subset + + + def prepare_dataset(vision_model, opt_init_steps=300, streaming=True): + """ + Prepares a vision-text dataset for quantization. + """ + dataset = load_dataset("HuggingFaceM4/VQAv2", split="train", streaming=streaming) + train_dataset = dataset.shuffle(seed=42).take(opt_init_steps) + calibration_subset = prepare_input_data(train_dataset, vision_model, opt_init_steps) + return calibration_subset + +Loading and processing the dataset in streaming mode may take a long +time and depends on your internet connection. + +.. code:: ipython3 + + import nncf + import openvino as ov + + comp_vision_model = ov.compile_model(VISION_MODEL_OV) + calibration_data = prepare_dataset(comp_vision_model) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + Repo card metadata block was not found. Setting CardData to empty. + + + +.. parsed-literal:: + + Prepare calibration data: 0%| | 0/300 [00:00`__. + +.. 
code:: ipython3 + + q_ov_vision_model = ov.compile_model(VISION_MODEL_OV_INT8) + q_ov_text_encoder = ov.compile_model(TEXT_ENCODER_OV_INT8) + q_ov_text_decoder_with_past = ov.compile_model(TEXT_DECODER_OV_INT8) + +.. code:: ipython3 + + from functools import partial + from transformers import BlipForQuestionAnswering + from blip_model import OVBlipModel, text_decoder_forward + + model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base") + text_decoder = model.text_decoder + text_decoder.eval() + + text_decoder.forward = partial(text_decoder_forward, ov_text_decoder_with_past=q_ov_text_decoder_with_past) + int8_model = OVBlipModel(model.config, model.decoder_start_token_id, q_ov_vision_model, q_ov_text_encoder, text_decoder) + +.. code:: ipython3 + + from PIL import Image + + raw_image = Image.open("demo.jpg").convert('RGB') + question = "how many dogs are in the picture?" + # preprocess input data + inputs = processor(raw_image, question, return_tensors="pt") + +Image Captioning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + from utils import visualize_results + + out = int8_model.generate_caption(inputs["pixel_values"], max_length=20) + caption = processor.decode(out[0], skip_special_tokens=True) + fig = visualize_results(raw_image, caption) + + + +.. image:: 233-blip-optimize-with-output_files/233-blip-optimize-with-output_23_0.png + + +Question Answering +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + out = int8_model.generate_answer(**inputs, max_length=20) + answer = processor.decode(out[0], skip_special_tokens=True) + fig = visualize_results(raw_image, answer, question) + + + +.. image:: 233-blip-optimize-with-output_files/233-blip-optimize-with-output_25_0.png + + +Compare file sizes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: ipython3 + + def calculate_compression_rate(ov_model_path): + fp16_ir_model_size = Path(ov_model_path).with_suffix(".bin").stat().st_size / 1024 + int8_model_path = str(ov_model_path).replace(".xml", "_int8.xml") + quantized_model_size = Path(int8_model_path).with_suffix(".bin").stat().st_size / 1024 + print(f'{ov_model_path.as_posix().split(".")[0]}') + print(f" * FP16 IR model size: {fp16_ir_model_size:.2f} KB") + print(f" * INT8 model size: {quantized_model_size:.2f} KB") + print(f" * Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") + +.. code:: ipython3 + + for model_path in [VISION_MODEL_OV, TEXT_ENCODER_OV, TEXT_DECODER_OV]: + calculate_compression_rate(model_path) + + +.. parsed-literal:: + + blip_vision_model + * FP16 IR model size: 168145.68 KB + * INT8 model size: 84915.75 KB + * Model compression rate: 1.980 + blip_text_encoder + * FP16 IR model size: 268087.17 KB + * INT8 model size: 134677.23 KB + * Model compression rate: 1.991 + blip_text_decoder_with_past + * FP16 IR model size: 269303.42 KB + * INT8 model size: 135450.65 KB + * Model compression rate: 1.988 + + +Compare inference time of the FP16 and optimized models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To measure the inference performance of the ``FP16`` and ``INT8`` +models, we use median inference time on 100 samples of the calibration +dataset. So we can approximately estimate the speed up of the dynamic +quantized models. + + **NOTE**: For the most accurate performance estimation, it is + recommended to run ``benchmark_app`` in a terminal/command prompt + after closing other applications with static shapes. + +.. 
code:: ipython3 + + import time + import torch + + def calculate_inference_time(blip_model, calibration_data, generate_caption): + inference_time = [] + for inputs in calibration_data: + pixel_values = torch.from_numpy(inputs["vision_model_inputs"]["pixel_values"]) + input_ids = torch.from_numpy(inputs["text_encoder_inputs"]["input_ids"]) + attention_mask = torch.from_numpy(inputs["text_encoder_inputs"]["attention_mask"]) + + start = time.perf_counter() + if generate_caption: + _ = blip_model.generate_caption(pixel_values, max_length=20) + else: + _ = blip_model.generate_answer(pixel_values=pixel_values, input_ids=input_ids, attention_mask=attention_mask, max_length=20) + end = time.perf_counter() + delta = end - start + inference_time.append(delta) + return np.median(inference_time) + +.. code:: ipython3 + + fp_original_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base") + fp_text_decoder = fp_original_model.text_decoder + fp_text_decoder.eval() + + comp_text_encoder = ov.compile_model(TEXT_ENCODER_OV) + comp_text_decoder_with_past = ov.compile_model(TEXT_DECODER_OV) + fp_text_decoder.forward = partial(text_decoder_forward, ov_text_decoder_with_past=comp_text_decoder_with_past) + fp16_model = OVBlipModel(model.config, model.decoder_start_token_id, comp_vision_model, comp_text_encoder, fp_text_decoder) + +.. code:: ipython3 + + validation_data = calibration_data[:100] + + int8_caption_latency = calculate_inference_time(int8_model, validation_data, generate_caption=True) + fp16_caption_latency = calculate_inference_time(fp16_model, validation_data, generate_caption=True) + + print(f"Image Captioning speed up: {fp16_caption_latency / int8_caption_latency:.3f}") + +.. 
code:: ipython3 + + int8_generate_answer_latency = calculate_inference_time(int8_model, validation_data, generate_caption=False) + fp16_generate_answer_latency = calculate_inference_time(fp16_model, validation_data, generate_caption=False) + print(f"Question Answering speed up: {fp16_generate_answer_latency / int8_generate_answer_latency:.3f}") diff --git a/docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_23_0.png b/docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_23_0.png new file mode 100644 index 00000000000000..a922eb824d2f34 --- /dev/null +++ b/docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_23_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72633b087de144c1c2546856b4d1889785babc0e953bd9a7baac667cd61409d6 +size 206216 diff --git a/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_30_0.png b/docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_25_0.png similarity index 100% rename from docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_30_0.png rename to docs/notebooks/233-blip-optimize-with-output_files/233-blip-optimize-with-output_25_0.png diff --git a/docs/notebooks/233-blip-optimize-with-output_files/index.html b/docs/notebooks/233-blip-optimize-with-output_files/index.html new file mode 100644 index 00000000000000..251fe888dd3f38 --- /dev/null +++ b/docs/notebooks/233-blip-optimize-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/233-blip-optimize-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/233-blip-optimize-with-output_files/


../
+233-blip-optimize-with-output_23_0.png             31-Oct-2023 00:35              206216
+233-blip-optimize-with-output_25_0.png             31-Oct-2023 00:35              210551
+

+ diff --git a/docs/notebooks/233-blip-visual-language-processing-with-output.rst b/docs/notebooks/233-blip-visual-language-processing-with-output.rst deleted file mode 100644 index 8468422b451f40..00000000000000 --- a/docs/notebooks/233-blip-visual-language-processing-with-output.rst +++ /dev/null @@ -1,943 +0,0 @@ -Visual Question Answering and Image Captioning using BLIP and OpenVINO -====================================================================== - - - -Humans perceive the world through vision and language. A longtime goal -of AI is to build intelligent agents that can understand the world -through vision and language inputs to communicate with humans through -natural language. In order to achieve this goal, vision-language -pre-training has emerged as an effective approach, where deep neural -network models are pre-trained on large scale image-text datasets to -improve performance on downstream vision-language tasks, such as -image-text retrieval, image captioning, and visual question answering. - -`BLIP `__ is a language-image -pre-training framework for unified vision-language understanding and -generation. BLIP achieves state-of-the-art results on a wide range of -vision-language tasks. This tutorial demonstrates how to use BLIP for -visual question answering and image captioning. - -The tutorial consists of the following parts: - -1. Instantiate a BLIP model. -2. Convert the BLIP model to OpenVINO IR. -3. Run visual question answering and image captioning with OpenVINO. - -.. 
_top: - -**Table of contents**: - -- `Background <#background>`__ - - - `Image Captioning <#image-captioning>`__ - - `Visual Question Answering <#visual-question-answering>`__ - -- `Instantiate Model <#instantiate-model>`__ -- `Convert Models to OpenVINO IR <#convert-models-to-openvino-ir>`__ - - - `Vision Model <#vision-model>`__ - - `Text Encoder <#text-encoder>`__ - - `Text Decoder <#text-decoder>`__ - -- `Run OpenVINO Model <#run-openvino-model>`__ - - - `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ - - `Select inference device <#select-inference-device>`__ - - `Image Captioning <#image-captioning>`__ - - `Question Answering <#question-answering>`__ - -Background `⇑ <#top>`__ -############################################################################################################################### - - -Visual language processing is a branch of artificial intelligence that -focuses on creating algorithms designed to enable computers to more -accurately understand images and their content. - -Popular tasks include: - -- **Text to Image Retrieval** - a semantic task that aims to find the - most relevant image for a given text description. -- **Image Captioning** - a semantic task that aims to provide a text - description for image content. -- **Visual Question Answering** - a semantic task that aims to answer - questions based on image content. - -As shown in the diagram below, these three tasks differ in the input -provided to the AI system. For text-to-image retrieval, you have a -predefined gallery of images for search and a user-requested text -description (query). Image captioning can be represented as a particular -case of visual question answering, where you have a predefined question -“What is in the picture?” and various images provided by a user. For -visual question answering, both the text-based question and image -context are variables requested by a user. - -|image0| - -This notebook does not focus on Text to Image retrieval. 
Instead, it -considers Image Captioning and Visual Question Answering. - -Image Captioning `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -Image Captioning is the task of describing the content of an image in -words. This task lies at the intersection of computer vision and natural -language processing. Most image captioning systems use an -encoder-decoder framework, where an input image is encoded into an -intermediate representation of the information in the image, and then -decoded into a descriptive text sequence. - -|image1| - -Visual Question Answering `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Visual Question Answering (VQA) is the task of answering text-based questions about image content. - -|image2| - -For a better understanding of how VQA works, let us consider a -traditional NLP task like Question Answering, which aims to retrieve the -answer to a question from a given text input. Typically, a question -answering pipeline consists of three steps: - -|image3| - -1. Question analysis - analysis of provided question in natural language - form to understand the object in the question and additional context. - For example, if you have a question like “How many bridges in - Paris?”, question words *“how many”* gives a hint that the answer is - more likely to be a number, *“bridges”* is the target object of the - question and *" in Paris"* serves as additional context for the - search. -2. Build query for search - use analyzed results to formalize query for - finding the most relevant information. -3. Perform a search in the knowledge base - send the query to a - knowledge base, typically provided text documents or databases serve - as a source of knowledge. 
- -|image4| - -The difference between text-based question answering and visual question -answering is that an image is used as context and the knowledge base. - -|image5| - -Answering arbitrary questions about images is a complex problem because -it requires involving a lot of computer vision sub-tasks. In the table -below, you can find an example of questions and the required computer -vision skills to find answers. - -+-----------------------------+----------------------------------------+ -| Computer vision task | Question examples | -+=============================+========================================+ -| Object recognition | What is shown in the picture? What is | -| | it? | -+-----------------------------+----------------------------------------+ -| Object detection | Is there any object (dog, man, book) | -| | in the image? Where is … located? | -+-----------------------------+----------------------------------------+ -| Object and image attribute | What color is an umbrella? Does this | -| recognition | man wear glasses? Is there color in | -| | the image? | -+-----------------------------+----------------------------------------+ -| Scene recognition | Is it rainy? What celebration is | -| | pictured? | -+-----------------------------+----------------------------------------+ -| Object counting | How many players are there on the | -| | football field? How many steps are | -| | there on the stairs? | -+-----------------------------+----------------------------------------+ -| Activity recognition | Is the baby crying? What is the woman | -| | cooking? What are they doing? | -+-----------------------------+----------------------------------------+ -| Spatial relationships among | What is located between the sofa and | -| objects | the armchair? What is in the bottom | -| | left corner? | -+-----------------------------+----------------------------------------+ -| Commonsense reasoning | Does she have 100% vision? Does this | -| | person have children? 
| -+-----------------------------+----------------------------------------+ -| Knowledge-based reasoning | Is it a vegetarian pizza? | -+-----------------------------+----------------------------------------+ -| Text recognition | What is the title of the book? What is | -| | shown on the screen? | -+-----------------------------+----------------------------------------+ - -There are a lot of applications for visual question answering: - -- Aid Visually Impaired Persons: VQA models can be used to reduce - barriers for visually impaired people by helping them get information - about images from the web and the real world. -- Education: VQA models can be used to improve visitor experiences at - museums by enabling observers to directly ask questions they are - interested in or to bring more interactivity to schoolbooks for - children interested in acquiring specific knowledge. -- E-commerce: VQA models can retrieve information about products using - photos from online stores. -- Independent expert assessment: VQA models can provide objective - assessments in sports competitions, medical diagnosis, and forensic - examination. - -.. |image0| image:: https://user-images.githubusercontent.com/29454499/221755717-a5b51b7e-523c-461f-b30c-4edbfaf9a134.png -.. |image1| image:: https://user-images.githubusercontent.com/29454499/221640847-1868117c-aac0-4806-99a4-34f218e98bb8.png -.. |image2| image:: https://user-images.githubusercontent.com/29454499/221641984-3c6d8b2f-dd0d-4302-a4d8-0f8564fca772.png -.. |image3| image:: https://user-images.githubusercontent.com/29454499/221760881-378f1ea8-eadc-4610-aff0-69ecabf62fff.png -.. |image4| image:: https://user-images.githubusercontent.com/29454499/222094861-3cafdf9f-d700-4741-b6c5-fb09c1a4da9a.png -.. 
|image5| image:: https://user-images.githubusercontent.com/29454499/222095118-3d5826e4-2662-4d1c-abf2-a515f23d6d6a.png - -Instantiate Model `⇑ <#top>`__ -############################################################################################################################### - - -The BLIP model was proposed in the `BLIP: Bootstrapping Language-Image -Pre-training for Unified Vision-Language Understanding and -Generation `__ paper. - -.. figure:: https://github.com/salesforce/BLIP/raw/main/BLIP.gif - :alt: blip.gif - - blip.gif - -To pre-train a unified vision-language model with both understanding and -generation capabilities, BLIP introduces a multimodal mixture of an -encoder-decoder and a multi-task model which can operate in one of the -three modes: - -- **Unimodal encoders**, which separately encode images and text. The - image encoder is a vision transformer. The text encoder is the same - as BERT. -- **Image-grounded text encoder**, which injects visual information by - inserting a cross-attention layer between the self-attention layer - and the feed-forward network for each transformer block of the text - encoder. -- **Image-grounded text decoder**, which replaces the bi-directional - self-attention layers in the text encoder with causal self-attention - layers. - -More details about the model can be found in the `research -paper `__, `Salesforce -blog `__, -`GitHub repo `__ and `Hugging Face -model -documentation `__. - -In this tutorial, you will use the -`blip-vqa-base `__ -model available for download from `Hugging -Face `__. The same actions are also applicable -to other similar models from the BLIP family. Although this model class -is designed to perform question answering, its components can also be -reused for image captioning. - -To start working with the model, you need to instantiate the -``BlipForQuestionAnswering`` class, using ``from_pretrained`` method. 
-``BlipProcessor`` is a helper class for preparing input data for both -text and vision modalities and postprocessing of generation results. - -.. code:: ipython3 - - !pip install "transformers >= 4.26.0" - - -.. parsed-literal:: - - Requirement already satisfied: transformers>=4.26.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.31.0) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (3.12.2) - Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (0.16.4) - Requirement already satisfied: numpy>=1.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (1.23.5) - Requirement already satisfied: packaging>=20.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (23.1) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (6.0.1) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (2023.8.8) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (2.31.0) - Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (0.13.3) - Requirement already satisfied: safetensors>=0.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (0.3.2) - Requirement already satisfied: tqdm>=4.27 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers>=4.26.0) (4.66.1) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.26.0) (2023.6.0) - Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.26.0) (4.7.1) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers>=4.26.0) (3.2.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers>=4.26.0) (3.4) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers>=4.26.0) (1.26.16) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers>=4.26.0) (2023.7.22) - 
DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - - -.. code:: ipython3 - - import sys - import time - from PIL import Image - from transformers import BlipProcessor, BlipForQuestionAnswering - - sys.path.append("../utils") - from notebook_utils import download_file - - # get model and processor - processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base") - model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base") - - # setup test input: download and read image, prepare question - img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg' - download_file(img_url, "demo.jpg") - raw_image = Image.open("demo.jpg").convert('RGB') - question = "how many dogs are in the picture?" - # preprocess input data - inputs = processor(raw_image, question, return_tensors="pt") - - start = time.perf_counter() - # perform generation - out = model.generate(**inputs) - end = time.perf_counter() - start - - # postprocess result - answer = processor.decode(out[0], skip_special_tokens=True) - - -.. parsed-literal:: - - 2023-08-15 23:34:17.871379: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-15 23:34:17.904962: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-15 23:34:18.440790: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - - -.. parsed-literal:: - - demo.jpg: 0%| | 0.00/485k [00:00 - - -Convert Models to OpenVINO IR `⇑ <#top>`__ -############################################################################################################################### - - -OpenVINO supports PyTorch through export to the ONNX format. You will -use the ``torch.onnx.export`` function to obtain the ONNX model. For -more information, refer to the `PyTorch -documentation `__. You need -to provide a model object, input data for model tracing, and a path for -saving the model. Optionally, you can provide a target onnx opset for -conversion and other parameters specified in the documentation (for -example, input and output names or dynamic shapes). - -While ONNX models are directly supported by OpenVINO™ runtime, it can be -useful to convert them to OpenVINO Intermediate Representation (IR) -format to take advantage of advanced OpenVINO optimization tools and -features. You will use model conversion API to convert the model to IR -format and compress weights to ``FP16`` format. - -The model consists of three parts: - -- vision_model - an encoder for image representation. -- text_encoder - an encoder for input query, used for question - answering and text-to-image retrieval only. -- text_decoder - a decoder for output answer. - -To be able to perform multiple tasks, using the same model components, -you should convert each part independently. - -Vision Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -The vision model accepts float input tensors with the [1,3,384,384] -shape, containing RGB image pixel values normalized in the [0,1] range. - -.. 
code:: ipython3 - - import torch - from pathlib import Path - from openvino.tools import mo - from openvino.runtime import Core, serialize - - VISION_MODEL_OV = Path("blip_vision_model.xml") - VISION_MODEL_ONNX = VISION_MODEL_OV.with_suffix(".onnx") - vision_model = model.vision_model - vision_model.eval() - - # check that model works and save it outputs for reusage as text encoder input - with torch.no_grad(): - vision_outputs = vision_model(inputs["pixel_values"]) - - # if openvino model does not exist, convert it to onnx and then to IR - if not VISION_MODEL_OV.exists(): - - # export pytorch model to ONNX - if not VISION_MODEL_ONNX.exists(): - with torch.no_grad(): - torch.onnx.export(vision_model, inputs["pixel_values"], VISION_MODEL_ONNX, input_names=["pixel_values"]) - # convert ONNX model to IR using model conversion Python API, use compress_to_fp16=True for compressing model weights to FP16 precision - ov_vision_model = mo.convert_model(VISION_MODEL_ONNX, compress_to_fp16=True) - # save model on disk for next usages - serialize(ov_vision_model, str(VISION_MODEL_OV)) - print(f"Vision model successfuly converted and saved to {VISION_MODEL_OV}") - else: - print(f"Vision model will be loaded from {VISION_MODEL_OV}") - - -.. parsed-literal:: - - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - Vision model successfuly converted and saved to blip_vision_model.xml - - -Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -The text encoder is used by visual question answering tasks to build a -question embedding representation. 
It takes ``input_ids`` with a -tokenized question and output image embeddings obtained from the vision -model and attention masks for them. - -The number of tokens after tokenizing input can be different depending -on the question text. You should preserve dynamic shapes for model -inputs working with tokens; the ``dynamic_axes`` parameter is -responsible for preserving dynamic specific dimensions of inputs in -``torch.onnx.export``. For consistency in mapping between dynamic axes -and inputs, the ``input_names`` parameter is provided. For more -information about how these export parameters work, see the `PyTorch -tutorial `__ - -.. code:: ipython3 - - TEXT_ENCODER_OV = Path("blip_text_encoder.xml") - TEXT_ENCODER_ONNX = TEXT_ENCODER_OV.with_suffix(".onnx") - - text_encoder = model.text_encoder - text_encoder.eval() - - # if openvino model does not exist, convert it to onnx and then to IR - if not TEXT_ENCODER_OV.exists(): - if not TEXT_ENCODER_ONNX.exists(): - # prepare example inputs for ONNX export - image_embeds = vision_outputs[0] - image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long) - input_dict = {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"], "encoder_hidden_states": image_embeds, "encoder_attention_mask": image_attention_mask} - # specify variable length axes - dynamic_axes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} - # export PyTorch model to ONNX - with torch.no_grad(): - torch.onnx.export(text_encoder, input_dict, TEXT_ENCODER_ONNX, input_names=list(input_dict), dynamic_axes=dynamic_axes) - # convert ONNX model to IR using model conversion Python API, use compress_to_fp16=True for compressing model weights to FP16 precision - ov_text_encoder = mo.convert_model(TEXT_ENCODER_ONNX, compress_to_fp16=True) - # save model on disk for next usages - serialize(ov_text_encoder, str(TEXT_ENCODER_OV)) - print(f"Text encoder successfuly converted and saved to {TEXT_ENCODER_OV}") - else: 
- print(f"Text encoder will be loaded from {TEXT_ENCODER_OV}") - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/blip/modeling_blip_text.py:712: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if is_decoder: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/blip/modeling_blip_text.py:631: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if is_decoder: - - -.. parsed-literal:: - - Text encoder successfuly converted and saved to blip_text_encoder.xml - - -Text Decoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -The text decoder is responsible for generating the sequence of tokens to -represent model output (answer to question or caption), using an image -(and question, if required) representation. The generation approach is -based on the assumption that the probability distribution of a word -sequence can be decomposed into the product of conditional next word -distributions. In other words, the model predicts the next token in a loop -guided by previously generated tokens until the stop condition is -reached (a sequence of maximum length is generated or the end-of-string token is -obtained). The way the next token will be selected over predicted -probabilities is driven by the selected decoding methodology.
You can -find more information about the most popular decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in -the\ `documentation `__. -To preserve flexibility in the selection of the decoding methodology, you will -convert only model inference for one step. - -To optimize the generation process and use memory more efficiently, the -``use_cache=True`` option is enabled. Since the output side is -auto-regressive, an output token hidden state remains the same once -computed for every further generation step. Therefore, recomputing it -every time you want to generate a new token seems wasteful. With the -cache, the model saves the hidden state once it has been computed. The -model only computes the one for the most recently generated output token -at each time step, re-using the saved ones for hidden tokens. This -reduces the generation complexity from O(n^3) to O(n^2) for a -transformer model. More details about how it works can be found in this -`article `__. -With this option, the model gets the previous step’s hidden states as -input and additionally provides hidden states for the current step as -output. Initially, you have no previous step hidden states, so the first -step does not require you to provide them. ONNX export prevents a -variable number of inputs in the model, which means that you should -handle the first step as a separate model. ``blip_text_decoder`` will be -used for the first step generation, and ``blip_text_decoder_with_past`` -for the next steps. The first step model has hidden state representation -outputs. In PyTorch, they are represented as a list of pairs (hidden -state for key, hidden state for value) for each transformer layer in the -model. An ONNX model does not support nested outputs, so they will be -flattened.
To preserve the correspondence between hidden state keys and -layers, the ``output_names`` parameter is used for ONNX export. - -Similar to ``text_encoder``, ``text_decoder`` can work with input -sequences of different lengths and requires preserving dynamic input -shapes. - -.. code:: ipython3 - - text_decoder = model.text_decoder - text_decoder.eval() - - TEXT_DECODER_OV = Path("blip_text_decoder.xml") - TEXT_DECODER_ONNX = TEXT_DECODER_OV.with_suffix(".onnx") - - # prepare example inputs for ONNX export - input_ids = torch.tensor([[30522]]) # begin of sequence token id - attention_mask = torch.tensor([[1]]) # attention mask for input_ids - encoder_hidden_states = torch.rand((1, 10, 768)) # encoder last hidden state from text_encoder - encoder_attention_mask = torch.ones((1, 10), dtype=torch.long) # attention mask for encoder hidden states - - input_dict = {"input_ids": input_ids, "attention_mask": attention_mask, "encoder_hidden_states": encoder_hidden_states, "encoder_attention_mask": encoder_attention_mask} - # specify variable length axes - dynamic_axes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}, "encoder_hidden_states": {1: "enc_seq_len"}, "encoder_attention_mask": {1: "enc_seq_len"}} - - # specify output names, logits is main output of model - output_names = ["logits"] - - # past key values outputs are output for caching model hidden state - past_key_values_outs = [] - text_decoder_outs = text_decoder(**input_dict) - for idx, _ in enumerate(text_decoder_outs["past_key_values"]): - past_key_values_outs.extend([f"out_past_key_value.{idx}.key", f"out_past_key_value.{idx}.value"]) - - # if openvino model does not exist, convert it to onnx and then to IR - if not TEXT_DECODER_OV.exists(): - # export PyTorch model to ONNX - if not TEXT_DECODER_ONNX.exists(): - with torch.no_grad(): - torch.onnx.export(text_decoder, input_dict, TEXT_DECODER_ONNX, input_names=list(input_dict), output_names=output_names + past_key_values_outs, dynamic_axes=dynamic_axes) - # 
convert ONNX model to IR using model conversion Python API, use compress_to_fp16=True for compressing model weights to FP16 precision - ov_text_decoder = mo.convert_model(TEXT_DECODER_ONNX, compress_to_fp16=True) - # save model on disk for next usages - serialize(ov_text_decoder, str(TEXT_DECODER_OV)) - print(f"Text decoder successfuly converted and saved to {TEXT_DECODER_OV}") - else: - print(f"Text decoder will be loaded from {TEXT_DECODER_OV}") - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/blip/modeling_blip_text.py:640: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if causal_mask.shape[1] < attention_mask.shape[1]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/blip/modeling_blip_text.py:888: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if return_logits: - - -.. parsed-literal:: - - Text decoder successfuly converted and saved to blip_text_decoder.xml - - -For the text decoder in the following steps, there are also additional -inputs for hidden states from the previous step. Similar to the outputs, -they will be flattened after the model is exported to ONNX format. You -need to update ``dynamic_axes`` and ``input_names`` with new input -layers. - -.. 
code:: ipython3 - - # extend input dictionary with hidden states from previous step - input_dict_with_past = {**input_dict, "past_key_values": text_decoder_outs["past_key_values"]} - - # provide names for past_key_value inputs in ONNX model - past_inputs = [k.replace("out_", "in_") for k in past_key_values_outs] - - # extend input names list and dynamic axes with new inputs - input_names_with_past = list(input_dict) + past_inputs - dynamic_axes_with_past = {**dynamic_axes} - for k in past_inputs: - dynamic_axes_with_past[k] = {2: "prev_seq_len"} - - TEXT_DECODER_WITH_PAST_OV = Path("blip_text_decoder_with_past.xml") - TEXT_DECODER_WITH_PAST_ONNX = TEXT_DECODER_WITH_PAST_OV.with_suffix(".onnx") - - # if openvino model does not exist, convert it to onnx and then to IR - if not TEXT_DECODER_WITH_PAST_OV.exists(): - # export PyTorch model to ONNX - if not TEXT_DECODER_WITH_PAST_ONNX.exists(): - with torch.no_grad(): - torch.onnx.export(text_decoder, input_dict_with_past, TEXT_DECODER_WITH_PAST_ONNX, input_names=input_names_with_past, output_names=output_names + past_key_values_outs, dynamic_axes=dynamic_axes_with_past) - # convert ONNX model to IR using model conversion Python API, use compress_to_fp16=True for compressing model weights to FP16 precision - ov_text_decoder = mo.convert_model(TEXT_DECODER_WITH_PAST_ONNX, compress_to_fp16=True) - # save model on disk for next usages - serialize(ov_text_decoder, str(TEXT_DECODER_WITH_PAST_OV)) - print(f"Text decoder with past successfuly converted and saved to {TEXT_DECODER_WITH_PAST_OV}") - else: - print(f"Text decoder with past will be loaded from {TEXT_DECODER_WITH_PAST_OV}") - - -.. 
parsed-literal:: - - Text decoder with past successfuly converted and saved to blip_text_decoder_with_past.xml - - -Run OpenVINO Model `⇑ <#top>`__ -############################################################################################################################### - - -Prepare Inference Pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -As discussed before, the model consists of several blocks which can be -reused for building pipelines for different tasks. In the diagram below, -you can see how image captioning works: - -|image01| - -The visual model accepts the image preprocessed by ``BlipProcessor`` as -input and produces image embeddings, which are directly passed to the -text decoder for generation caption tokens. When generation is finished, -output sequence of tokens is provided to ``BlipProcessor`` for decoding -to text using a tokenizer. - -The pipeline for question answering looks similar, but with additional -question processing. In this case, image embeddings and question -tokenized by ``BlipProcessor`` are provided to the text encoder and then -multimodal question embedding is passed to the text decoder for -performing generation of answers. - -|image02| - -The next step is implementing both pipelines using OpenVINO models. - -.. |image01| image:: https://user-images.githubusercontent.com/29454499/221865836-a56da06e-196d-449c-a5dc-4136da6ab5d5.png -.. |image02| image:: https://user-images.githubusercontent.com/29454499/221868167-d0081add-d9f3-4591-80e7-4753c88c1d0a.png - -.. code:: ipython3 - - # create OpenVINO Core object instance - core = Core() - -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -Select device from dropdown list for running inference using OpenVINO: - -.. 
code:: ipython3 - - import ipywidgets as widgets - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value='AUTO', - description='Device:', - disabled=False, - ) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - # load models on device - ov_vision_model = core.compile_model(VISION_MODEL_OV, device.value) - ov_text_encoder = core.compile_model(TEXT_ENCODER_OV, device.value) - ov_text_decoder = core.compile_model(TEXT_DECODER_OV, device.value) - ov_text_decoder_with_past = core.compile_model(TEXT_DECODER_WITH_PAST_OV, device.value) - -.. code:: ipython3 - - from typing import List, Tuple, Dict - from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions - - - def prepare_past_inputs(past_key_values:List[Tuple[torch.Tensor, torch.Tensor]]): - """ - Helper function for rearrange input hidden states inputs to OpenVINO model expected format - Parameters: - past_key_values (List[Tuple[torch.Tensor, torch.Tensor]]): list of pairs key, value attention hidden states obtained as model outputs from previous step - Returns: - inputs (Dict[str, torch.Tensor]): dictionary with inputs for model - """ - inputs = {} - for idx, (key, value) in enumerate(past_key_values): - inputs[f"in_past_key_value.{idx}.key"] = key - inputs[f"in_past_key_value.{idx}.value"] = value - return inputs - - - def postprocess_text_decoder_outputs(output:Dict): - """ - Helper function for rearranging model outputs and wrapping to CausalLMOutputWithCrossAttentions - Parameters: - output (Dict): dictionary with model output - Returns - wrapped_outputs (CausalLMOutputWithCrossAttentions): outputs wrapped to CausalLMOutputWithCrossAttentions format - """ - outs = {k.any_name: v for k, v in output.items()} - logits = torch.from_numpy(outs["logits"]) - past_kv = [] - for i in range(0, len(past_key_values_outs), 2): - key = past_key_values_outs[i] - value = 
key.replace(".key", ".value") - past_kv.append((torch.from_numpy(outs[key]), torch.from_numpy(outs[value]))) - return CausalLMOutputWithCrossAttentions( - loss=None, - logits=logits, - past_key_values=past_kv, - hidden_states=None, - attentions=None, - cross_attentions=None - ) - - - def text_decoder_forward(input_ids:torch.Tensor, attention_mask:torch.Tensor, past_key_values:List[Tuple[torch.Tensor, torch.Tensor]], encoder_hidden_states:torch.Tensor, encoder_attention_mask:torch.Tensor, **kwargs): - """ - Inference function for text_decoder in one generation step - Parameters: - input_ids (torch.Tensor): input token ids - attention_mask (torch.Tensor): attention mask for input token ids - past_key_values (List[Tuple[torch.Tensor, torch.Tensor]]): list of cached decoder hidden states from previous step - encoder_hidden_states (torch.Tensor): encoder (vision or text) hidden states - encoder_attention_mask (torch.Tensor): attnetion mask for encoder hidden states - Returns - model outputs (CausalLMOutputWithCrossAttentions): model prediction wrapped to CausalLMOutputWithCrossAttentions class including predicted logits and hidden states for caching - """ - input_dict = { - "input_ids": input_ids, - "attention_mask": attention_mask, - "encoder_hidden_states": encoder_hidden_states, - "encoder_attention_mask": encoder_attention_mask - } - if past_key_values is None: - outputs = ov_text_decoder(input_dict) - else: - input_dict.update(prepare_past_inputs(past_key_values)) - outputs = ov_text_decoder_with_past(input_dict) - return postprocess_text_decoder_outputs(outputs) - - - text_decoder.forward = text_decoder_forward - - - class OVBlipModel: - """ - Model class for inference BLIP model with OpenVINO - """ - def __init__(self, config, decoder_start_token_id:int, vision_model, text_encoder, text_decoder): - """ - Initialization class parameters - """ - self.vision_model = vision_model - self.vision_model_out = vision_model.output(0) - self.text_encoder = text_encoder - 
self.text_encoder_out = text_encoder.output(0) - self.text_decoder = text_decoder - self.config = config - self.decoder_start_token_id = decoder_start_token_id - self.decoder_input_ids = config.text_config.bos_token_id - - def generate_answer(self, pixel_values:torch.Tensor, input_ids:torch.Tensor, attention_mask:torch.Tensor, **generate_kwargs): - """ - Visual Question Answering prediction - Parameters: - pixel_values (torch.Tensor): preprocessed image pixel values - input_ids (torch.Tensor): question token ids after tokenization - attention_mask (torch.Tensor): attention mask for question tokens - Retruns: - generation output (torch.Tensor): tensor which represents sequence of generated answer token ids - """ - image_embed = self.vision_model(pixel_values.detach().numpy())[self.vision_model_out] - image_attention_mask = np.ones(image_embed.shape[:-1], dtype=int) - if isinstance(input_ids, list): - input_ids = torch.LongTensor(input_ids) - question_embeds = self.text_encoder([input_ids.detach().numpy(), attention_mask.detach().numpy(), image_embed, image_attention_mask])[self.text_encoder_out] - question_attention_mask = np.ones(question_embeds.shape[:-1], dtype=int) - - bos_ids = np.full((question_embeds.shape[0], 1), fill_value=self.decoder_start_token_id) - - outputs = self.text_decoder.generate( - input_ids=torch.from_numpy(bos_ids), - eos_token_id=self.config.text_config.sep_token_id, - pad_token_id=self.config.text_config.pad_token_id, - encoder_hidden_states=torch.from_numpy(question_embeds), - encoder_attention_mask=torch.from_numpy(question_attention_mask), - **generate_kwargs, - ) - return outputs - - def generate_caption(self, pixel_values:torch.Tensor, input_ids:torch.Tensor = None, attention_mask:torch.Tensor = None, **generate_kwargs): - """ - Image Captioning prediction - Parameters: - pixel_values (torch.Tensor): preprocessed image pixel values - input_ids (torch.Tensor, *optional*, None): pregenerated caption token ids after tokenization, if 
provided caption generation continue provided text - attention_mask (torch.Tensor): attention mask for caption tokens, used only if input_ids provided - Retruns: - generation output (torch.Tensor): tensor which represents sequence of generated caption token ids - """ - batch_size = pixel_values.shape[0] - - image_embeds = self.vision_model(pixel_values.detach().numpy())[self.vision_model_out] - - image_attention_mask = torch.ones(image_embeds.shape[:-1], dtype=torch.long) - - if isinstance(input_ids, list): - input_ids = torch.LongTensor(input_ids) - elif input_ids is None: - input_ids = ( - torch.LongTensor([[self.config.text_config.bos_token_id, self.config.text_config.eos_token_id]]) - .repeat(batch_size, 1) - ) - input_ids[:, 0] = self.config.text_config.bos_token_id - attention_mask = attention_mask[:, :-1] if attention_mask is not None else None - - outputs = self.text_decoder.generate( - input_ids=input_ids[:, :-1], - eos_token_id=self.config.text_config.sep_token_id, - pad_token_id=self.config.text_config.pad_token_id, - attention_mask=attention_mask, - encoder_hidden_states=torch.from_numpy(image_embeds), - encoder_attention_mask=image_attention_mask, - **generate_kwargs, - ) - - return outputs - -The model helper class has two methods for generation: -**generate_answer** - used for visual question answering, -**generate_caption** - used for caption generation. For initialization, -model class accepts compiled OpenVINO models for the text encoder, -vision model and text decoder, and also configuration for generation and -initial token for decoder work. - -.. code:: ipython3 - - ov_model = OVBlipModel(model.config, model.decoder_start_token_id, ov_vision_model, ov_text_encoder, text_decoder) - out = ov_model.generate_answer(**inputs, max_length=20) - -Now, the model is ready for generation. - -Image Captioning `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. 
code:: ipython3 - - out = ov_model.generate_caption(inputs["pixel_values"], max_length=20) - caption = processor.decode(out[0], skip_special_tokens=True) - fig = visualize_results(raw_image, caption) - - - -.. image:: 233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_28_0.png - - -Question Answering `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. code:: ipython3 - - start = time.perf_counter() - out = ov_model.generate_answer(**inputs, max_length=20) - end = time.perf_counter() - start - answer = processor.decode(out[0], skip_special_tokens=True) - fig = visualize_results(raw_image, answer, question) - - - -.. image:: 233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_30_0.png - - -.. code:: ipython3 - - print(f"Processing time: {end:.4f}") - - -.. parsed-literal:: - - Processing time: 0.1504 - diff --git a/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_28_0.png b/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_28_0.png deleted file mode 100644 index 427258f88c26d7..00000000000000 --- a/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_28_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8be02952eab1479ccdfdcb4f163381bfe020f46763c6bd3007aeb0e5cdb2e92b -size 206940 diff --git a/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_8_0.png b/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_8_0.png deleted file mode 100644 index 865bdc41b355b4..00000000000000 --- 
a/docs/notebooks/233-blip-visual-language-processing-with-output_files/233-blip-visual-language-processing-with-output_8_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79ce734018edfa650785d11570ccfdb1a768d3bcef81c89f6e61042591cd4475 -size 210551 diff --git a/docs/notebooks/233-blip-visual-language-processing-with-output_files/index.html b/docs/notebooks/233-blip-visual-language-processing-with-output_files/index.html deleted file mode 100644 index 10f201080e309c..00000000000000 --- a/docs/notebooks/233-blip-visual-language-processing-with-output_files/index.html +++ /dev/null @@ -1,9 +0,0 @@ - -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/233-blip-visual-language-processing-with-output_files/ - -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/233-blip-visual-language-processing-with-output_files/


../
-233-blip-visual-language-processing-with-output..> 16-Aug-2023 01:31              206940
-233-blip-visual-language-processing-with-output..> 16-Aug-2023 01:31              210551
-233-blip-visual-language-processing-with-output..> 16-Aug-2023 01:31              210551
-

- diff --git a/docs/notebooks/234-encodec-audio-compression-with-output.rst b/docs/notebooks/234-encodec-audio-compression-with-output.rst index cd05bd7302413b..419ccc2cbe16b6 100644 --- a/docs/notebooks/234-encodec-audio-compression-with-output.rst +++ b/docs/notebooks/234-encodec-audio-compression-with-output.rst @@ -1,8 +1,6 @@ Audio compression with EnCodec and OpenVINO =========================================== - - Compression is an important part of the Internet today because it enables people to easily share high-quality photos, listen to audio messages, stream their favorite shows, and so much more. Even when using @@ -28,39 +26,47 @@ and original `repo `__. image.png -.. _top: +**Table of contents:** -**Table of contents**: -- `Prerequisites <#prerequisites>`__ -- `Instantiate audio compression pipeline <#instantiate-audio-compression-pipeline>`__ -- `Explore EnCodec pipeline <#explore-encodec-pipeline>`__ +- `Prerequisites <#prerequisites>`__ +- `Instantiate audio compression + pipeline <#instantiate-audio-compression-pipeline>`__ +- `Explore EnCodec pipeline <#explore-encodec-pipeline>`__ - - `Preprocessing <#preprocessing>`__ - - `Encoding <#encoding>`__ - - `Decompression <#decompression>`__ + - `Preprocessing <#preprocessing>`__ + - `Encoding <#encoding>`__ + - `Decompression <#decompression>`__ -- `Convert model to OpenVINO Intermediate Representation format <#convert-model-to-openvino-intermediate-representation-format>`__ -- `Integrate OpenVINO to EnCodec pipeline <#integrate-openvino-to-encodec-pipeline>`__ +- `Convert model to OpenVINO Intermediate Representation + format <#convert-model-to-openvino-intermediate-representation-format>`__ +- `Integrate OpenVINO to EnCodec + pipeline <#integrate-openvino-to-encodec-pipeline>`__ - - `Select inference device <#select-inference-device>`__ + - `Select inference device <#select-inference-device>`__ -- `Run EnCodec with OpenVINO <#run-encodec-with-openvino>`__ - -Prerequisites `⇑ <#top>`__ 
-############################################################################################################################### +- `Run EnCodec with OpenVINO <#run-encodec-with-openvino>`__ +Prerequisites +------------------------------------------------------- Install required dependencies: .. code:: ipython3 - !python -W ignore -m pip install -q -r requirements.txt + %pip install -q -r requirements.txt + -Instantiate audio compression pipeline `⇑ <#top>`__ -############################################################################################################################### +.. parsed-literal:: + + DEPRECATION: git+https://\*\*\*\*@github.com/eaidova/encodec#egg=encodec;python_version=="3.7" contains an egg fragment with a non-PEP 508 name pip 25.0 will enforce this behaviour change. A possible replacement is to use the req @ url syntax, and remove the egg fragment. Discussion can be found at https://github.com/pypa/pip/issues/11617 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.\*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. +Instantiate audio compression pipeline +-------------------------------------------------------------------------------- + `Codecs `__, which act as encoders and decoders for streams of data, help empower most of the audio compression people currently use online. Some examples of commonly used @@ -116,9 +122,8 @@ bandwidth. 
model = EncodecModel.encodec_model_24khz() model.set_target_bandwidth(6.0) -Explore EnCodec pipeline `⇑ <#top>`__ -############################################################################################################################### - +Explore EnCodec pipeline +------------------------------------------------------------------ Let us explore model capabilities on example audio: @@ -168,9 +173,8 @@ Let us explore model capabilities on example audio: .. image:: 234-encodec-audio-compression-with-output_files/234-encodec-audio-compression-with-output_6_2.png -Preprocessing `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Preprocessing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To achieve the best result, audio should have the number of channels and sample rate expected by the model. If audio does not fulfill these @@ -197,9 +201,8 @@ number of channels using the ``convert_audio`` function. wav = convert_audio(wav, sr, model_sr, model_channels) -Encoding `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Encoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Audio waveform should be split by chunks and then encoded by Encoder model, then compressed by quantizer for reducing memory. The result of @@ -247,9 +250,8 @@ Let us compare obtained compression result: Great! Now, we see the power of hyper compression. Binary size of a file becomes 60 times smaller and more suitable for sending via network. -Decompression `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Decompression +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After successful sending of the compressed audio, it should be decompressed on the recipient’s side. 
The decoder model is responsible @@ -297,18 +299,18 @@ audio. Nice! Audio sounds close to original. -Convert model to OpenVINO Intermediate Representation format. `⇑ <#top>`__ -############################################################################################################################### +Convert model to OpenVINO Intermediate Representation format +------------------------------------------------------------------------------------------------------ For best results with OpenVINO, it is recommended to convert the model to OpenVINO IR format. OpenVINO supports PyTorch via ONNX conversion. We will use ``torch.onnx.export`` for exporting the ONNX model from PyTorch. We need to provide initialized model’s instance and example of -inputs for shape inference. We will use ``mo.convert_model`` -functionality to convert the ONNX models. The ``mo.convert_model`` +inputs for shape inference. We will use ``ov.convert_model`` +functionality to convert the ONNX models. The ``ov.convert_model`` Python function returns an OpenVINO model ready to load on the device and start making predictions. We can save it on disk for the next usage -with ``openvino.runtime.serialize``. +with ``ov.save_model``. .. code:: ipython3 @@ -340,41 +342,41 @@ with ``openvino.runtime.serialize``. .. code:: ipython3 - from openvino.tools import mo - from openvino.runtime import Core, serialize + import openvino as ov + - core = Core() + core = ov.Core() OV_ENCODER_PATH = Path("encodec_encoder.xml") if not OV_ENCODER_PATH.exists(): torch.onnx.export(encoder, torch.zeros(1, 1, 480000), "encodec_encoder.onnx") - encoder_ov = mo.convert_model("encodec_encoder.onnx", compress_to_fp16=True) - serialize(encoder_ov, str(OV_ENCODER_PATH)) + encoder_ov = ov.convert_model("encodec_encoder.onnx") + ov.save_model(encoder_ov, OV_ENCODER_PATH) else: encoder_ov = core.read_model(OV_ENCODER_PATH) .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if length <= max_pad: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4315: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4315: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) 
_C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. 
Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( @@ -383,25 +385,24 @@ with ``openvino.runtime.serialize``. OV_DECODER_PATH = Path("encodec_decoder.xml") if not OV_DECODER_PATH.exists(): torch.onnx.export(decoder, torch.zeros([1, 8, 1500], dtype=torch.long), "encodec_decoder.onnx", input_names=["codes", "scale"]) - decoder_ov = mo.convert_model("encodec_decoder.onnx", compress_to_fp16=True) - serialize(decoder_ov, str(OV_DECODER_PATH)) + decoder_ov = ov.convert_model("encodec_decoder.onnx") + ov.save_model(decoder_ov, OV_DECODER_PATH) else: decoder_ov = core.read_model(OV_DECODER_PATH) .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). 
for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] -Integrate OpenVINO to EnCodec pipeline `⇑ <#top>`__ -############################################################################################################################### - +Integrate OpenVINO to EnCodec pipeline +-------------------------------------------------------------------------------- The following steps are required for integration of OpenVINO to EnCodec pipeline: @@ -411,11 +412,10 @@ pipeline: 3. Replace the original frame processing functions with OpenVINO based algorithms. -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -476,9 +476,8 @@ Select device from dropdown list for running inference using OpenVINO: model._encode_frame = encode_frame model._decode_frame = decode_frame -Run EnCodec with OpenVINO `⇑ <#top>`__ -############################################################################################################################### - +Run EnCodec with OpenVINO +------------------------------------------------------------------- The process of running encodec with OpenVINO under hood will be the same like with the original PyTorch models. @@ -522,7 +521,7 @@ like with the original PyTorch models. @@ -536,7 +535,6 @@ like with the original PyTorch models. .. code:: ipython3 import gradio as gr - from socket import gethostname, gethostbyname from typing import Tuple import numpy as np @@ -575,13 +573,18 @@ like with the original PyTorch models. examples=['test_24k.wav'] ) - ipaddr = gethostbyname(gethostname()) - demo.launch(server_name=ipaddr) + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ .. parsed-literal:: - Running on local URL: http://10.211.120.12:7860 + Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. @@ -589,5 +592,5 @@ like with the original PyTorch models. .. .. raw:: html -..
+..
diff --git a/docs/notebooks/234-encodec-audio-compression-with-output_files/234-encodec-audio-compression-with-output_38_1.png b/docs/notebooks/234-encodec-audio-compression-with-output_files/234-encodec-audio-compression-with-output_38_1.png index 05361ad49555f9..e87ac388104511 100644 --- a/docs/notebooks/234-encodec-audio-compression-with-output_files/234-encodec-audio-compression-with-output_38_1.png +++ b/docs/notebooks/234-encodec-audio-compression-with-output_files/234-encodec-audio-compression-with-output_38_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5e5d5707edeacba948cc1c3f8fb15bb2397c3bf5e8aaa388fc972f564870d3b -size 44009 +oid sha256:163c03d2e54146fc13d51ca270e2b8601a8545292cf8d2e62394f818ef754548 +size 44358 diff --git a/docs/notebooks/234-encodec-audio-compression-with-output_files/index.html b/docs/notebooks/234-encodec-audio-compression-with-output_files/index.html index a45a6a0a7c70d1..e5b083f2014592 100644 --- a/docs/notebooks/234-encodec-audio-compression-with-output_files/index.html +++ b/docs/notebooks/234-encodec-audio-compression-with-output_files/index.html @@ -1,9 +1,9 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/234-encodec-audio-compression-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/234-encodec-audio-compression-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/234-encodec-audio-compression-with-output_files/


../
-234-encodec-audio-compression-with-output_19_1.png 16-Aug-2023 01:31               44358
-234-encodec-audio-compression-with-output_38_1.png 16-Aug-2023 01:31               44009
-234-encodec-audio-compression-with-output_6_2.png  16-Aug-2023 01:31               45005
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/234-encodec-audio-compression-with-output_files/


../
+234-encodec-audio-compression-with-output_19_1.png 31-Oct-2023 00:35               44358
+234-encodec-audio-compression-with-output_38_1.png 31-Oct-2023 00:35               44358
+234-encodec-audio-compression-with-output_6_2.png  31-Oct-2023 00:35               45005
 

diff --git a/docs/notebooks/235-controlnet-stable-diffusion-with-output.rst b/docs/notebooks/235-controlnet-stable-diffusion-with-output.rst index 471e72ca3d0aea..949b6258bbc7cd 100644 --- a/docs/notebooks/235-controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/235-controlnet-stable-diffusion-with-output.rst @@ -1,8 +1,6 @@ Text-to-Image Generation with ControlNet Conditioning ===================================================== - - Diffusion models make a revolution in AI-generated art. This technology enables creation of high-quality images simply by writing a text prompt. Even though this technology gives very promising results, the diffusion @@ -141,54 +139,49 @@ of the target in the image: This tutorial focuses mainly on conditioning by pose. However, the discussed steps are also applicable to other annotation modes. -.. _top: - -**Table of contents**: +**Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Instantiating Generation Pipeline <#instantiating-generation-pipeline>`__ - - `ControlNet in Diffusers library <#controlnet-in-diffusers-library>`__ - - `OpenPose <#openpose>`__ +- `Prerequisites <#prerequisites>`__ +- `Instantiating Generation + Pipeline <#instantiating-generation-pipeline>`__ -- `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ + - `ControlNet in Diffusers + library <#controlnet-in-diffusers-library>`__ + - `OpenPose <#openpose>`__ - - `OpenPose conversion <#openpose-conversion>`__ +- `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ -- `Select inference device <#select-inference-device>`__ + - `OpenPose conversion <#openpose-conversion>`__ - - `ControlNet conversion <#controlnet-conversion>`__ - - `UNet conversion <#unet-conversion>`__ - - `Text Encoder <#text-encoder>`__ - - `VAE Decoder conversion <#vae-decoder-conversion>`__ +- 
`Select inference device <#select-inference-device>`__ -- `Prepare Inference pipeline <#prepare-inference-pipeline>`__ -- `Running Text-to-Image Generation with ControlNet Conditioning and OpenVINO <#running-text-to-image-generation-with-controlnet-conditioning-and-openvino>`__ -- `Select inference device <#select-inference-device>`__ + - `ControlNet conversion <#controlnet-conversion>`__ + - `UNet conversion <#unet-conversion>`__ + - `Text Encoder <#text-encoder>`__ + - `VAE Decoder conversion <#vae-decoder-conversion>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Prepare Inference pipeline <#prepare-inference-pipeline>`__ +- `Running Text-to-Image Generation with ControlNet Conditioning and + OpenVINO <#running-text-to-image-generation-with-controlnet-conditioning-and-openvino>`__ +- `Select inference device for Stable Diffusion + pipeline <#select-inference-device-for-stable-diffusion-pipeline>`__ +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q "diffusers==0.14.0" "controlnet-aux>=0.0.6" "gradio>=3.36" - - -.. 
parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Instantiating Generation Pipeline `⇑ <#top>`__ -############################################################################################################################### + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch" "torchvision" + %pip install -q "diffusers>=0.14.0" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" + %pip install -q "openvino>=2023.1.0" +Instantiating Generation Pipeline +--------------------------------------------------------------------------- -ControlNet in Diffusers library `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +ControlNet in Diffusers library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For working with Stable Diffusion and ControlNet models, we will use Hugging Face `Diffusers `__ @@ -217,16 +210,27 @@ controlnet model and ``stable-diffusion-v1-5``: .. parsed-literal:: - 2023-07-16 15:33:13.040077: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-16 15:33:13.079142: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-08-29 19:05:09.752880: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-08-29 19:05:09.791513: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-16 15:33:13.688517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden. + 2023-08-29 19:05:10.519110: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + + +.. parsed-literal:: + + Fetching 15 files: 0%| | 0/15 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +OpenPose +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Annotation is an important part of working with ControlNet. `OpenPose `__ @@ -254,7 +258,7 @@ The code below demonstrates how to instantiate the OpenPose model. .. parsed-literal:: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/controlnet_aux/mediapipe_face/mediapipe_face_common.py:7: UserWarning: The module 'mediapipe' is not installed. The package will have limited functionality. Please install it using the command: pip install 'mediapipe' + /home/ea/work/ov_venv/lib/python3.8/site-packages/controlnet_aux/mediapipe_face/mediapipe_face_common.py:7: UserWarning: The module 'mediapipe' is not installed. The package will have limited functionality. Please install it using the command: pip install 'mediapipe' warnings.warn( @@ -314,24 +318,14 @@ Now, let us check its result on example image: .. 
image:: 235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_8_0.png -Convert models to OpenVINO Intermediate representation (IR) format. `⇑ <#top>`__ -############################################################################################################################### - -OpenVINO supports PyTorch through export to the ONNX format. We will use -the ``torch.onnx.export`` function for obtaining the ONNX model, we can -learn more in the `PyTorch -documentation `__. We need to -provide a model object, input data for model tracing, and a path for -saving the model. Optionally, we can provide a target ONNX opset for -conversion and other parameters specified in the documentation (for -example, input and output names or dynamic shapes). +Convert models to OpenVINO Intermediate representation (IR) format +------------------------------------------------------------------------------------------------------------ -While ONNX models are directly supported by OpenVINO™ runtime, it can be -useful to convert them to IR format to take the advantage of advanced -OpenVINO optimization tools and features. We will use `model conversion -API `__ -to convert a model to IR format and compression weights to ``FP16`` -format. +Starting from 2023.0 release, OpenVINO supports PyTorch models +conversion directly. We need to provide a model object, input data for +model tracing to ``ov.convert_model`` function to obtain OpenVINO +``ov.Model`` object instance. Model can be saved on disk for next +deployment using ``ov.save_model`` function. 
The pipeline consists of five important parts: @@ -344,9 +338,8 @@ The pipeline consists of five important parts: Let us convert each part: -OpenPose conversion `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +OpenPose conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenPose model is represented in the pipeline as a wrapper on the PyTorch model which not only detects poses on an input image but is also @@ -358,15 +351,25 @@ estimation part, which is located inside the wrapper from pathlib import Path import torch + import openvino as ov - OPENPOSE_ONNX_PATH = Path("openpose.onnx") - OPENPOSE_OV_PATH = OPENPOSE_ONNX_PATH.with_suffix(".xml") + OPENPOSE_OV_PATH = Path("openpose.xml") + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() if not OPENPOSE_OV_PATH.exists(): - if not OPENPOSE_ONNX_PATH.exists(): - torch.onnx.export(pose_estimator.body_estimation.model, torch.zeros([1, 3, 184, 136]), OPENPOSE_ONNX_PATH) - !mo --input_model $OPENPOSE_ONNX_PATH --compress_to_fp16 + with torch.no_grad(): + ov_model = ov.convert_model(pose_estimator.body_estimation.model, example_input=torch.zeros([1, 3, 184, 136]), input=[[1,3,184,136]]) + ov.save_model(ov_model, OPENPOSE_OV_PATH) + del ov_model + cleanup_torchscript_cache() print('OpenPose successfully converted to IR') else: print(f"OpenPose will be loaded from {OPENPOSE_OV_PATH}") @@ -382,10 +385,8 @@ model with the OpenVINO model, using the following code: .. 
code:: ipython3 - from openvino.runtime import Model, Core from collections import namedtuple - class OpenPoseOVModel: """ Helper wrapper for OpenPose model inference""" def __init__(self, core, model_path, device="AUTO"): @@ -428,13 +429,12 @@ model with the OpenVINO model, using the following code: - core = Core() - -Select inference device `⇑ <#top>`__ -############################################################################################################################### + core = ov.Core() +Select inference device +----------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 @@ -454,7 +454,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GNA', 'AUTO'), value='AUTO') @@ -475,9 +475,8 @@ Select device from dropdown list for running inference using OpenVINO: Great! As we can see, it works perfectly. -ControlNet conversion `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +ControlNet conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ControlNet model accepts the same inputs like UNet in Stable Diffusion pipeline and additional condition sample - skeleton key points @@ -494,8 +493,8 @@ blocks, which serves additional context for the UNet model. .. code:: ipython3 - from torch.onnx import _export as torch_onnx_export import gc + from functools import partial inputs = { "sample": torch.randn((2, 4, 64, 64)), @@ -504,26 +503,26 @@ blocks, which serves additional context for the UNet model. 
"controlnet_cond": torch.randn((2,3,512,512)) } + input_info = [(name, ov.PartialShape(inp.shape)) for name, inp in inputs.items()] - CONTROLNET_ONNX_PATH = Path('controlnet-pose.onnx') - CONTROLNET_OV_PATH = CONTROLNET_ONNX_PATH.with_suffix('.xml') + CONTROLNET_OV_PATH = Path('controlnet-pose.xml') controlnet.eval() with torch.no_grad(): down_block_res_samples, mid_block_res_sample = controlnet(**inputs, return_dict=False) - - controlnet_output_names = [f"down_block_res_sample_{i}" for i in range(len(down_block_res_samples))] - controlnet_output_names.append("mid_block_res_sample") - - + if not CONTROLNET_OV_PATH.exists(): - if not CONTROLNET_ONNX_PATH.exists(): - - with torch.no_grad(): - torch_onnx_export(controlnet, inputs, CONTROLNET_ONNX_PATH, input_names=list(inputs), output_names=controlnet_output_names, onnx_shape_inference=False) - !mo --input_model $CONTROLNET_ONNX_PATH --compress_to_fp16 + with torch.no_grad(): + controlnet.forward = partial(controlnet.forward, return_dict=False) + ov_model = ov.convert_model(controlnet, example_input=inputs, input=input_info) + ov.save_model(ov_model, CONTROLNET_OV_PATH) + del ov_model + cleanup_torchscript_cache() print('ControlNet successfully converted to IR') else: print(f"ControlNet will be loaded from {CONTROLNET_OV_PATH}") + + del controlnet + gc.collect() .. parsed-literal:: @@ -531,37 +530,65 @@ blocks, which serves additional context for the UNet model. ControlNet will be loaded from controlnet-pose.xml -UNet conversion `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. parsed-literal:: + + 5531 + + + +UNet conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The process of UNet model conversion remains the same, like for original Stable Diffusion model, but with respect to the new inputs generated by ControlNet. .. 
code:: ipython3 - UNET_ONNX_PATH = Path('unet_controlnet/unet_controlnet.onnx') - UNET_OV_PATH = UNET_ONNX_PATH.parents[1] / 'unet_controlnet.xml' + UNET_OV_PATH = Path('unet_controlnet.xml') - if not UNET_OV_PATH.exists(): - if not UNET_ONNX_PATH.exists(): - UNET_ONNX_PATH.parent.mkdir(exist_ok=True) - inputs.pop("controlnet_cond", None) - inputs["down_block_additional_residuals"] = down_block_res_samples - inputs["mid_block_additional_residual"] = mid_block_res_sample + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64, + torch.int32: ov.Type.i32, + torch.int64: ov.Type.i64 + } - unet = pipe.unet - unet.eval() + def flattenize_inputs(inputs): + flatten_inputs = [] + for input_data in inputs: + if input_data is None: + continue + if isinstance(input_data, (list, tuple)): + flatten_inputs.extend(flattenize_inputs(input_data)) + else: + flatten_inputs.append(input_data) + return flatten_inputs - input_names = ["sample", "timestep", "encoder_hidden_states", *controlnet_output_names] + if not UNET_OV_PATH.exists(): + inputs.pop("controlnet_cond", None) + inputs["down_block_additional_residuals"] = down_block_res_samples + inputs["mid_block_additional_residual"] = mid_block_res_sample - with torch.no_grad(): - torch_onnx_export(unet, inputs, str(UNET_ONNX_PATH), input_names=input_names, output_names=["sample_out"], onnx_shape_inference=False) - del unet + unet = pipe.unet + unet.eval() + + with torch.no_grad(): + ov_model = ov.convert_model(unet, example_input=inputs) + + flatten_inputs = flattenize_inputs(inputs.values()) + for input_data, input_tensor in zip(flatten_inputs, ov_model.inputs): + input_tensor.get_node().set_partial_shape(ov.PartialShape(input_data.shape)) + input_tensor.get_node().set_element_type(dtype_mapping[input_data.dtype]) + ov_model.validate_nodes_and_infer_types() + ov.save_model(ov_model, UNET_OV_PATH) + del ov_model + cleanup_torchscript_cache() + del unet del pipe.unet gc.collect() - !mo --input_model 
$UNET_ONNX_PATH --compress_to_fp16 print('Unet successfully converted to IR') else: del pipe.unet @@ -571,98 +598,121 @@ ControlNet. .. parsed-literal:: - Unet will be loaded from unet_controlnet.xml + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /home/ea/work/ov_venv/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:526: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]): + /home/ea/work/ov_venv/lib/python3.8/site-packages/diffusers/models/resnet.py:185: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert hidden_states.shape[1] == self.channels + /home/ea/work/ov_venv/lib/python3.8/site-packages/diffusers/models/resnet.py:190: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert hidden_states.shape[1] == self.channels + /home/ea/work/ov_venv/lib/python3.8/site-packages/diffusers/models/resnet.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert hidden_states.shape[1] == self.channels + /home/ea/work/ov_venv/lib/python3.8/site-packages/diffusers/models/resnet.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if hidden_states.shape[0] >= 64: + + +.. parsed-literal:: + + Unet successfully converted to IR .. parsed-literal:: - 5513 + 0 -Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The text-encoder is responsible for transforming the input prompt, for example, -“a photo of an astronaut riding a horse” into an embedding space that can be -understood by the U-Net. It is usually a simple transformer-based encoder that -maps a sequence of input tokens to a sequence of latent text embeddings. +The text-encoder is responsible for transforming the input prompt, for +example, “a photo of an astronaut riding a horse” into an embedding +space that can be understood by the U-Net. It is usually a simple +transformer-based encoder that maps a sequence of input tokens to a +sequence of latent text embeddings. The input of the text encoder is tensor ``input_ids``, which contains indexes of tokens from text processed by the tokenizer and padded to the maximum length accepted by the model. Model outputs are two tensors: ``last_hidden_state`` - hidden state from the last MultiHeadAttention layer in the model and ``pooler_out`` - pooled output for whole model -hidden states. 
We will use ``opset_version=14`` because the model -contains the ``triu`` operation, supported in ONNX only starting from -this opset. +hidden states. .. code:: ipython3 - TEXT_ENCODER_ONNX_PATH = Path('text_encoder.onnx') - TEXT_ENCODER_OV_PATH = TEXT_ENCODER_ONNX_PATH.with_suffix('.xml') + TEXT_ENCODER_OV_PATH = Path('text_encoder.xml') - def convert_encoder_onnx(text_encoder:torch.nn.Module, onnx_path:Path): + def convert_encoder(text_encoder:torch.nn.Module, ir_path:Path): """ - Convert Text Encoder model to ONNX. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Convert Text Encoder model to OpenVINO IR. + Function accepts text encoder model, prepares example inputs for conversion, and convert it to OpenVINO Model Parameters: text_encoder (torch.nn.Module): text_encoder model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model Returns: None """ - if not onnx_path.exists(): + if not ir_path.exists(): input_ids = torch.ones((1, 77), dtype=torch.long) # switch model to inference mode text_encoder.eval() # disable gradients calculation for reducing memory consumption with torch.no_grad(): - # infer model, just to make sure that it works - text_encoder(input_ids) - # export model to ONNX format - torch_onnx_export( + ov_model = ov.convert_model( text_encoder, # model instance - input_ids, # inputs for model tracing - onnx_path, # output file for saving result - input_names=['tokens'], # model input name for onnx representation - output_names=['last_hidden_state', 'pooler_out'], # model output names for onnx representation - opset_version=14, # onnx opset version for export - onnx_shape_inference=False + example_input=input_ids, # inputs for model tracing + input=([1,77],) ) - print('Text Encoder successfully converted to ONNX') + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('Text Encoder successfully converted to IR') if not 
TEXT_ENCODER_OV_PATH.exists(): - convert_encoder_onnx(pipe.text_encoder, TEXT_ENCODER_ONNX_PATH) - !mo --input_model $TEXT_ENCODER_ONNX_PATH --compress_to_fp16 - print('Text Encoder successfully converted to IR') + convert_encoder(pipe.text_encoder, TEXT_ENCODER_OV_PATH) else: print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH}") - + del pipe.text_encoder gc.collect() .. parsed-literal:: - Text encoder will be loaded from text_encoder.xml + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:286: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:294: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:326: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/jit/annotations.py:310: UserWarning: TorchScript will treat type annotations of Tensor dtype-specific subtypes as if they are normal Tensors. 
dtype constraints are not enforced in compilation either. + warnings.warn("TorchScript will treat type annotations of Tensor " +.. parsed-literal:: + Text Encoder successfully converted to IR -.. parsed-literal:: - 0 +.. parsed-literal:: + + 4202 + -VAE Decoder conversion `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +VAE Decoder conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The VAE model has two parts, an encoder, and a decoder. The encoder is used to convert the image into a low-dimensional latent representation, @@ -681,18 +731,17 @@ diffusion .. code:: ipython3 - VAE_DECODER_ONNX_PATH = Path('vae_decoder.onnx') - VAE_DECODER_OV_PATH = VAE_DECODER_ONNX_PATH.with_suffix('.xml') + VAE_DECODER_OV_PATH = Path('vae_decoder.xml') - def convert_vae_decoder_onnx(vae: torch.nn.Module, onnx_path: Path): + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): """ - Convert VAE model to ONNX, then IR format. + Convert VAE model to IR format. 
Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + prepares example inputs for convert, Parameters: vae (torch.nn.Module): VAE model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model Returns: None """ @@ -704,33 +753,32 @@ diffusion def forward(self, latents): return self.vae.decode(latents) - if not onnx_path.exists(): + if not ir_path.exists(): vae_decoder = VAEDecoderWrapper(vae) latents = torch.zeros((1, 4, 64, 64)) vae_decoder.eval() with torch.no_grad(): - torch.onnx.export(vae_decoder, latents, onnx_path, input_names=[ - 'latents'], output_names=['sample']) - print('VAE decoder successfully converted to ONNX') + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=[(1,4,64,64),]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('VAE decoder successfully converted to IR') if not VAE_DECODER_OV_PATH.exists(): - convert_vae_decoder_onnx(pipe.vae, VAE_DECODER_ONNX_PATH) - !mo --input_model $VAE_DECODER_ONNX_PATH --compress_to_fp16 - print('VAE decoder successfully converted to IR') + convert_vae_decoder(pipe.vae, VAE_DECODER_OV_PATH) else: print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") .. parsed-literal:: - VAE decoder will be loaded from vae_decoder.xml - + VAE decoder successfully converted to IR -Prepare Inference pipeline `⇑ <#top>`__ -############################################################################################################################### +Prepare Inference pipeline +-------------------------------------------------------------------- Putting it all together, let us now take a closer look at how the model works in inference by illustrating the logical flow. |detailed workflow| @@ -866,11 +914,11 @@ on OpenVINO. 
self, tokenizer: CLIPTokenizer, scheduler, - core: Core, - controlnet: Model, - text_encoder: Model, - unet: Model, - vae_decoder: Model, + core: ov.Core, + controlnet: ov.Model, + text_encoder: ov.Model, + unet: ov.Model, + vae_decoder: ov.Model, device:str = "AUTO" ): super().__init__() @@ -880,7 +928,7 @@ on OpenVINO. self.load_models(core, device, controlnet, text_encoder, unet, vae_decoder) self.set_progress_bar_config(disable=True) - def load_models(self, core: Core, device: str, controlnet:Model, text_encoder: Model, unet: Model, vae_decoder: Model): + def load_models(self, core: ov.Core, device: str, controlnet:ov.Model, text_encoder: ov.Model, unet: ov.Model, vae_decoder: ov.Model): """ Function for loading models on device using OpenVINO @@ -1131,13 +1179,6 @@ on OpenVINO. image = np.transpose(image, (0, 2, 3, 1)) return image - -.. parsed-literal:: - - /tmp/ipykernel_1180132/670611772.py:1: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. - from diffusers.pipeline_utils import DiffusionPipeline - - .. code:: ipython3 from transformers import CLIPTokenizer @@ -1184,8 +1225,8 @@ on OpenVINO. fig.savefig("result.png", bbox_inches='tight') return fig -Running Text-to-Image Generation with ControlNet Conditioning and OpenVINO. `⇑ <#top>`__ -############################################################################################################################### +Running Text-to-Image Generation with ControlNet Conditioning and OpenVINO +-------------------------------------------------------------------------------------------------------------------- Now, we are ready to start generation. For improving the generation process, we also introduce an opportunity to provide a @@ -1197,16 +1238,17 @@ this We can keep this field empty if we want to generate image without negative prompting. 
-Select inference device `⇑ <#top>`__ -############################################################################################################################### - +Select inference device for Stable Diffusion pipeline +----------------------------------------------------------------------------------------------- -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 import ipywidgets as widgets + core = ov.Core() + device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value='CPU', @@ -1221,7 +1263,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', options=('CPU', 'GNA', 'AUTO'), value='CPU') @@ -1276,13 +1318,4 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - Running on public URL: https://6927b0a05729fd4297.gradio.live - - This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces) - - - -.. raw:: html - -
diff --git a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_17_0.png b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_17_0.png index 7af8840ee7ade5..1847a6c402bef0 100644 --- a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_17_0.png +++ b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_17_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db732e96aa0954fadfbe1bdd5cddd2131ca83d526ae211d1da75625365b1a482 -size 498463 +oid sha256:93a101bee4378a0dfdea04df02be54e4fd01634bf190a5ec38a8ee1dbe9a046d +size 491302 diff --git a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_8_0.png b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_8_0.png index 7af8840ee7ade5..1847a6c402bef0 100644 --- a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_8_0.png +++ b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/235-controlnet-stable-diffusion-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db732e96aa0954fadfbe1bdd5cddd2131ca83d526ae211d1da75625365b1a482 -size 498463 +oid sha256:93a101bee4378a0dfdea04df02be54e4fd01634bf190a5ec38a8ee1dbe9a046d +size 491302 diff --git a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/index.html b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/index.html index 631e49636d2546..117a81b5b1d4cd 100644 --- a/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/index.html +++ b/docs/notebooks/235-controlnet-stable-diffusion-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of 
/projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/235-controlnet-stable-diffusion-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/235-controlnet-stable-diffusion-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/235-controlnet-stable-diffusion-with-output_files/


../
-235-controlnet-stable-diffusion-with-output_17_..> 16-Aug-2023 01:31              498463
-235-controlnet-stable-diffusion-with-output_8_0..> 16-Aug-2023 01:31              498463
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/235-controlnet-stable-diffusion-with-output_files/


../
+235-controlnet-stable-diffusion-with-output_17_..> 31-Oct-2023 00:35              491302
+235-controlnet-stable-diffusion-with-output_8_0..> 31-Oct-2023 00:35              491302
 

diff --git a/docs/notebooks/236-stable-diffusion-v2-infinite-zoom-with-output.rst b/docs/notebooks/236-stable-diffusion-v2-infinite-zoom-with-output.rst index 7e2ec9efacc6bf..7b9c471a6341ea 100644 --- a/docs/notebooks/236-stable-diffusion-v2-infinite-zoom-with-output.rst +++ b/docs/notebooks/236-stable-diffusion-v2-infinite-zoom-with-output.rst @@ -1,8 +1,6 @@ Infinite Zoom Stable Diffusion v2 and OpenVINO™ =============================================== - - Stable Diffusion v2 is the next generation of Stable Diffusion model a Text-to-Image latent diffusion model created by the researchers and engineers from `Stability AI `__ and @@ -68,33 +66,37 @@ model using OpenVINO. Notebook contains the following steps: -1. Convert PyTorch models to ONNX format. -2. Convert ONNX models to OpenVINO IR format, using model conversion - API. +1. Create pipeline with PyTorch models using Diffusers library. +2. Convert models to OpenVINO IR format, using model conversion API. 3. Run Stable Diffusion v2 inpainting pipeline for generation infinity zoom video -.. 
_top: +**Table of contents:** -**Table of contents**: -- `Stable Diffusion v2 Infinite Zoom Showcase <#stable-diffusion-v2-infinite-zoom-showcase>`__ +- `Stable Diffusion v2 Infinite Zoom + Showcase <#stable-diffusion-v2-infinite-zoom-showcase>`__ - - `Stable Diffusion Text guided Inpainting <#stable-diffusion-text-guided-inpainting>`__ + - `Stable Diffusion Text guided + Inpainting <#stable-diffusion-text-guided-inpainting>`__ -- `Prerequisites <#prerequisites>`__ +- `Prerequisites <#prerequisites>`__ - - `Stable Diffusion in Diffusers library <#stable-diffusion-in-diffusers-library>`__ - - `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ - - `Prepare Inference pipeline <#prepare-inference-pipeline>`__ - - `Zoom Video Generation <#zoom-video-generation>`__ - - `Configure Inference Pipeline <#configure-inference-pipeline>`__ - - `Select inference device <#select-inference-device>`__ - - `Run Infinite Zoom video generation <#run-infinite-zoom-video-generation>`__ - -Stable Diffusion v2 Infinite Zoom Showcase `⇑ <#top>`__ -############################################################################################################################### + - `Stable Diffusion in Diffusers + library <#stable-diffusion-in-diffusers-library>`__ + - `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ + - `Prepare Inference + pipeline <#prepare-inference-pipeline>`__ + - `Zoom Video Generation <#zoom-video-generation>`__ + - `Configure Inference + Pipeline <#configure-inference-pipeline>`__ + - `Select inference device <#select-inference-device>`__ + - `Run Infinite Zoom video + generation <#run-infinite-zoom-video-generation>`__ +Stable Diffusion v2 Infinite Zoom Showcase +------------------------------------------------------------------------------------ In this tutorial we consider how to use Stable 
Diffusion v2 model for generation sequence of images for infinite zoom video effect. To do @@ -102,12 +104,13 @@ this, we will need `stabilityai/stable-diffusion-2-inpainting `__ model. -Stable Diffusion Text guided Inpainting `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Stable Diffusion Text guided Inpainting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In image editing, inpainting is a process of restoring missing parts of pictures. Most -commonly applied to reconstructing old deteriorated images, removing -cracks, scratches, dust spots, or red-eyes from photographs. +In image editing, inpainting is a process of restoring missing parts of +pictures. Most commonly applied to reconstructing old deteriorated +images, removing cracks, scratches, dust spots, or red-eyes from +photographs. But with the power of AI and the Stable Diffusion model, inpainting can be used to achieve more than that. For example, instead of just @@ -135,25 +138,17 @@ Using this inpainting feature, decreasing image by certain margin and masking this border for every new frame we can create interesting Zoom Out video based on our prompt. -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +Prerequisites +------------------------------------------------------- -Install required packages: +install required packages .. code:: ipython3 - !pip install -q "diffusers>=0.14.0" openvino-dev "transformers >= 4.25.1" gradio - - -.. 
parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - + %pip install -q "diffusers>=0.14.0" "transformers >= 4.25.1" gradio "openvino>=2023.1.0" -Stable Diffusion in Diffusers library `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Stable Diffusion in Diffusers library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To work with Stable Diffusion v2, we will use Hugging Face `Diffusers `__ library. To @@ -178,94 +173,16 @@ The code below demonstrates how to create .. parsed-literal:: - 2023-07-16 15:45:16.540634: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-16 15:45:16.577870: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-09-25 12:14:32.810031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-25 12:14:32.851215: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-16 15:45:17.175991: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - - -.. 
parsed-literal:: - - Downloading (…)ain/model_index.json: 0%| | 0.00/544 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert models to OpenVINO Intermediate representation (IR) format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conversion part of model stayed remain as in `Text-to-Image generation notebook <./236-stable-diffusion-v2-text-to-image.ipynb>`__. Except @@ -296,85 +213,102 @@ generated latents channels + 4 for latent representation of masked image from pathlib import Path import torch import numpy as np + import openvino as ov sd2_inpainting_model_dir = Path("sd2_inpainting") sd2_inpainting_model_dir.mkdir(exist_ok=True) .. code:: ipython3 - def convert_encoder_onnx(text_encoder: torch.nn.Module, onnx_path:Path): + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation """ - Convert Text Encoder model to ONNX. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + + def convert_encoder(text_encoder: torch.nn.Module, ir_path:Path): + """ + Convert Text Encoder model to IR. 
+ Function accepts pipeline, prepares example inputs for conversion Parameters: text_encoder (torch.nn.Module): text encoder PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model Returns: None """ - if not onnx_path.exists(): + if not ir_path.exists(): input_ids = torch.ones((1, 77), dtype=torch.long) # switch model to inference mode text_encoder.eval() # disable gradients calculation for reducing memory consumption with torch.no_grad(): - # export model to ONNX format - torch.onnx._export( + # export model + ov_model = ov.convert_model( text_encoder, # model instance - input_ids, # inputs for model tracing - onnx_path, # output file for saving result - input_names=['tokens'], # model input name for onnx representation - output_names=['last_hidden_state', 'pooler_out'], # model output names for onnx representation - opset_version=14, # onnx opset version for export, - onnx_shape_inference=False + example_input=input_ids, # example inputs for model tracing + input=([1,77],) # input shape for conversion ) - print('Text Encoder successfully converted to ONNX') + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('Text Encoder successfully converted to IR') - def convert_unet_onnx(unet:torch.nn.Module, onnx_path:Path, num_channels:int = 4, width:int = 64, height:int = 64): + def convert_unet(unet:torch.nn.Module, ir_path:Path, num_channels:int = 4, width:int = 64, height:int = 64): """ - Convert Unet model to ONNX, then IR format. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Convert Unet model to IR format. 
+ Function accepts pipeline, prepares example inputs for conversion Parameters: unet (torch.nn.Module): UNet PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model num_channels (int, optional, 4): number of input channels width (int, optional, 64): input width height (int, optional, 64): input height Returns: None """ - if not onnx_path.exists(): + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64 + } + if not ir_path.exists(): # prepare inputs encoder_hidden_state = torch.ones((2, 77, 1024)) latents_shape = (2, num_channels, width, height) latents = torch.randn(latents_shape) t = torch.from_numpy(np.array(1, dtype=np.float32)) - - # model size > 2Gb, it will be represented as onnx with external data files, we will store it in separated directory for avoid a lot of files in current directory - onnx_path.parent.mkdir(exist_ok=True, parents=True) unet.eval() + dummy_inputs = (latents, t, encoder_hidden_state) + input_info = [] + for input_tensor in dummy_inputs: + shape = ov.PartialShape(tuple(input_tensor.shape)) + element_type = dtype_mapping[input_tensor.dtype] + input_info.append((shape, element_type)) with torch.no_grad(): - torch.onnx._export( + ov_model = ov.convert_model( unet, - (latents, t, encoder_hidden_state), str(onnx_path), - input_names=['latent_model_input', 't', 'encoder_hidden_states'], - output_names=['out_sample'], - onnx_shape_inference=False + example_input=dummy_inputs, + input=input_info ) - print('U-Net successfully converted to ONNX') + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('U-Net successfully converted to IR') - def convert_vae_encoder_onnx(vae: torch.nn.Module, onnx_path: Path, width:int = 512, height:int = 512): + def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path, width:int = 512, height:int = 512): """ - Convert VAE model to ONNX, then IR format. 
- Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE model to IR format. + Function accepts VAE model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion Parameters: vae (torch.nn.Module): VAE PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model width (int, optional, 512): input width height (int, optional, 512): input height Returns: @@ -386,28 +320,28 @@ generated latents channels + 4 for latent representation of masked image self.vae = vae def forward(self, image): - h = self.vae.encoder(image) - moments = self.vae.quant_conv(h) - return moments + return self.vae.encode(x=image)["latent_dist"].sample() - if not onnx_path.exists(): + if not ir_path.exists(): vae_encoder = VAEEncoderWrapper(vae) vae_encoder.eval() image = torch.zeros((1, 3, width, height)) with torch.no_grad(): - torch.onnx.export(vae_encoder, image, onnx_path, input_names=[ - 'init_image'], output_names=['image_latent']) - print('VAE encoder successfully converted to ONNX') + ov_model = ov.convert_model(vae_encoder, example_input=image, input=([1,3, width, height],)) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('VAE encoder successfully converted to IR') - def convert_vae_decoder_onnx(vae: torch.nn.Module, onnx_path: Path, width:int = 64, height:int = 64): + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path, width:int = 64, height:int = 64): """ - Convert VAE model to ONNX, then IR format. - Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE decoder model to IR format. 
+ Function accepts VAE model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion Parameters: - vae: - onnx_path (Path): File for storing onnx model + vae (torch.nn.Module): VAE model + ir_path (Path): File for storing model width (int, optional, 64): input width height (int, optional, 64): input height Returns: @@ -419,28 +353,26 @@ generated latents channels + 4 for latent representation of masked image self.vae = vae def forward(self, latents): - latents = 1 / 0.18215 * latents return self.vae.decode(latents) - if not onnx_path.exists(): + if not ir_path.exists(): vae_decoder = VAEDecoderWrapper(vae) latents = torch.zeros((1, 4, width, height)) vae_decoder.eval() with torch.no_grad(): - torch.onnx.export(vae_decoder, latents, onnx_path, input_names=[ - 'latents'], output_names=['sample']) - print('VAE decoder successfully converted to ONNX') + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=([1,4, width, height],)) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('VAE decoder successfully converted to IR') .. code:: ipython3 - TEXT_ENCODER_ONNX_PATH_INPAINT = sd2_inpainting_model_dir / "text_encoder.onnx" - TEXT_ENCODER_OV_PATH_INPAINT = TEXT_ENCODER_ONNX_PATH_INPAINT.with_suffix('.xml') + TEXT_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "text_encoder.xml" if not TEXT_ENCODER_OV_PATH_INPAINT.exists(): - convert_encoder_onnx(text_encoder_inpaint, TEXT_ENCODER_ONNX_PATH_INPAINT) - !mo --input_model $TEXT_ENCODER_ONNX_PATH_INPAINT --output_dir $sd2_inpainting_model_dir - print('Text Encoder successfully converted to IR') + convert_encoder(text_encoder_inpaint, TEXT_ENCODER_OV_PATH_INPAINT) else: print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH_INPAINT}") @@ -450,43 +382,16 @@ generated latents channels + 4 for latent representation of masked image .. 
parsed-literal:: - /tmp/ipykernel_1181138/3505677505.py:19: FutureWarning: 'torch.onnx._export' is deprecated in version 1.12.0 and will be removed in version 1.14. Please use `torch.onnx.export` instead. - torch.onnx._export( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:684: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mask = torch.full((tgt_len, tgt_len), torch.tensor(torch.finfo(dtype).min, device=device), device=device) - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:284: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:292: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len): - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/symbolic_helper.py:710: UserWarning: Type cannot be inferred, which might cause exported graph to produce incorrect results. - warnings.warn( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:5408: UserWarning: Exporting aten::index operator of advanced indexing in opset 14 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results. - warnings.warn( - - -.. parsed-literal:: - - Text Encoder successfully converted to ONNX - [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.1/openvino_2_0_transition_guide.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/text_encoder.xml - [ SUCCESS ] BIN file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/text_encoder.bin - Text Encoder successfully converted to IR + Text encoder will be loaded from sd2_inpainting/text_encoder.xml .. 
code:: ipython3 - UNET_ONNX_PATH_INPAINT = sd2_inpainting_model_dir / 'unet/unet.onnx' - UNET_OV_PATH_INPAINT = UNET_ONNX_PATH_INPAINT.parents[1] / 'unet.xml' + UNET_OV_PATH_INPAINT = sd2_inpainting_model_dir / 'unet.xml' if not UNET_OV_PATH_INPAINT.exists(): - convert_unet_onnx(unet_inpaint, UNET_ONNX_PATH_INPAINT, num_channels=9, width=64, height=64) + convert_unet(unet_inpaint, UNET_OV_PATH_INPAINT, num_channels=9, width=64, height=64) del unet_inpaint gc.collect() - !mo --input_model $UNET_ONNX_PATH_INPAINT --output_dir $sd2_inpainting_model_dir - print('U-Net successfully converted to IR') else: del unet_inpaint print(f"U-Net will be loaded from {UNET_OV_PATH_INPAINT}") @@ -495,51 +400,21 @@ generated latents channels + 4 for latent representation of masked image .. parsed-literal:: - /tmp/ipykernel_1181138/3505677505.py:56: FutureWarning: 'torch.onnx._export' is deprecated in version 1.12.0 and will be removed in version 1.14. Please use `torch.onnx.export` instead. - torch.onnx._export( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:752: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]): - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/resnet.py:214: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- assert hidden_states.shape[1] == self.channels - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/resnet.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/resnet.py:138: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/resnet.py:151: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if hidden_states.shape[0] >= 64: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:977: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not return_dict: - - -.. parsed-literal:: - - U-Net successfully converted to ONNX - [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. 
While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.1/openvino_2_0_transition_guide.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/unet.xml - [ SUCCESS ] BIN file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/unet.bin - U-Net successfully converted to IR + U-Net will be loaded from sd2_inpainting/unet.xml .. code:: ipython3 - VAE_ENCODER_ONNX_PATH_INPAINT = sd2_inpainting_model_dir / 'vae_encoder.onnx' - VAE_ENCODER_OV_PATH_INPAINT = VAE_ENCODER_ONNX_PATH_INPAINT.with_suffix('.xml') + VAE_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / 'vae_encoder.xml' if not VAE_ENCODER_OV_PATH_INPAINT.exists(): - convert_vae_encoder_onnx(vae_inpaint, VAE_ENCODER_ONNX_PATH_INPAINT, 512, 512) - !mo --input_model $VAE_ENCODER_ONNX_PATH_INPAINT --output_dir $sd2_inpainting_model_dir - print('VAE encoder successfully converted to IR') + convert_vae_encoder(vae_inpaint, VAE_ENCODER_OV_PATH_INPAINT, 512, 512) else: print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH_INPAINT}") - VAE_DECODER_ONNX_PATH_INPAINT = sd2_inpainting_model_dir / 'vae_decoder.onnx' - VAE_DECODER_OV_PATH_INPAINT = VAE_DECODER_ONNX_PATH_INPAINT.with_suffix('.xml') + VAE_DECODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / 'vae_decoder.xml' if not VAE_DECODER_OV_PATH_INPAINT.exists(): - convert_vae_decoder_onnx(vae_inpaint, VAE_DECODER_ONNX_PATH_INPAINT, 64, 64) - !mo --input_model $VAE_DECODER_ONNX_PATH_INPAINT --output_dir $sd2_inpainting_model_dir - print('VAE decoder successfully converted to IR') + convert_vae_decoder(vae_inpaint, VAE_DECODER_OV_PATH_INPAINT, 64, 64) else: print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH_INPAINT}") @@ -549,49 
+424,12 @@ generated latents channels + 4 for latent representation of masked image .. parsed-literal:: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_graph_shape_type_inference( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_graph_shape_type_inference( - - -.. parsed-literal:: - - VAE encoder successfully converted to ONNX - [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.1/openvino_2_0_transition_guide.html - [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/vae_encoder.xml - [ SUCCESS ] BIN file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/vae_encoder.bin - VAE encoder successfully converted to IR + VAE encoder will be loaded from sd2_inpainting/vae_encoder.xml + VAE decoder will be loaded from sd2_inpainting/vae_decoder.xml -.. parsed-literal:: - - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - - -.. parsed-literal:: - - VAE decoder successfully converted to ONNX - [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. 
While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.1/openvino_2_0_transition_guide.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/vae_decoder.xml - [ SUCCESS ] BIN file: /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/sd2_inpainting/vae_decoder.bin - VAE decoder successfully converted to IR - - -Prepare Inference pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare Inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As it was discussed previously, Inpainting inference pipeline is based on Text-to-Image inference pipeline with addition mask processing step. @@ -609,7 +447,6 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in from transformers import CLIPTokenizer from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler - from openvino.runtime import Model def prepare_mask_and_masked_image(image:PIL.Image.Image, mask:PIL.Image.Image): @@ -663,7 +500,7 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in .. parsed-literal:: - /tmp/ipykernel_1181138/859685649.py:8: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. + /tmp/ipykernel_1292073/2055396221.py:8: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. 
from diffusers.pipeline_utils import DiffusionPipeline @@ -672,12 +509,12 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in class OVStableDiffusionInpaintingPipeline(DiffusionPipeline): def __init__( self, - vae_decoder: Model, - text_encoder: Model, + vae_decoder: ov.Model, + text_encoder: ov.Model, tokenizer: CLIPTokenizer, - unet: Model, + unet: ov.Model, scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - vae_encoder: Model = None, + vae_encoder: ov.Model = None, ): """ Pipeline for text-to-image generation using Stable Diffusion. @@ -737,10 +574,8 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in mask = mask.numpy() # encode the mask image into latents space so we can concatenate it to the latents - moments = self.vae_encoder(masked_image)[self._vae_e_output] - mean, logvar = np.split(moments, 2, axis=1) - std = np.exp(logvar * 0.5) - masked_image_latents = (mean + std * np.random.randn(*mean.shape)) * 0.18215 + latents = self.vae_encoder(masked_image)[self._vae_e_output] + masked_image_latents = latents * 0.18215 mask = np.concatenate([mask] * 2) if do_classifier_free_guidance else mask masked_image_latents = ( @@ -868,7 +703,7 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in **extra_step_kwargs, )["prev_sample"].numpy() # scale and decode the image latents with vae - image = self.vae_decoder(latents)[self._vae_d_output] + image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] image = self.postprocess_image(image, meta, output_type) return {"sample": image} @@ -961,10 +796,8 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in noise = noise * self.scheduler.sigmas[0].numpy() return noise, {} input_image, meta = preprocess(image) - moments = self.vae_encoder(input_image)[self._vae_e_output] - mean, logvar = np.split(moments, 2, axis=1) - std = np.exp(logvar * 0.5) - latents = (mean + std * np.random.randn(*mean.shape)) * 0.18215 + latents = 
self.vae_encoder(input_image)[self._vae_e_output] + latents = latents * 0.18215 latents = self.scheduler.add_noise(torch.from_numpy(latents), torch.from_numpy(noise), latent_timestep).numpy() return latents, meta @@ -1027,9 +860,8 @@ We will reuse ``OVStableDiffusionPipeline`` basic utilities in return timesteps, num_inference_steps - t_start -Zoom Video Generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Zoom Video Generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For achieving zoom effect, we will use inpainting to expand images beyond their original borders. We run our @@ -1263,29 +1095,23 @@ generation is finished, we record frames in reversed order. loop=0, ) -Configure Inference Pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Configure Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Configuration steps: - -1. Load models on device. -2. Configure tokenizer and scheduler. -3. Create instance of ``OVStableDiffusionInpaintingPipeline`` class. +Configuration steps: 1. Load models on device 2. Configure tokenizer and +scheduler 3. Create instance of ``OVStableDiffusionInpaintingPipeline`` +class .. code:: ipython3 - from openvino.runtime import Core - - core = Core() + core = ov.Core() tokenizer = CLIPTokenizer.from_pretrained('openai/clip-vit-large-patch14') -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -1305,17 +1131,19 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GNA', 'AUTO'), value='AUTO') .. code:: ipython3 + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + text_enc_inpaint = core.compile_model(TEXT_ENCODER_OV_PATH_INPAINT, device.value) unet_model_inpaint = core.compile_model(UNET_OV_PATH_INPAINT, device.value) - vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value) - vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value) + vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value, ov_config) + vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value, ov_config) ov_pipe_inpaint = OVStableDiffusionInpaintingPipeline( tokenizer=tokenizer, @@ -1326,9 +1154,8 @@ Select device from dropdown list for running inference using OpenVINO: scheduler=scheduler_inpaint, ) -Run Infinite Zoom video generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Infinite Zoom video generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -1381,8 +1208,8 @@ Run Infinite Zoom video generation `⇑ <#top>`__ .. parsed-literal:: - Running on local URL: http://127.0.0.1:7861 - Running on public URL: https://462b1833bf3b980731.gradio.live + Running on local URL: http://127.0.0.1:7860 + Running on public URL: https://372deef95f8b1d0168.gradio.live This share link expires in 72 hours. 
For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces) @@ -1390,5 +1217,4 @@ Run Infinite Zoom video generation `⇑ <#top>`__ .. .. raw:: html -..
- +..
diff --git a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-comparison-with-output.rst b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-comparison-with-output.rst index ff8f9a9350f7ad..36bbae3ae3cfd5 100644 --- a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-comparison-with-output.rst +++ b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-comparison-with-output.rst @@ -1,18 +1,24 @@ Stable Diffusion v2.1 using Optimum-Intel OpenVINO and multiple Intel Hardware ============================================================================== - +This notebook will provide you a way to see different precision models +performing in different hardware. This notebook was done for showing +case the use of Optimum-Intel-OpenVINO and it is not optimized for +running multiple times. |image0| -.. _top: +**Table of contents:** -**Table of contents**: -- `Showing Info Available Devices <#showing-info-available-devices>`__ -- `Using full precision model in CPU with StableDiffusionPipeline <#using-full-precision-model-in-cpu-with-stablediffusionpipeline>`__ -- `Using full precision model in CPU with OVStableDiffusionPipeline <#using-full-precision-model-in-cpu-with-ovstablediffusionpipeline>`__ -- `Using full precision model in dGPU with OVStableDiffusionPipeline <#using-full-precision-model-in-dgpu-with-ovstablediffusionpipeline>`__ +- `Showing Info Available + Devices <#showing-info-available-devices>`__ +- `Using full precision model in CPU with + ``StableDiffusionPipeline`` <#using-full-precision-model-in-cpu-with-stablediffusionpipeline>`__ +- `Using full precision model in CPU with + ``OVStableDiffusionPipeline`` <#using-full-precision-model-in-cpu-with-ovstablediffusionpipeline>`__ +- `Using full precision model in dGPU with + ``OVStableDiffusionPipeline`` <#using-full-precision-model-in-dgpu-with-ovstablediffusionpipeline>`__ .. 
|image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 @@ -22,23 +28,19 @@ accelerate end-to-end pipelines on Intel architectures. More details in this `repository `__. -.. note:: - - We suggest you to create a different environment and run the following installation command there. - +``Note: We suggest you to create a different environment and run the following installation command there.`` .. code:: ipython3 - %pip install -q "optimum-intel[openvino,diffusers]" "ipywidgets" + %pip install -q "optimum-intel[openvino,diffusers]" "ipywidgets" "transformers >= 4.31" .. code:: ipython3 import warnings warnings.filterwarnings('ignore') -Showing Info Available Devices `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Showing Info Available Devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``available_devices`` property shows the available devices in your system. The “FULL_DEVICE_NAME” option to ``ie.get_property()`` shows the @@ -48,14 +50,11 @@ you have integrated GPU (iGPU) and discrete GPU (dGPU), it will show If you just have either an iGPU or dGPU that will be assigned to ``"GPU"`` -.. note:: - - For more details about GPU with OpenVINO visit this - `link `__. - If you have been facing any issue in Ubuntu 20.04 or Windows 11 read - this - `blog `__. - +Note: For more details about GPU with OpenVINO visit this +`link `__. +If you have been facing any issue in Ubuntu 20.04 or Windows 11 read +this +`blog `__. .. code:: ipython3 @@ -75,13 +74,15 @@ If you just have either an iGPU or dGPU that will be assigned to GPU: Intel(R) Data Center GPU Flex 170 (dGPU) -Using full precision model in CPU with ``StableDiffusionPipeline``. 
`⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Using full precision model in CPU with ``StableDiffusionPipeline`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 from diffusers import StableDiffusionPipeline + import gc + model_id = "stabilityai/stable-diffusion-2-1-base" pipe = StableDiffusionPipeline.from_pretrained(model_id) pipe.save_pretrained("./stabilityai_cpu") @@ -89,6 +90,9 @@ Using full precision model in CPU with ``StableDiffusionPipeline``. `⇑ <#top>` output_cpu = pipe(prompt, num_inference_steps=17).images[0] output_cpu.save("image_cpu.png") output_cpu + + del pipe + gc.collect() @@ -127,9 +131,8 @@ Using full precision model in CPU with ``StableDiffusionPipeline``. `⇑ <#top>` -Using full precision model in CPU with ``OVStableDiffusionPipeline``. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Using full precision model in CPU with ``OVStableDiffusionPipeline`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -217,9 +220,8 @@ Using full precision model in CPU with ``OVStableDiffusionPipeline``. `⇑ <#top -Using full precision model in dGPU with ``OVStableDiffusionPipeline``. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Using full precision model in dGPU with ``OVStableDiffusionPipeline`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model in this notebook is FP32 precision. And thanks to the new feature of OpenVINO 2023.0 you do not need to convert the model to FP16 @@ -244,18 +246,8 @@ for running the inference on GPU. 
output_gpu_ov = ov_pipe(prompt, num_inference_steps=17).images[0] output_gpu_ov.save("image_ov_gpu.png") output_gpu_ov - - - -.. parsed-literal:: - - 0%| | 0/18 [00:00 -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-comparison-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-comparison-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-comparison-with-output_files/


../
-236-stable-diffusion-v2-optimum-demo-comparison..> 16-Aug-2023 01:31              573225
-236-stable-diffusion-v2-optimum-demo-comparison..> 16-Aug-2023 01:31              569855
-236-stable-diffusion-v2-optimum-demo-comparison..> 16-Aug-2023 01:31              466925
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-comparison-with-output_files/


../
+236-stable-diffusion-v2-optimum-demo-comparison..> 31-Oct-2023 00:35              573225
+236-stable-diffusion-v2-optimum-demo-comparison..> 31-Oct-2023 00:35              466925
 

diff --git a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output.rst b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output.rst index bfa6ef6dce9ef0..157f7c253cff7b 100644 --- a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output.rst +++ b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output.rst @@ -1,18 +1,19 @@ Stable Diffusion v2.1 using Optimum-Intel OpenVINO ================================================== - - |image0| -.. _top: +**Table of contents:** -**Table of contents**: -- `Showing Info Available Devices <#showing-info-available-devices>`__ -- `Download Pre-Converted Stable Diffusion 2.1 IR <#download-pre-converted-stable-diffusion-2.1-ir>`__ -- `Save the pre-trained models, Select the inference device and compile it <#save-the-pre-trained-models-select-the-inference-device-and-compile-it>`__ -- `Be creative, add the prompt and enjoy the result <#be-creative-add-the-prompt-and-enjoy-the-result>`__ +- `Showing Info Available + Devices <#showing-info-available-devices>`__ +- `Download Pre-Converted Stable Diffusion 2.1 + IR <#download-pre-converted-stable-diffusion--ir>`__ +- `Save the pre-trained models, Select the inference device and compile + it <#save-the-pre-trained-models-select-the-inference-device-and-compile-it>`__ +- `Be creative, add the prompt and enjoy the + result <#be-creative-add-the-prompt-and-enjoy-the-result>`__ .. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 @@ -22,18 +23,16 @@ accelerate end-to-end pipelines on Intel architectures. More details in this `repository `__. -.. note:: - - We suggest you to create a different environment and run the following installation command there. +``Note: We suggest you to create a different environment and run the following installation command there.`` .. 
code:: ipython3 - %pip install -q "optimum-intel[openvino,diffusers]" "ipywidgets" + %pip install -q "optimum-intel[openvino,diffusers]" "ipywidgets" "transformers >= 4.31" -.. hint:: +.. parsed-literal:: - You may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. Stable Diffusion pipeline should brings 6 elements together, a text @@ -55,11 +54,10 @@ in this notebook is `helenai/stabilityai-stable-diffusion-2-1-base-ov `__. Let’s download the pre-converted model Stable Diffusion 2.1 `Intermediate Representation Format -(IR) `__ - -Showing Info Available Devices `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +(IR) `__ +Showing Info Available Devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``available_devices`` property shows the available devices in your system. The “FULL_DEVICE_NAME” option to ``ie.get_property()`` shows the @@ -69,13 +67,11 @@ you have integrated GPU (iGPU) and discrete GPU (dGPU), it will show If you just have either an iGPU or dGPU that will be assigned to ``"GPU"`` -.. note:: - - For more details about GPU with OpenVINO visit this - `link `__. - If you have been facing any issue in Ubuntu 20.04 or Windows 11 read - this - `blog `__. +Note: For more details about GPU with OpenVINO visit this +`link `__. +If you have been facing any issue in Ubuntu 20.04 or Windows 11 read +this +`blog `__. .. 
code:: ipython3 @@ -96,9 +92,8 @@ If you just have either an iGPU or dGPU that will be assigned to GPU.1: Intel(R) Arc(TM) A770 Graphics (dGPU) -Download Pre-Converted Stable Diffusion 2.1 IR `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Download Pre-Converted Stable Diffusion 2.1 IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -188,9 +183,8 @@ Download Pre-Converted Stable Diffusion 2.1 IR `⇑ <#top>`__ -Save the pre-trained models, Select the inference device and compile it. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Save the pre-trained models, Select the inference device and compile it +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can save the model locally in order to avoid downloading process later. The model will also saved in the cache. @@ -209,17 +203,21 @@ later. The model will also saved in the cache. Compiling the unet... -Be creative, add the prompt and enjoy the result `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Be creative, add the prompt and enjoy the result +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 + import gc + # Generate an image. 
prompt = "red car in snowy forest, epic vista, beautiful landscape, 4k, 8k" output = pipe(prompt, num_inference_steps=17, output_type="pil").images[0] output.save("image.png") output + + del pipe + gc.collect() diff --git a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output_files/index.html b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output_files/index.html index 34231ab3f033a0..795d2dfdddfd24 100644 --- a/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output_files/index.html +++ b/docs/notebooks/236-stable-diffusion-v2-optimum-demo-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-with-output_files/


../
-236-stable-diffusion-v2-optimum-demo-with-outpu..> 16-Aug-2023 01:31              451596
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-optimum-demo-with-output_files/


../
+236-stable-diffusion-v2-optimum-demo-with-outpu..> 31-Oct-2023 00:35              451596
 

diff --git a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output.rst b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output.rst index 7cd65143c0b083..3c7ee85bcbb054 100644 --- a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output.rst +++ b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output.rst @@ -1,8 +1,6 @@ Stable Diffusion Text-to-Image Demo =================================== - - Stable Diffusion is an innovative generative AI technique that allows us to generate and manipulate images in interesting ways, including generating image from text and restoring missing parts of pictures @@ -13,38 +11,31 @@ including being able to use more data, employ more training, and has less restrictive filtering of the dataset. All of these features give us promising results for selecting a wide range of input text prompts! -.. note:: - - This is a shorter version of the - `236-stable-diffusion-v2-text-to-image `__ - notebook for demo purposes and to get started quickly. This version does - not have the full implementation of the helper utilities needed to - convert the models from PyTorch to ONNX to OpenVINO, and the OpenVINO - ``OVStableDiffusionPipeline`` within the notebook directly. If you would - like to see the full implementation of stable diffusion for text to - image, please visit - `236-stable-diffusion-v2-text-to-image `__. - - -.. _top: +**Note:** This is a shorter version of the +`236-stable-diffusion-v2-text-to-image `__ +notebook for demo purposes and to get started quickly. This version does +not have the full implementation of the helper utilities needed to +convert the models from PyTorch to ONNX to OpenVINO, and the OpenVINO +``OVStableDiffusionPipeline`` within the notebook directly. If you would +like to see the full implementation of stable diffusion for text to +image, please visit +`236-stable-diffusion-v2-text-to-image `__. 
-**Table of contents**: -- `Step 0: Install and import prerequisites <#step-0-install-and-import-prerequisites>`__ -- `Step 1: Stable Diffusion v2 Fundamental components <#step-1-stable-diffusion-v2-fundamental-components>`__ +**Table of contents:** +--- - - `Step 1.1: Retrieve components from HuggingFace <#step-1-1-retrieve-components-from-huggingface>`__ - -- `Step 2: Convert the models to OpenVINO <#step-2-convert-the-models-to-openvino>`__ -- `Step 3: Text-to-Image Generation Inference Pipeline <#step-3-text-to-image-generation-inference-pipeline>`__ - - - `Step 3.1: Load and Understand Text to Image OpenVINO models <#step-3-1-load-and-understand-text-to-image-openvino-models>`__ - - `Select inference device <#select-inference-device>`__ - - `Step 3.3: Run Text-to-Image generation <#step-3-3-run-text-to-image-generation>`__ - -Step 0: Install and import prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Step 0: Install and import prerequisites <#step--install-and-import-prerequisites>`__ +- `Step 1: Stable Diffusion v2 Fundamental components <#step--stable-diffusion-v-fundamental-components>`__ +- `Step 1.1: Retrieve components from HuggingFace <#step--retrieve-components-from-huggingface>`__ +- `Step 2: Convert the models to OpenVINO <#step--convert-the-models-to-openvino>`__ +- `Step 3: Text-to-Image Generation Inference Pipeline <#step--text-to-image-generation-inference-pipeline>`__ +- `Step 3.1: Load and Understand Text to Image OpenVINO models <#step--load-and-understand-text-to-image-openvino-models>`__ +- `Step 3.2: Select inference device <#step--select-inference-device>`__ +- `Step 3.3: Run Text-to-Image generation <#step--run-text-to-image-generation>`__ +Step 0: Install and import prerequisites +---------------------------------------------------------------------------------- .. code:: ipython3 @@ -60,11 +51,25 @@ pipelines `__. .. 
code:: ipython3 - !pip install -q "diffusers>=0.14.0" openvino-dev openvino "transformers >= 4.25.1" accelerate + %pip install -q "diffusers>=0.14.0" "openvino>=2023.1.0" "transformers >= 4.31" accelerate "urllib3==1.26.15" -Step 1: Stable Diffusion v2 Fundamental components `⇑ <#top>`__ -############################################################################################################################### +.. parsed-literal:: + + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + WARNING: Ignoring invalid distribution -orch (/home/ea/work/ov_venv/lib/python3.8/site-packages) + + [notice] A new release of pip available: 22.3 -> 23.2.1 + [notice] To update, run: pip install --upgrade pip + Note: you may need to restart the kernel to use updated packages. + + +Step 1: Stable Diffusion v2 Fundamental components +-------------------------------------------------------------------------------------------- Stable Diffusion pipelines for both Text to Image and Inpainting consist of three important parts: @@ -78,9 +83,8 @@ of three important parts: Depending on the pipeline, the parameters for these parts can differ, which we’ll explore in this demo! 
-Step 1.1: Retrieve components from HuggingFace `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Step 1.1: Retrieve components from HuggingFace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let’s start by retrieving these components from HuggingFace! @@ -109,30 +113,36 @@ using ``stable-diffusion-2-1``. .. parsed-literal:: - text_encoder\model.safetensors not found + 2023-09-12 11:59:21.971103: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-12 11:59:22.005818: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-12 11:59:22.607625: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Step 2: Convert the models to OpenVINO `⇑ <#top>`__ -############################################################################################################################### +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/6 [00:00 Onnx -> OpenVINO # 1. Convert the Text Encoder - txt_encoder_ov_path = convert_txt_encoder_onnx_OV(txt2img_model_dir, text_encoder) + txt_encoder_ov_path = txt2img_model_dir / "text_encoder.xml" + convert_encoder(text_encoder, txt_encoder_ov_path) # 2. 
Convert the U-NET - unet_ov_path = convert_unet_onnx_OV(txt2img_model_dir, unet, num_channels=4, width=96, height=96) + unet_ov_path = txt2img_model_dir / "unet.xml" + convert_unet(unet, unet_ov_path, num_channels=4, width=96, height=96) # 3. Convert the VAE encoder - vae_encoder_ov_path = convert_vae_encoder_onnx_OV(txt2img_model_dir, vae, width=768, height=768) + vae_encoder_ov_path = txt2img_model_dir / "vae_encoder.xml" + convert_vae_encoder(vae, vae_encoder_ov_path, width=768, height=768) # 4. Convert the VAE decoder - vae_decoder_ov_path = convert_vae_decoder_onnx_OV(txt2img_model_dir, vae, width=96, height=96) - + vae_decoder_ov_path = txt2img_model_dir / "vae_decoder.xml" + convert_vae_decoder(vae, vae_decoder_ov_path, width=96, height=96) -.. parsed-literal:: - - Text encoder will be loaded from sd2.1\text_encoder.xml - - -.. parsed-literal:: - - WARNING:root:Failed to send event with error cannot schedule new futures after shutdown. - WARNING:root:Failed to send event with error cannot schedule new futures after shutdown. - WARNING:root:Failed to send event with error cannot schedule new futures after shutdown. - WARNING:root:Failed to send event with error cannot schedule new futures after shutdown. 
- - -Step 3: Text-to-Image Generation Inference Pipeline `⇑ <#top>`__ -############################################################################################################################### - - -Step 3.1: Load and Understand Text to Image OpenVINO models `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Step 3: Text-to-Image Generation Inference Pipeline +--------------------------------------------------------------------------------------------- -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Step 3.1: Load and Understand Text to Image OpenVINO models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Step 3.2: Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -201,6 +198,15 @@ Select device from dropdown list for running inference using OpenVINO: device + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + Let’s create instances of our OpenVINO Model for Text to Image. .. code:: ipython3 @@ -258,13 +264,12 @@ As part of the ``OVStableDiffusionPipeline()`` class: .. parsed-literal:: - C:\Users\rcheruvu\Desktop\openvino_notebooks\notebooks\236-stable-diffusion-v2\implementation\ov_stable_diffusion_pipeline.py:10: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. 
+ /home/ea/work/openvino_notebooks/notebooks/236-stable-diffusion-v2/implementation/ov_stable_diffusion_pipeline.py:10: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. from diffusers.pipeline_utils import DiffusionPipeline -Step 3.3: Run Text-to-Image generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Step 3.3: Run Text-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, let’s define some text prompts for image generation and run our inference pipeline. diff --git a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.jpg b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.jpg new file mode 100644 index 00000000000000..db8b0305829407 --- /dev/null +++ b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:130b0c3da24ebf159541023888074c33755c1619cdd596dc0c49610d7d40b972 +size 100414 diff --git a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.png b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.png index 1f07a879344f27..f670bb993562c3 100644 --- a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.png +++ 
b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/236-stable-diffusion-v2-text-to-image-demo-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f817c3131d4c8a805a6088a566c1949257f5be5c18c9ef5062eac5e6c549d5ad -size 1009164 +oid sha256:0b54a4545297dc62c23ae52954b0c57e52b9c13781dfd7fd5623a9dda4702928 +size 1057851 diff --git a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/index.html b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/index.html index 63ddd649d42a35..3f19fd25019d2f 100644 --- a/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/index.html +++ b/docs/notebooks/236-stable-diffusion-v2-text-to-image-demo-with-output_files/index.html @@ -1,7 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-text-to-image-demo-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-text-to-image-demo-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/236-stable-diffusion-v2-text-to-image-demo-with-output_files/


../
-236-stable-diffusion-v2-text-to-image-demo-with..> 16-Aug-2023 01:31             1009164
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/236-stable-diffusion-v2-text-to-image-demo-with-output_files/


../
+236-stable-diffusion-v2-text-to-image-demo-with..> 31-Oct-2023 00:35              100414
+236-stable-diffusion-v2-text-to-image-demo-with..> 31-Oct-2023 00:35             1057851
 

diff --git a/docs/notebooks/236-stable-diffusion-v2-text-to-image-with-output.rst b/docs/notebooks/236-stable-diffusion-v2-text-to-image-with-output.rst index 885e8893389a01..f7c2ce5b701141 100644 --- a/docs/notebooks/236-stable-diffusion-v2-text-to-image-with-output.rst +++ b/docs/notebooks/236-stable-diffusion-v2-text-to-image-with-output.rst @@ -1,8 +1,6 @@ Text-to-Image Generation with Stable Diffusion v2 and OpenVINO™ =============================================================== - - Stable Diffusion v2 is the next generation of Stable Diffusion model a Text-to-Image latent diffusion model created by the researchers and engineers from `Stability AI `__ and @@ -68,55 +66,48 @@ model using OpenVINO. Notebook contains the following steps: -1. Convert PyTorch models to ONNX format. -2. Convert ONNX models to OpenVINO IR format, using model conversion +1. Create PyTorch models pipeline using Diffusers library. +2. Convert PyTorch models to OpenVINO IR format, using model conversion API. 3. Run Stable Diffusion v2 Text-to-Image pipeline with OpenVINO. -.. note:: - - This is the full version of the Stable Diffusion text-to-image - implementation. If you would like to get started and run the notebook - quickly, check out `236-stable-diffusion-v2-text-to-image-demo - notebook `__. - +**Note:** This is the full version of the Stable Diffusion text-to-image +implementation. If you would like to get started and run the notebook +quickly, check out `236-stable-diffusion-v2-text-to-image-demo +notebook `__. -.. 
_top: +**Table of contents:** -**Table of contents**: -- `Prerequisites <#prerequisites>`__ -- `Stable Diffusion v2 for Text-to-Image Generation <#stable-diffusion-v2-for-text-to-image-generation>`__ +- `Prerequisites <#prerequisites>`__ +- `Stable Diffusion v2 for Text-to-Image + Generation <#stable-diffusion-v2-for-text-to-image-generation>`__ - - `Stable Diffusion in Diffusers library <#stable-diffusion-in-diffusers-library>`__ - - `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ - - `Text Encoder <#text-encoder>`__ - - `U-Net <#u-net>`__ - - `VAE <#vae>`__ - - `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ - - `Configure Inference Pipeline <#configure-inference-pipeline>`__ - - `Run Text-to-Image generation <#run-text-to-image-generation>`__ + - `Stable Diffusion in Diffusers + library <#stable-diffusion-in-diffusers-library>`__ + - `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ + - `Text Encoder <#text-encoder>`__ + - `U-Net <#u-net>`__ + - `VAE <#vae>`__ + - `Prepare Inference + Pipeline <#prepare-inference-pipeline>`__ + - `Configure Inference + Pipeline <#configure-inference-pipeline>`__ + - `Run Text-to-Image + generation <#run-text-to-image-generation>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +Prerequisites +------------------------------------------------------- -Install required packages: +Install required packages: .. code:: ipython3 - !pip install -q "diffusers>=0.14.0" openvino-dev "transformers >= 4.25.1" gradio - - -.. 
parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Stable Diffusion v2 for Text-to-Image Generation `⇑ <#top>`__ -############################################################################################################################### + %pip install -q "diffusers>=0.14.0" "openvino>=2023.1.0" "transformers >= 4.25.1" gradio +Stable Diffusion v2 for Text-to-Image Generation +------------------------------------------------------------------------------------------ To start, let’s look on Text-to-Image process for Stable Diffusion v2. We will use `Stable Diffusion @@ -130,8 +121,8 @@ post `__ and original model `repository `__. -Stable Diffusion in Diffusers library `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Stable Diffusion in Diffusers library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To work with Stable Diffusion v2, we will use Hugging Face `Diffusers `__ library. To @@ -163,30 +154,26 @@ using ``stable-diffusion-2-1``: .. parsed-literal:: - 2023-07-16 16:09:31.920601: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-16 16:09:31.958945: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-08-29 22:18:10.107478: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-08-29 22:18:10.146633: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-16 16:09:32.584374: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-08-29 22:18:10.895453: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. parsed-literal:: -Convert models to OpenVINO Intermediate representation (IR) format. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Fetching 13 files: 0%| | 0/13 [00:00`__. We need to -provide a model object, input data for model tracing, and a path for -saving the model. Optionally, we can provide a target ONNX opset for -conversion and other parameters specified in the documentation (for -example, input and output names or dynamic shapes). +Convert models to OpenVINO Intermediate representation (IR) format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -While ONNX models are directly supported by OpenVINO™ runtime, it can be -useful to convert them to IR format to take the advantage of advanced -OpenVINO optimization tools and features. We will use OpenVINO `Model -Optimizer `__ -to convert a model to IR format. +Starting from 2023.0 release, OpenVINO supports PyTorch models directly +via Model Conversion API. ``ov.convert_model`` function accepts instance +of PyTorch model and example inputs for tracing and returns object of +``ov.Model`` class, ready to use or save on disk using ``ov.save_model`` +function. 
The pipeline consists of three important parts: @@ -197,9 +184,8 @@ The pipeline consists of three important parts: Let us convert each part: -Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text-encoder is responsible for transforming the input prompt, for example, “a photo of an astronaut riding a horse” into an embedding @@ -212,9 +198,7 @@ indexes of tokens from text processed by the tokenizer and padded to the maximum length accepted by the model. Model outputs are two tensors: ``last_hidden_state`` - hidden state from the last MultiHeadAttention layer in the model and ``pooler_out`` - pooled output for whole model -hidden states. We will use ``opset_version=14`` because the model -contains the ``triu`` operation, supported in ONNX only starting from -this opset. +hidden states. .. code:: ipython3 @@ -227,45 +211,50 @@ this opset. import gc import torch + import openvino as ov - TEXT_ENCODER_ONNX_PATH = sd2_1_model_dir / 'text_encoder.onnx' - TEXT_ENCODER_OV_PATH = TEXT_ENCODER_ONNX_PATH.with_suffix('.xml') + TEXT_ENCODER_OV_PATH = sd2_1_model_dir / 'text_encoder.xml' - def convert_encoder_onnx(text_encoder: torch.nn.Module, onnx_path:Path): + def cleanup_torchscript_cache(): """ - Convert Text Encoder model to ONNX. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + + def convert_encoder(text_encoder: torch.nn.Module, ir_path:Path): + """ + Convert Text Encoder model to IR. 
+ Function accepts pipeline, prepares example inputs for conversion Parameters: text_encoder (torch.nn.Module): text encoder PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model Returns: None """ - if not onnx_path.exists(): + if not ir_path.exists(): input_ids = torch.ones((1, 77), dtype=torch.long) # switch model to inference mode text_encoder.eval() # disable gradients calculation for reducing memory consumption with torch.no_grad(): - # export model to ONNX format - torch.onnx._export( + # export model + ov_model = ov.convert_model( text_encoder, # model instance - input_ids, # inputs for model tracing - onnx_path, # output file for saving result - input_names=['tokens'], # model input name for onnx representation - output_names=['last_hidden_state', 'pooler_out'], # model output names for onnx representation - opset_version=14, # onnx opset version for export, - onnx_shape_inference=False + example_input=input_ids, # example inputs for model tracing + input=([1,77],) # input shape for conversion ) - print('Text Encoder successfully converted to ONNX') - + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('Text Encoder successfully converted to IR') if not TEXT_ENCODER_OV_PATH.exists(): - convert_encoder_onnx(text_encoder, TEXT_ENCODER_ONNX_PATH) - !mo --input_model $TEXT_ENCODER_ONNX_PATH --output_dir $sd2_1_model_dir - print('Text Encoder successfully converted to IR') + convert_encoder(text_encoder, TEXT_ENCODER_OV_PATH) else: print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH}") @@ -278,9 +267,8 @@ this opset. 
Text encoder will be loaded from sd2.1/text_encoder.xml -U-Net `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +U-Net +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ U-Net model gradually denoises latent image representation guided by text encoder hidden state. @@ -307,65 +295,62 @@ such use cases required to modify number of input channels. import numpy as np - UNET_ONNX_PATH = sd2_1_model_dir / 'unet/unet.onnx' - UNET_OV_PATH = UNET_ONNX_PATH.parents[1] / 'unet.xml' + UNET_OV_PATH = sd2_1_model_dir / 'unet.xml' - def convert_unet_onnx(unet:torch.nn.Module, onnx_path:Path, num_channels:int = 4, width:int = 64, height:int = 64): + def convert_unet(unet:torch.nn.Module, ir_path:Path, num_channels:int = 4, width:int = 64, height:int = 64): """ - Convert Unet model to ONNX, then IR format. - Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, + Convert Unet model to IR format. 
+ Function accepts pipeline, prepares example inputs for conversion Parameters: unet (torch.nn.Module): UNet PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model num_channels (int, optional, 4): number of input channels width (int, optional, 64): input width height (int, optional, 64): input height Returns: None """ - if not onnx_path.exists(): + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64 + } + if not ir_path.exists(): # prepare inputs encoder_hidden_state = torch.ones((2, 77, 1024)) latents_shape = (2, num_channels, width, height) latents = torch.randn(latents_shape) t = torch.from_numpy(np.array(1, dtype=np.float32)) - - # model size > 2Gb, it will be represented as onnx with external data files, we will store it in separated directory for avoid a lot of files in current directory - onnx_path.parent.mkdir(exist_ok=True, parents=True) unet.eval() + dummy_inputs = (latents, t, encoder_hidden_state) + input_info = [] + for input_tensor in dummy_inputs: + shape = ov.PartialShape(tuple(input_tensor.shape)) + element_type = dtype_mapping[input_tensor.dtype] + input_info.append((shape, element_type)) with torch.no_grad(): - torch.onnx._export( + ov_model = ov.convert_model( unet, - (latents, t, encoder_hidden_state), str(onnx_path), - input_names=['latent_model_input', 't', 'encoder_hidden_states'], - output_names=['out_sample'], - onnx_shape_inference=False + example_input=dummy_inputs, + input=input_info ) - print('U-Net successfully converted to ONNX') + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('U-Net successfully converted to IR') if not UNET_OV_PATH.exists(): - convert_unet_onnx(unet, UNET_ONNX_PATH, width=96, height=96) + convert_unet(unet, UNET_OV_PATH, width=96, height=96) del unet gc.collect() - !mo --input_model $UNET_ONNX_PATH --output_dir $sd2_1_model_dir - print('U-Net successfully converted to IR') else: del unet - print(f"U-Net 
will be loaded from {UNET_OV_PATH}") gc.collect(); - -.. parsed-literal:: - - U-Net will be loaded from sd2.1/unet.xml - - -VAE `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The VAE model has two parts, an encoder and a decoder. The encoder is used to convert the image into a low dimensional latent representation, @@ -385,25 +370,22 @@ When running Text-to-Image pipeline, we will see that we **only need the VAE decoder**, but preserve VAE encoder conversion, it will be useful in next chapter of our tutorial. -.. note:: - - This process will take a few minutes and use significant amount of RAM (recommended at least 32GB). - +Note: This process will take a few minutes and use significant amount of +RAM (recommended at least 32GB). .. code:: ipython3 - VAE_ENCODER_ONNX_PATH = sd2_1_model_dir / 'vae_encoder.onnx' - VAE_ENCODER_OV_PATH = VAE_ENCODER_ONNX_PATH.with_suffix('.xml') + VAE_ENCODER_OV_PATH = sd2_1_model_dir / 'vae_encoder.xml' - def convert_vae_encoder_onnx(vae: torch.nn.Module, onnx_path: Path, width:int = 512, height:int = 512): + def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path, width:int = 512, height:int = 512): """ - Convert VAE model to ONNX, then IR format. - Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE model to IR format. + Function accepts VAE model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion Parameters: vae (torch.nn.Module): VAE PyTorch model - onnx_path (Path): File for storing onnx model + ir_path (Path): File for storing model width (int, optional, 512): input width height (int, optional, 512): input height Returns: @@ -415,39 +397,28 @@ next chapter of our tutorial.
self.vae = vae def forward(self, image): - h = self.vae.encoder(image) - moments = self.vae.quant_conv(h) - return moments + return self.vae.encode(x=image)["latent_dist"].sample() - if not onnx_path.exists(): + if not ir_path.exists(): vae_encoder = VAEEncoderWrapper(vae) vae_encoder.eval() image = torch.zeros((1, 3, width, height)) with torch.no_grad(): - torch.onnx.export(vae_encoder, image, onnx_path, input_names=[ - 'init_image'], output_names=['image_latent']) - print('VAE encoder successfully converted to ONNX') + ov_model = ov.convert_model(vae_encoder, example_input=image, input=([1,3, width, height],)) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('VAE encoder successfully converted to IR') - if not VAE_ENCODER_OV_PATH.exists(): - convert_vae_encoder_onnx(vae, VAE_ENCODER_ONNX_PATH, 768, 768) - !mo --input_model $VAE_ENCODER_ONNX_PATH --output_dir $sd2_1_model_dir - print('VAE encoder successfully converted to IR') - else: - print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH}") - - VAE_DECODER_ONNX_PATH = sd2_1_model_dir / 'vae_decoder.onnx' - VAE_DECODER_OV_PATH = VAE_DECODER_ONNX_PATH.with_suffix('.xml') - - - def convert_vae_decoder_onnx(vae: torch.nn.Module, onnx_path: Path, width:int = 64, height:int = 64): + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path, width:int = 64, height:int = 64): """ - Convert VAE model to ONNX, then IR format. - Function accepts pipeline, creates wrapper class for export only necessary for inference part, - prepares example inputs for ONNX conversion via torch.export, + Convert VAE decoder model to IR format. 
+ Function accepts VAE model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion Parameters: - vae: - onnx_path (Path): File for storing onnx model + vae (torch.nn.Module): VAE model + ir_path (Path): File for storing model width (int, optional, 64): input width height (int, optional, 64): input height Returns: @@ -459,24 +430,29 @@ next chapter of our tutorial. self.vae = vae def forward(self, latents): - latents = 1 / 0.18215 * latents return self.vae.decode(latents) - if not onnx_path.exists(): + if not ir_path.exists(): vae_decoder = VAEDecoderWrapper(vae) latents = torch.zeros((1, 4, width, height)) vae_decoder.eval() with torch.no_grad(): - torch.onnx.export(vae_decoder, latents, onnx_path, input_names=[ - 'latents'], output_names=['sample']) - print('VAE decoder successfully converted to ONNX') + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=([1,4, width, height],)) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print('VAE decoder successfully converted to IR') + if not VAE_ENCODER_OV_PATH.exists(): + convert_vae_encoder(vae, VAE_ENCODER_OV_PATH, 768, 768) + else: + print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH}") + + VAE_DECODER_OV_PATH = sd2_1_model_dir / 'vae_decoder.xml' if not VAE_DECODER_OV_PATH.exists(): - convert_vae_decoder_onnx(vae, VAE_DECODER_ONNX_PATH, 96, 96) - !mo --input_model $VAE_DECODER_ONNX_PATH --output_dir $sd2_1_model_dir - print('VAE decoder successfully converted to IR') + convert_vae_decoder(vae, VAE_DECODER_OV_PATH, 96, 96) else: print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") @@ -490,9 +466,8 @@ next chapter of our tutorial. 
VAE decoder will be loaded from sd2.1/vae_decoder.xml -Prepare Inference Pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Putting it all together, let us now take a closer look at how the model works in inference by illustrating the logical flow. @@ -561,7 +536,6 @@ but there is some small difference in details: from transformers import CLIPTokenizer from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler - from openvino.runtime import Model def scale_fit_to_window(dst_width:int, dst_height:int, image_width:int, image_height:int): @@ -613,12 +587,12 @@ but there is some small difference in details: class OVStableDiffusionPipeline(DiffusionPipeline): def __init__( self, - vae_decoder: Model, - text_encoder: Model, + vae_decoder: ov.Model, + text_encoder: ov.Model, tokenizer: CLIPTokenizer, - unet: Model, + unet: ov.Model, scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - vae_encoder: Model = None, + vae_encoder: ov.Model = None, ): """ Pipeline for text-to-image generation using Stable Diffusion. 
@@ -740,7 +714,7 @@ but there is some small difference in details: # compute the previous noisy sample x_t -> x_t-1 latents = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs)["prev_sample"].numpy() # scale and decode the image latents with vae - image = self.vae_decoder(latents)[self._vae_d_output] + image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] image = self.postprocess_image(image, meta, output_type) return {"sample": image} @@ -833,10 +807,8 @@ but there is some small difference in details: noise = noise * self.scheduler.sigmas[0].numpy() return noise, {} input_image, meta = preprocess(image) - moments = self.vae_encoder(input_image)[self._vae_e_output] - mean, logvar = np.split(moments, 2, axis=1) - std = np.exp(logvar * 0.5) - latents = (mean + std * np.random.randn(*mean.shape)) * 0.18215 + latents = self.vae_encoder(input_image)[self._vae_e_output] + latents = latents * 0.18215 latents = self.scheduler.add_noise(torch.from_numpy(latents), torch.from_numpy(noise), latent_timestep).numpy() return latents, meta @@ -899,25 +871,16 @@ but there is some small difference in details: return timesteps, num_inference_steps - t_start - -.. parsed-literal:: - - /tmp/ipykernel_1185037/1028096992.py:9: FutureWarning: Importing `DiffusionPipeline` or `ImagePipelineOutput` from diffusers.pipeline_utils is deprecated. Please import from diffusers.pipelines.pipeline_utils instead. - from diffusers.pipeline_utils import DiffusionPipeline - - -Configure Inference Pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Configure Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First, you should create instances of OpenVINO Model. .. 
code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value='AUTO', @@ -932,17 +895,18 @@ First, you should create instances of OpenVINO Model. .. parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GNA', 'AUTO'), value='AUTO') .. code:: ipython3 - core = Core() + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) unet_model = core.compile_model(UNET_OV_PATH, device.value) - vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value) - vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value) + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) + vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value, ov_config) Model tokenizer and scheduler are also important parts of the pipeline. Let us define them and put all components together. @@ -963,20 +927,16 @@ Let us define them and put all components together. scheduler=scheduler ) -Run Text-to-Image generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Text-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, you can define a text prompts for image generation and run inference pipeline. Optionally, you can also change the random generator seed for latent state initialization and number of steps. -.. note:: - - Consider increasing ``steps`` to get more precise results. + **Note**: Consider increasing ``steps`` to get more precise results. A suggested value is ``50``, but it will take longer time to process. - .. 
code:: ipython3 import gradio as gr @@ -1018,7 +978,7 @@ seed for latent state initialization and number of steps. .. parsed-literal:: - Running on local URL: http://127.0.0.1:7863 + Running on local URL: http://127.0.0.1:7861 To create a public link, set `share=True` in `launch()`. @@ -1026,5 +986,5 @@ seed for latent state initialization and number of steps. .. .. raw:: html -..
+..
diff --git a/docs/notebooks/237-segment-anything-with-output.rst b/docs/notebooks/237-segment-anything-with-output.rst index ecf2d8c0373bcb..ae1359bd6108c5 100644 --- a/docs/notebooks/237-segment-anything-with-output.rst +++ b/docs/notebooks/237-segment-anything-with-output.rst @@ -1,37 +1,46 @@ Object masks from prompts with SAM and OpenVINO =============================================== - - -.. _top: - -**Table of contents**: - -- `Background <#background>`__ -- `Prerequisites <#prerequisites>`__ -- `Convert model to OpenVINO Intermediate Representation <#convert-model-to-openvino-intermediate-representation>`__ - - - `Download model checkpoint and create PyTorch model <#download-model-checkpoint-and-create-pytorch-model>`__ - - `Image Encoder <#image-encoder>`__ - - `Mask predictor <#mask-predictor>`__ - -- `Run OpenVINO model in interactive segmentation mode <#run-openvino-model-in-interactive-segmentation-mode>`__ - - - `Example Image <#example-image>`__ - - `Preprocessing and visualization utilities <#preprocessing-and-visualization-utilities>`__ - - `Image encoding <#image-encoding>`__ - - `Example point input <#example-point-input>`__ - - `Example with multiple points <#example-with-multiple-points>`__ - - `Example box and point input with negative label <#example-box-and-point-input-with-negative-label>`__ - -- `Interactive segmentation <#interactive-segmentation>`__ -- `Run OpenVINO model in automatic mask generation mode <#run-openvino-model-in-automatic-mask-generation-mode>`__ -- `Optimize encoder using NNCF Post-training Quantization API <#optimize-encoder-using-nncf-post-training-quantization-api>`__ - - - `Prepare a calibration dataset <#prepare-a-calibration-dataset>`__ - - `Run quantization and serialize OpenVINO IR model <#run-quantization-and-serialize-openvino-ir-model>`__ - - `Validate Quantized Model Inference <#validate-quantized-model-inference>`__ - - `Compare Performance of the Original and Quantized Models 
<#compare-performance-of-the-original-and-quantized-models>`__ +**Table of contents:** + + +- `Background <#background>`__ +- `Prerequisites <#prerequisites>`__ +- `Convert model to OpenVINO Intermediate + Representation <#convert-model-to-openvino-intermediate-representation>`__ + + - `Download model checkpoint and create PyTorch + model <#download-model-checkpoint-and-create-pytorch-model>`__ + - `Image Encoder <#image-encoder>`__ + - `Mask predictor <#mask-predictor>`__ + +- `Run OpenVINO model in interactive segmentation + mode <#run-openvino-model-in-interactive-segmentation-mode>`__ + + - `Example Image <#example-image>`__ + - `Preprocessing and visualization + utilities <#preprocessing-and-visualization-utilities>`__ + - `Image encoding <#image-encoding>`__ + - `Example point input <#example-point-input>`__ + - `Example with multiple + points <#example-with-multiple-points>`__ + - `Example box and point input with negative + label <#example-box-and-point-input-with-negative-label>`__ + +- `Interactive segmentation <#interactive-segmentation>`__ +- `Run OpenVINO model in automatic mask generation + mode <#run-openvino-model-in-automatic-mask-generation-mode>`__ +- `Optimize encoder using NNCF Post-training Quantization + API <#optimize-encoder-using-nncf-post-training-quantization-api>`__ + + - `Prepare a calibration + dataset <#prepare-a-calibration-dataset>`__ + - `Run quantization and serialize OpenVINO IR + model <#run-quantization-and-serialize-openvino-ir-model>`__ + - `Validate Quantized Model + Inference <#validate-quantized-model-inference>`__ + - `Compare Performance of the Original and Quantized + Models <#compare-performance-of-the-original-and-quantized-models>`__ Segmentation - identifying which image pixels belong to an object - is a core task in computer vision and is used in a broad array of @@ -57,9 +66,8 @@ zero-shot transfer). 
This notebook shows an example of how to convert and use Segment Anything Model in OpenVINO format, allowing it to run on a variety of platforms that support an OpenVINO. -Background `⇑ <#top>`__ -############################################################################################################################### - +Background +---------------------------------------------------- Previously, to solve any kind of segmentation problem, there were two classes of approaches. The first, interactive segmentation, allowed for @@ -126,29 +134,18 @@ post `__ -############################################################################################################################### - +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q "segment_anything" "gradio>=3.25" - - -.. parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Convert model to OpenVINO Intermediate Representation `⇑ <#top>`__ -############################################################################################################################### - + %pip install -q "segment_anything" "gradio>=3.25" "openvino>=2023.1.0" "nncf>=2.5.0" -Download model checkpoint and create PyTorch model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert model to OpenVINO Intermediate Representation +----------------------------------------------------------------------------------------------- +Download model checkpoint and create PyTorch model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are several Segment Anything Model `checkpoints `__ @@ -198,9 +195,8 @@ into account this fact, we split model on 2 independent parts: image_encoder and mask_predictor (combination of Prompt Encoder and Mask Decoder). 
-Image Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Image Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Image Encoder input is tensor with shape ``1x3x1024x1024`` in ``NCHW`` format, contains image for segmentation. Image Encoder output is image @@ -211,23 +207,18 @@ embeddings, tensor with shape ``1x256x64x64`` import warnings from pathlib import Path import torch - from openvino.tools import mo - from openvino.runtime import serialize, Core + import openvino as ov - core = Core() + core = ov.Core() ov_encoder_path = Path("sam_image_encoder.xml") - onnx_encoder_path = ov_encoder_path.with_suffix(".onnx") if not ov_encoder_path.exists(): - if not onnx_encoder_path.exists(): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) - warnings.filterwarnings("ignore", category=UserWarning) - - torch.onnx.export(sam.image_encoder, torch.zeros(1,3,1024,1024), onnx_encoder_path) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) + warnings.filterwarnings("ignore", category=UserWarning) - ov_encoder_model = mo.convert_model(onnx_encoder_path, compress_to_fp16=True) - serialize(ov_encoder_model, str(ov_encoder_path)) + ov_encoder_model = ov.convert_model(sam.image_encoder, example_input=torch.zeros(1,3,1024,1024), input=([1,3,1024,1024],)) + ov.save_model(ov_encoder_model, ov_encoder_path) else: ov_encoder_model = core.read_model(ov_encoder_path) @@ -257,9 +248,8 @@ embeddings, tensor with shape ``1x256x64x64`` ov_encoder = core.compile_model(ov_encoder_model, device.value) -Mask predictor `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Mask predictor +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This notebook expects the model was exported 
with the parameter ``return_single_mask=True``. It means that model will only return the @@ -295,7 +285,7 @@ Model outputs: from typing import Tuple - class SamONNXModel(torch.nn.Module): + class SamExportableModel(torch.nn.Module): def __init__( self, model, @@ -318,15 +308,15 @@ Model outputs: point_embedding = self.model.prompt_encoder.pe_layer._pe_encoding(point_coords) point_labels = point_labels.unsqueeze(-1).expand_as(point_embedding) - point_embedding = point_embedding * (point_labels != -1) + point_embedding = point_embedding * (point_labels != -1).to(torch.float32) point_embedding = point_embedding + self.model.prompt_encoder.not_a_point_embed.weight * ( point_labels == -1 - ) + ).to(torch.float32) for i in range(self.model.prompt_encoder.num_point_embeddings): point_embedding = point_embedding + self.model.prompt_encoder.point_embeddings[ i - ].weight * (point_labels == i) + ].weight * (point_labels == i).to(torch.float32) return point_embedding @@ -402,37 +392,19 @@ Model outputs: ov_model_path = Path("sam_mask_predictor.xml") if not ov_model_path.exists(): - onnx_model_path = ov_model_path.with_suffix('.onnx') - if not onnx_model_path.exists(): - onnx_model = SamONNXModel(sam, return_single_mask=True) - dynamic_axes = { - "point_coords": {0: "batch_size", 1: "num_points"}, - "point_labels": {0: "batch_size", 1: "num_points"}, - } - - embed_dim = sam.prompt_encoder.embed_dim - embed_size = sam.prompt_encoder.image_embedding_size - dummy_inputs = { - "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float), - "point_coords": torch.randint(low=0, high=1024, size=(1, 5, 2), dtype=torch.float), - "point_labels": torch.randint(low=0, high=4, size=(1, 5), dtype=torch.float), - } - output_names = ["masks", "iou_predictions"] - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) - warnings.filterwarnings("ignore", category=UserWarning) - torch.onnx.export( - onnx_model, - 
tuple(dummy_inputs.values()), - onnx_model_path, - input_names=list(dummy_inputs.keys()), - output_names=output_names, - dynamic_axes=dynamic_axes, - ) - - ov_model = mo.convert_model(onnx_model_path, compress_to_fp16=True) - serialize(ov_model, str(ov_model_path)) + exportable_model = SamExportableModel(sam, return_single_mask=True) + embed_dim = sam.prompt_encoder.embed_dim + embed_size = sam.prompt_encoder.image_embedding_size + dummy_inputs = { + "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float), + "point_coords": torch.randint(low=0, high=1024, size=(1, 5, 2), dtype=torch.float), + "point_labels": torch.randint(low=0, high=4, size=(1, 5), dtype=torch.float), + } + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) + warnings.filterwarnings("ignore", category=UserWarning) + ov_model = ov.convert_model(exportable_model, example_input=dummy_inputs) + ov.save_model(ov_model, ov_model_path) else: ov_model = core.read_model(ov_model_path) @@ -453,13 +425,11 @@ Model outputs: ov_predictor = core.compile_model(ov_model, device.value) -Run OpenVINO model in interactive segmentation mode `⇑ <#top>`__ -############################################################################################################################### - - -Example Image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run OpenVINO model in interactive segmentation mode +--------------------------------------------------------------------------------------------- +Example Image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -489,9 +459,8 @@ Example Image `⇑ <#top>`__ .. 
image:: 237-segment-anything-with-output_files/237-segment-anything-with-output_21_0.png -Preprocessing and visualization utilities `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Preprocessing and visualization utilities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To prepare input for Image Encoder we should: @@ -606,9 +575,8 @@ These steps are applicable to all available models w, h = box[2] - box[0], box[3] - box[1] ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)) -Image encoding `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Image encoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To start work with image, we should preprocess it and obtain image embeddings using ``ov_encoder``. We will use the same image for all @@ -624,9 +592,8 @@ reuse them. Now, we can try to provide different prompts for mask generation -Example point input `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Example point input +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this example we select one point. The green star symbol show its location on the image below. @@ -691,9 +658,8 @@ object). .. image:: 237-segment-anything-with-output_files/237-segment-anything-with-output_35_0.png -Example with multiple points `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Example with multiple points +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in this example, we provide additional point for cover larger object area. 
@@ -759,9 +725,8 @@ Package inputs, then predict and threshold the mask. Great! Looks like now, predicted mask cover whole truck. -Example box and point input with negative label `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Example box and point input with negative label +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this example we define input prompt using bounding box and point inside it.The bounding box represented as set of points of its left @@ -833,9 +798,8 @@ Package inputs, then predict and threshold the mask. .. image:: 237-segment-anything-with-output_files/237-segment-anything-with-output_53_0.png -Interactive segmentation `⇑ <#top>`__ -############################################################################################################################### - +Interactive segmentation +------------------------------------------------------------------ Now, you can try SAM on own image. Upload image to input window and click on desired point, model predict segment based on your image and @@ -923,15 +887,15 @@ point. .. parsed-literal:: - /tmp/ipykernel_1187339/1907223323.py:46: GradioDeprecationWarning: The `style` method is deprecated. Please set these arguments in the constructor instead. + /tmp/ipykernel_862585/1907223323.py:46: GradioDeprecationWarning: The `style` method is deprecated. Please set these arguments in the constructor instead. input_img = gr.Image(label="Input", type="numpy").style(height=480, width=480) - /tmp/ipykernel_1187339/1907223323.py:47: GradioDeprecationWarning: The `style` method is deprecated. Please set these arguments in the constructor instead. + /tmp/ipykernel_862585/1907223323.py:47: GradioDeprecationWarning: The `style` method is deprecated. Please set these arguments in the constructor instead. 
output_img = gr.Image(label="Selected Segment", type="numpy").style(height=480, width=480) .. parsed-literal:: - Running on local URL: http://127.0.0.1:7862 + Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. @@ -939,12 +903,11 @@ point. .. .. raw:: html -..
- +..
-Run OpenVINO model in automatic mask generation mode `⇑ <#top>`__ -############################################################################################################################### +Run OpenVINO model in automatic mask generation mode +---------------------------------------------------------------------------------------------- Since SAM can efficiently process prompts, masks for the entire image can be generated by sampling a large number of prompts over an image. @@ -1310,9 +1273,8 @@ is a dictionary containing various data about the mask. These keys are: -Optimize encoder using NNCF Post-training Quantization API `⇑ <#top>`__ -############################################################################################################################### - +Optimize encoder using NNCF Post-training Quantization API +---------------------------------------------------------------------------------------------------- `NNCF `__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -1326,12 +1288,11 @@ The optimization process contains the following steps: 1. Create a Dataset for quantization. 2. Run ``nncf.quantize`` for getting an optimized model. -3. Serialize OpenVINO IR model, using the ``openvino.runtime.serialize`` +3. Serialize OpenVINO IR model, using the ``openvino.save_model`` function. -Prepare a calibration dataset `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare a calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Download COCO dataset. Since the dataset is used to calibrate the model’s parameter instead of fine-tuning it, we don’t need to download @@ -1393,7 +1354,7 @@ dataset and returns data that can be passed to the model for inference. 
Parameters: image_data: image data produced by DataLoader during iteration Returns: - input_tensor: input data in Dict format for ONNX model quantization + input_tensor: input data in Dict format for model quantization """ image = image_data.numpy() processed_image = preprocess_image(np.squeeze(image)) @@ -1407,9 +1368,8 @@ dataset and returns data that can be passed to the model for inference. INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino -Run quantization and serialize OpenVINO IR model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run quantization and serialize OpenVINO IR model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``nncf.quantize`` function provides an interface for model quantization. It requires an instance of the OpenVINO Model and @@ -1427,15 +1387,13 @@ result, we will use a ``mixed`` quantization preset. It provides symmetric quantization of weights and asymmetric quantization of activations. -.. note:: - - Model post-training quantization is time-consuming process. + **Note**: Model post-training quantization is time-consuming process. Be patient, it can take several minutes depending on your hardware. .. code:: ipython3 - # Load FP32 ONNX model - model = core.read_model(onnx_encoder_path) + + model = core.read_model(ov_encoder_path) quantized_model = nncf.quantize(model, calibration_dataset, model_type=nncf.parameters.ModelType.TRANSFORMER, @@ -1445,443 +1403,23 @@ activations. .. 
parsed-literal:: - INFO:nncf:709 ignored nodes was found by types in the NNCFGraph - INFO:nncf:24 ignored nodes was found by name in the NNCFGraph - INFO:nncf:Not adding activation input quantizer for operation: 6 /Add - INFO:nncf:Not adding activation input quantizer for operation: 9 /blocks.0/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 10 /blocks.0/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 16 /blocks.0/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 24 /blocks.0/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 34 /blocks.0/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 45 /blocks.0/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 15 /blocks.0/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 23 /blocks.0/norm1/Mul - 33 /blocks.0/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 556 /blocks.0/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 557 /blocks.0/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 558 /blocks.0/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 633 /blocks.0/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 472 /blocks.0/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 552 /blocks.0/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 551 /blocks.0/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 631 /blocks.0/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 8 /blocks.0/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 13 /blocks.0/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 14 /blocks.0/norm2/Sub - INFO:nncf:Not adding 
activation input quantizer for operation: 22 /blocks.0/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 32 /blocks.0/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 43 /blocks.0/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 56 /blocks.0/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 21 /blocks.0/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 31 /blocks.0/norm2/Mul - 42 /blocks.0/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 91 /blocks.0/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 154 /blocks.0/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 92 /blocks.0/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 120 /blocks.0/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 12 /blocks.0/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 19 /blocks.1/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 20 /blocks.1/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 30 /blocks.1/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 41 /blocks.1/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 54 /blocks.1/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 72 /blocks.1/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 29 /blocks.1/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 40 /blocks.1/norm1/Mul - 53 /blocks.1/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 731 /blocks.1/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 732 /blocks.1/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 733 
/blocks.1/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 820 /blocks.1/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 616 /blocks.1/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 727 /blocks.1/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 726 /blocks.1/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 818 /blocks.1/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 18 /blocks.1/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 27 /blocks.1/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 28 /blocks.1/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 39 /blocks.1/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 52 /blocks.1/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 66 /blocks.1/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 85 /blocks.1/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 38 /blocks.1/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 51 /blocks.1/norm2/Mul - 65 /blocks.1/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 140 /blocks.1/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 272 /blocks.1/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 141 /blocks.1/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 201 /blocks.1/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 26 /blocks.1/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 36 /blocks.2/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 37 /blocks.2/norm1/Sub - INFO:nncf:Not adding activation input quantizer 
for operation: 50 /blocks.2/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 64 /blocks.2/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 83 /blocks.2/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 107 /blocks.2/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 49 /blocks.2/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 63 /blocks.2/norm1/Mul - 82 /blocks.2/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 525 /blocks.2/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 526 /blocks.2/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 527 /blocks.2/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 605 /blocks.2/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 436 /blocks.2/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 521 /blocks.2/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 520 /blocks.2/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 603 /blocks.2/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 35 /blocks.2/Add - INFO:nncf:Not adding activation input quantizer for operation: 47 /blocks.2/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 48 /blocks.2/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 62 /blocks.2/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 81 /blocks.2/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 102 /blocks.2/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 135 /blocks.2/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 61 /blocks.2/norm2/Div - INFO:nncf:Not adding 
activation input quantizer for operation: 80 /blocks.2/norm2/Mul - 101 /blocks.2/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 253 /blocks.2/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 427 /blocks.2/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 254 /blocks.2/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 330 /blocks.2/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 46 /blocks.2/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 59 /blocks.3/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 60 /blocks.3/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 79 /blocks.3/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 100 /blocks.3/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 133 /blocks.3/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 174 /blocks.3/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 78 /blocks.3/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 99 /blocks.3/norm1/Mul - 132 /blocks.3/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1110 /blocks.3/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1111 /blocks.3/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1112 /blocks.3/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 1192 /blocks.3/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 1013 /blocks.3/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1106 /blocks.3/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1105 /blocks.3/attn/Softmax - INFO:nncf:Not adding activation input quantizer 
for operation: 1190 /blocks.3/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 58 /blocks.3/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 76 /blocks.3/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 77 /blocks.3/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 98 /blocks.3/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 131 /blocks.3/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 168 /blocks.3/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 247 /blocks.3/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 97 /blocks.3/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 130 /blocks.3/norm2/Mul - 167 /blocks.3/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 413 /blocks.3/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 588 /blocks.3/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 414 /blocks.3/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 506 /blocks.3/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 75 /blocks.3/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 95 /blocks.4/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 96 /blocks.4/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 129 /blocks.4/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 166 /blocks.4/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 245 /blocks.4/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 317 /blocks.4/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 128 /blocks.4/norm1/Div - INFO:nncf:Not adding 
activation input quantizer for operation: 165 /blocks.4/norm1/Mul - 244 /blocks.4/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1294 /blocks.4/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1295 /blocks.4/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1296 /blocks.4/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 1384 /blocks.4/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 1176 /blocks.4/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1290 /blocks.4/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1289 /blocks.4/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 1382 /blocks.4/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 94 /blocks.4/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 126 /blocks.4/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 127 /blocks.4/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 164 /blocks.4/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 243 /blocks.4/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 311 /blocks.4/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 407 /blocks.4/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 163 /blocks.4/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 242 /blocks.4/norm2/Mul - 310 /blocks.4/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 574 /blocks.4/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 777 /blocks.4/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 575 /blocks.4/mlp/act/Mul - INFO:nncf:Not adding activation input 
quantizer for operation: 678 /blocks.4/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 125 /blocks.4/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 161 /blocks.5/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 162 /blocks.5/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 241 /blocks.5/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 309 /blocks.5/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 405 /blocks.5/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 493 /blocks.5/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 240 /blocks.5/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 308 /blocks.5/norm1/Mul - 404 /blocks.5/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1079 /blocks.5/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1080 /blocks.5/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1081 /blocks.5/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 1165 /blocks.5/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 977 /blocks.5/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1075 /blocks.5/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1074 /blocks.5/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 1163 /blocks.5/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 160 /blocks.5/Add - INFO:nncf:Not adding activation input quantizer for operation: 238 /blocks.5/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 239 /blocks.5/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 307 /blocks.5/norm2/Pow - 
INFO:nncf:Not adding activation input quantizer for operation: 403 /blocks.5/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 488 /blocks.5/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 569 /blocks.5/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 306 /blocks.5/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 402 /blocks.5/norm2/Mul - 487 /blocks.5/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 758 /blocks.5/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 968 /blocks.5/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 759 /blocks.5/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 859 /blocks.5/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 237 /blocks.5/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 304 /blocks.6/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 305 /blocks.6/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 401 /blocks.6/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 486 /blocks.6/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 567 /blocks.6/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 651 /blocks.6/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 400 /blocks.6/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 485 /blocks.6/norm1/Mul - 566 /blocks.6/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1661 /blocks.6/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1662 /blocks.6/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1663 /blocks.6/attn/Squeeze_2 - INFO:nncf:Not adding 
activation input quantizer for operation: 1734 /blocks.6/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 1571 /blocks.6/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1657 /blocks.6/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1656 /blocks.6/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 1732 /blocks.6/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 303 /blocks.6/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 398 /blocks.6/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 399 /blocks.6/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 484 /blocks.6/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 565 /blocks.6/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 645 /blocks.6/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 752 /blocks.6/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 483 /blocks.6/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 564 /blocks.6/norm2/Mul - 644 /blocks.6/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 954 /blocks.6/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1148 /blocks.6/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 955 /blocks.6/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 1060 /blocks.6/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 397 /blocks.6/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 481 /blocks.7/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 482 /blocks.7/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 563 
/blocks.7/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 643 /blocks.7/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 750 /blocks.7/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 846 /blocks.7/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 562 /blocks.7/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 642 /blocks.7/norm1/Mul - 749 /blocks.7/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1821 /blocks.7/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1822 /blocks.7/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1823 /blocks.7/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 1897 /blocks.7/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 1718 /blocks.7/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1817 /blocks.7/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1816 /blocks.7/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 1895 /blocks.7/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 480 /blocks.7/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 560 /blocks.7/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 561 /blocks.7/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 641 /blocks.7/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 748 /blocks.7/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 840 /blocks.7/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 948 /blocks.7/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 640 /blocks.7/norm2/Div - INFO:nncf:Not adding 
activation input quantizer for operation: 747 /blocks.7/norm2/Mul - 839 /blocks.7/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1134 /blocks.7/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1341 /blocks.7/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 1135 /blocks.7/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 1241 /blocks.7/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 559 /blocks.7/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 638 /blocks.8/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 639 /blocks.8/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 746 /blocks.8/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 838 /blocks.8/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 946 /blocks.8/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 1047 /blocks.8/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 745 /blocks.8/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 837 /blocks.8/norm1/Mul - 945 /blocks.8/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1630 /blocks.8/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 1631 /blocks.8/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 1632 /blocks.8/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 1707 /blocks.8/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 1535 /blocks.8/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1626 /blocks.8/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1625 /blocks.8/attn/Softmax - INFO:nncf:Not adding activation input 
quantizer for operation: 1705 /blocks.8/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 637 /blocks.8/Add - INFO:nncf:Not adding activation input quantizer for operation: 743 /blocks.8/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 744 /blocks.8/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 836 /blocks.8/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 944 /blocks.8/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1042 /blocks.8/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 1129 /blocks.8/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 835 /blocks.8/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 943 /blocks.8/norm2/Mul - 1041 /blocks.8/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1322 /blocks.8/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1526 /blocks.8/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 1323 /blocks.8/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 1422 /blocks.8/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 742 /blocks.8/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 833 /blocks.9/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 834 /blocks.9/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 942 /blocks.9/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1040 /blocks.9/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1127 /blocks.9/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 1214 /blocks.9/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 941 /blocks.9/norm1/Div - 
INFO:nncf:Not adding activation input quantizer for operation: 1039 /blocks.9/norm1/Mul - 1126 /blocks.9/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 2098 /blocks.9/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 2099 /blocks.9/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 2100 /blocks.9/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 2137 /blocks.9/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 2038 /blocks.9/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 2094 /blocks.9/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 2093 /blocks.9/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 2135 /blocks.9/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 832 /blocks.9/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 939 /blocks.9/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 940 /blocks.9/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 1038 /blocks.9/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1125 /blocks.9/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1208 /blocks.9/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 1316 /blocks.9/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1037 /blocks.9/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 1124 /blocks.9/norm2/Mul - 1207 /blocks.9/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1512 /blocks.9/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1690 /blocks.9/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 1513 /blocks.9/mlp/act/Mul - 
INFO:nncf:Not adding activation input quantizer for operation: 1611 /blocks.9/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 938 /blocks.9/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1035 /blocks.10/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 1036 /blocks.10/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 1123 /blocks.10/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1206 /blocks.10/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1314 /blocks.10/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 1409 /blocks.10/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1122 /blocks.10/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 1205 /blocks.10/norm1/Mul - 1313 /blocks.10/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 2155 /blocks.10/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 2156 /blocks.10/attn/Squeeze_1 - INFO:nncf:Not adding activation input quantizer for operation: 2157 /blocks.10/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 2177 /blocks.10/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 2121 /blocks.10/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 2151 /blocks.10/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 2150 /blocks.10/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 2175 /blocks.10/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 1034 /blocks.10/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 1120 /blocks.10/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 1121 /blocks.10/norm2/Sub - INFO:nncf:Not 
adding activation input quantizer for operation: 1204 /blocks.10/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1312 /blocks.10/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1403 /blocks.10/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 1506 /blocks.10/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1203 /blocks.10/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 1311 /blocks.10/norm2/Mul - 1402 /blocks.10/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1676 /blocks.10/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1854 /blocks.10/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 1677 /blocks.10/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 1768 /blocks.10/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 1119 /blocks.10/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 1201 /blocks.11/norm1/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 1202 /blocks.11/norm1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 1310 /blocks.11/norm1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1401 /blocks.11/norm1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1504 /blocks.11/norm1/Add - INFO:nncf:Not adding activation input quantizer for operation: 1598 /blocks.11/norm1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1309 /blocks.11/norm1/Div - INFO:nncf:Not adding activation input quantizer for operation: 1400 /blocks.11/norm1/Mul - 1503 /blocks.11/norm1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 2067 /blocks.11/attn/Squeeze - INFO:nncf:Not adding activation input quantizer for operation: 2068 /blocks.11/attn/Squeeze_1 
- INFO:nncf:Not adding activation input quantizer for operation: 2069 /blocks.11/attn/Squeeze_2 - INFO:nncf:Not adding activation input quantizer for operation: 2110 /blocks.11/attn/Mul_2 - INFO:nncf:Not adding activation input quantizer for operation: 2002 /blocks.11/attn/Add_2 - INFO:nncf:Not adding activation input quantizer for operation: 2063 /blocks.11/attn/Add_3 - INFO:nncf:Not adding activation input quantizer for operation: 2062 /blocks.11/attn/Softmax - INFO:nncf:Not adding activation input quantizer for operation: 2108 /blocks.11/attn/MatMul_1 - INFO:nncf:Not adding activation input quantizer for operation: 1200 /blocks.11/Add - INFO:nncf:Not adding activation input quantizer for operation: 1307 /blocks.11/norm2/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 1308 /blocks.11/norm2/Sub - INFO:nncf:Not adding activation input quantizer for operation: 1399 /blocks.11/norm2/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1502 /blocks.11/norm2/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1593 /blocks.11/norm2/Add - INFO:nncf:Not adding activation input quantizer for operation: 1671 /blocks.11/norm2/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1398 /blocks.11/norm2/Div - INFO:nncf:Not adding activation input quantizer for operation: 1501 /blocks.11/norm2/Mul - 1592 /blocks.11/norm2/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1835 /blocks.11/mlp/act/Div - INFO:nncf:Not adding activation input quantizer for operation: 1993 /blocks.11/mlp/act/Add - INFO:nncf:Not adding activation input quantizer for operation: 1836 /blocks.11/mlp/act/Mul - INFO:nncf:Not adding activation input quantizer for operation: 1913 /blocks.11/mlp/act/Mul_1 - INFO:nncf:Not adding activation input quantizer for operation: 1306 /blocks.11/Add_1 - INFO:nncf:Not adding activation input quantizer for operation: 1590 /neck/neck.1/ReduceMean - INFO:nncf:Not 
adding activation input quantizer for operation: 1591 /neck/neck.1/Sub - INFO:nncf:Not adding activation input quantizer for operation: 1669 /neck/neck.1/Pow - INFO:nncf:Not adding activation input quantizer for operation: 1741 /neck/neck.1/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 1834 /neck/neck.1/Add - INFO:nncf:Not adding activation input quantizer for operation: 1911 /neck/neck.1/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 1668 /neck/neck.1/Div - INFO:nncf:Not adding activation input quantizer for operation: 1740 /neck/neck.1/Mul - 1833 /neck/neck.1/Add_1 - - INFO:nncf:Not adding activation input quantizer for operation: 1991 /neck/neck.3/ReduceMean - INFO:nncf:Not adding activation input quantizer for operation: 1992 /neck/neck.3/Sub - INFO:nncf:Not adding activation input quantizer for operation: 2058 /neck/neck.3/Pow - INFO:nncf:Not adding activation input quantizer for operation: 2106 /neck/neck.3/ReduceMean_1 - INFO:nncf:Not adding activation input quantizer for operation: 2144 /neck/neck.3/Add - INFO:nncf:Not adding activation input quantizer for operation: 2168 /neck/neck.3/Sqrt - INFO:nncf:Not adding activation input quantizer for operation: 2057 /neck/neck.3/Div - INFO:nncf:Not adding activation input quantizer for operation: 2105 /neck/neck.3/Mul - 2143 4017 - + 2023-09-11 20:39:36.145499: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-11 20:39:36.181406: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ 2023-09-11 20:39:36.769588: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Statistics collection: 100%|██████████████████| 128/128 [02:12<00:00, 1.03s/it] + Applying Smooth Quant: 100%|████████████████████| 48/48 [00:01<00:00, 32.29it/s] + + +.. parsed-literal:: + + INFO:nncf:36 ignored nodes was found by name in the NNCFGraph .. parsed-literal:: - Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 128/128 [05:14<00:00, 2.45s/it] - Biases correction: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [06:34<00:00, 8.21s/it] + Statistics collection: 100%|██████████████████| 128/128 [04:36<00:00, 2.16s/it] + Applying Fast Bias correction: 100%|████████████| 49/49 [00:28<00:00, 1.72it/s] .. parsed-literal:: @@ -1891,11 +1429,10 @@ activations. .. code:: ipython3 ov_encoder_path_int8 = "sam_image_encoder_int8.xml" - serialize(quantized_model, ov_encoder_path_int8) - -Validate Quantized Model Inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ov.save_model(quantized_model, ov_encoder_path_int8) +Validate Quantized Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We can reuse the previous code to validate the output of ``INT8`` model. @@ -1949,7 +1486,7 @@ Run ``INT8`` model in automatic mask generation mode .. 
parsed-literal:: - 0%| | 0/48 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Compare Performance of the Original and Quantized Models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Finally, use the OpenVINO `Benchmark -Tool `__ +Tool `__ to measure the inference performance of the ``FP32`` and ``INT8`` models. @@ -1979,71 +1516,74 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 69.37 ms + [ INFO ] Read model took 31.21 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,1024,1024] + [ INFO ] x (node: x) : f32 / [...] / [1,3,1024,1024] [ INFO ] Model outputs: - [ INFO ] 4017 (node: 4017) : f32 / [...] / [1,256,64,64] + [ INFO ] ***NO_NAME*** (node: __module.neck.3/aten::add/Add_2933) : f32 / [...] 
/ [1,256,64,64] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,1024,1024] + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,1024,1024] [ INFO ] Model outputs: - [ INFO ] 4017 (node: 4017) : f32 / [...] / [1,256,64,64] + [ INFO ] ***NO_NAME*** (node: __module.neck.3/aten::add/Add_2933) : f32 / [...] / [1,256,64,64] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1196.87 ms + [ INFO ] Compile model took 956.62 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model474 + [ INFO ] EXECUTION_DEVICES: ['CPU'] [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] NETWORK_NAME: torch_jit [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 12 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO - [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: Model474 + [ INFO ] NUM_STREAMS: 12 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] 
PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values! - [ INFO ] Fill input 'input.1' with random values + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 4043.51 ms + [ INFO ] First inference took 3347.39 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 108 iterations - [ INFO ] Duration: 135037.41 ms + [ INFO ] Count: 132 iterations + [ INFO ] Duration: 135907.17 ms [ INFO ] Latency: - [ INFO ] Median: 14646.89 ms - [ INFO ] Average: 14615.54 ms - [ INFO ] Min: 6295.79 ms - [ INFO ] Max: 19356.55 ms - [ INFO ] Throughput: 0.80 FPS + [ INFO ] Median: 12159.63 ms + [ INFO ] Average: 12098.43 ms + [ INFO ] Min: 7652.77 ms + [ INFO ] Max: 13027.98 ms + [ INFO ] Throughput: 0.97 FPS .. code:: ipython3 @@ -2059,69 +1599,72 @@ models. [Step 2/11] Loading OpenVINO Runtime [ WARNING ] Default duration 120 seconds is used for unknown device AUTO [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2023.0.1-11005-fa1c41994f3-releases/2023/0 + [ INFO ] Build ................................. 
2023.1.0-12050-e33de350633 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 104.31 ms + [ INFO ] Read model took 40.67 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] input.1 (node: input.1) : f32 / [...] / [1,3,1024,1024] + [ INFO ] x (node: x) : f32 / [...] / [1,3,1024,1024] [ INFO ] Model outputs: - [ INFO ] 4017 (node: 4017) : f32 / [...] / [1,256,64,64] + [ INFO ] ***NO_NAME*** (node: __module.neck.3/aten::add/Add_2933) : f32 / [...] / [1,256,64,64] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] input.1 (node: input.1) : u8 / [N,C,H,W] / [1,3,1024,1024] + [ INFO ] x (node: x) : u8 / [N,C,H,W] / [1,3,1024,1024] [ INFO ] Model outputs: - [ INFO ] 4017 (node: 4017) : f32 / [...] / [1,256,64,64] + [ INFO ] ***NO_NAME*** (node: __module.neck.3/aten::add/Add_2933) : f32 / [...] 
/ [1,256,64,64] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1414.62 ms + [ INFO ] Compile model took 1151.47 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: + [ INFO ] NETWORK_NAME: Model474 + [ INFO ] EXECUTION_DEVICES: ['CPU'] [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT - [ INFO ] NETWORK_NAME: torch_jit [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] MODEL_PRIORITY: Priority.MEDIUM [ INFO ] MULTI_DEVICE_PRIORITIES: CPU [ INFO ] CPU: - [ INFO ] CPU_BIND_THREAD: YES - [ INFO ] CPU_THREADS_NUM: 0 - [ INFO ] CPU_THROUGHPUT_STREAMS: 12 - [ INFO ] DEVICE_ID: - [ INFO ] DUMP_EXEC_GRAPH_AS_DOT: - [ INFO ] DYN_BATCH_ENABLED: NO - [ INFO ] DYN_BATCH_LIMIT: 0 - [ INFO ] ENFORCE_BF16: NO - [ INFO ] EXCLUSIVE_ASYNC_REQUESTS: NO - [ INFO ] NETWORK_NAME: torch_jit + [ INFO ] AFFINITY: Affinity.CORE + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 + [ INFO ] ENABLE_CPU_PINNING: True + [ INFO ] ENABLE_HYPER_THREADING: True + [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE + [ INFO ] INFERENCE_NUM_THREADS: 36 + [ INFO ] INFERENCE_PRECISION_HINT: + [ INFO ] NETWORK_NAME: Model474 + [ INFO ] NUM_STREAMS: 12 [ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 12 - [ INFO ] PERFORMANCE_HINT: THROUGHPUT + [ INFO ] PERFORMANCE_HINT: PerformanceMode.THROUGHPUT [ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0 - [ INFO ] PERF_COUNT: NO - [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] PERF_COUNT: False + [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE + [ INFO ] MODEL_PRIORITY: Priority.MEDIUM + [ INFO ] LOADED_FROM_CACHE: False [Step 9/11] Creating infer requests and preparing input tensors - [ WARNING ] No input files were given for input 'input.1'!. This input will be filled with random values! - [ INFO ] Fill input 'input.1' with random values + [ WARNING ] No input files were given for input 'x'!. 
This input will be filled with random values! + [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 2694.03 ms + [ INFO ] First inference took 1951.78 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 132 iterations - [ INFO ] Duration: 129404.57 ms + [ INFO ] Count: 216 iterations + [ INFO ] Duration: 130123.96 ms [ INFO ] Latency: - [ INFO ] Median: 11651.20 ms - [ INFO ] Average: 11526.49 ms - [ INFO ] Min: 5003.59 ms - [ INFO ] Max: 13329.53 ms - [ INFO ] Throughput: 1.02 FPS + [ INFO ] Median: 7192.03 ms + [ INFO ] Average: 7197.18 ms + [ INFO ] Min: 6134.35 ms + [ INFO ] Max: 7888.28 ms + [ INFO ] Throughput: 1.66 FPS diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_21_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_21_0.png index c6bb0b4bdca0d7..0f4fae19177398 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_21_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:300b3c9db63a5fdb80cf4156f6c50ac72d64359111791ed9dfca167071a3d8e8 +oid sha256:346047e07dbc27868c4617ea1a9a0b57e6788980308c2cf9785112639b6202f9 size 467418 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_28_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_28_0.png index a4484cc8c5ab06..777175ba957427 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_28_0.png +++ 
b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d7ad488d386ef255fcc9843a4c52297f9242456be6652945676d27aa66e36bf +oid sha256:706d62427504453c1dabd26b6f91dcf1bc4354ff8c357972aee57499258d2945 size 468529 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_35_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_35_0.png index 5b7f897a0fc131..88d01d5d96a7d3 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_35_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_35_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d7bea1506407cac81d630310269f5903c2a5b6106dbbad489fc0f02f5c3452f +oid sha256:eef2326ff43ff103751c1e4e985eb1d00584bc43deaad7bae0becb27840731cf size 469443 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_39_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_39_0.png index 0036aeaefc8b5d..8def1b9bc0117e 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_39_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_39_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:602034052a3cb68853676e3ec0309ed4554c786b8fd6af2cf4d3c6d75fb05cd3 +oid sha256:7f5fb120f01a644da1f7c8d5ff12850bac6da2dc208a95741230ad3c251598e0 size 470668 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_44_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_44_0.png index 9ff95b0d8aba49..849e7f5b4aa04f 100644 --- 
a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_44_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_44_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bf87c5699a07bc45057b539825b1cfd668792f171b74e0a469e95afbc0b99d7 -size 468091 +oid sha256:f237227062ab2ccd8ef03d3b0b3f841c6e68ad5c55737b4ae4852bb73a30f22d +size 468092 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_48_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_48_0.png index ef9be641056053..aa6e6e273bbd19 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_48_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_48_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fd1894491acc22b9c7eb7db28eee021e11af3c323dd30c7f93a363e6ba55899 +oid sha256:26031e0dc1c7895368afbed82a19bbcf62f0e6c9cbc5cc6add149f7e2ddca211 size 468088 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_53_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_53_0.png index 04e80ce21bdb95..de86300f41102c 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_53_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_53_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63cec12807e5b72c324cc14400458ea6415ccc1fabfd232fc3a972975f2e0a7e -size 472754 +oid sha256:687b590a1853070d43145ac92db47f2cf93bfb245b970e2aaf4fd2dfef4f0f5c +size 472756 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.jpg 
b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.jpg index 9128bc05b81b12..837f6ddf6b741b 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.jpg +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8d9b804802277c0d0481f24aa18680a8ef2ea16a9cfd1ab74f26569433b85b3 -size 261936 +oid sha256:05935769e17190de09a2b0f985e1fd2687e23d26c96c06ec603347f17e38f4b1 +size 262203 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.png index 2fbb8cd039a0fe..e21b54d61e7d2c 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_68_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7898288cc74d40771eb95d54c384d6e886a01ab7e307638c727e4968a9f6ddba -size 2431859 +oid sha256:8cd940fe035baf9e0bf49a12c16132dd37fb75a8ab69a8c7dad069ed31265699 +size 2409333 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_80_0.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_80_0.png index 5f6c56307ee26e..09e40842442380 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_80_0.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_80_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd592a78aa28237ff5e3738215368873a50a2af828bb84863a1e55d4e764c91a -size 469423 +oid sha256:5efe373fadc700dc7e9651c52bd657ac6075b75ee657449e3f3e8960b31487bd +size 469432 diff --git 
a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.jpg b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.jpg index 62bb879f1cbb91..0b5ca693a9ded4 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.jpg +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b85995823f6fad126857e3b3ab6eed4304d22648b37df75343ebd4fa9a303ee8 -size 262791 +oid sha256:4ba1f085c14700f85188daf0d68c2cc23a9b42066f3fc3179dd8991c96df5497 +size 262535 diff --git a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.png b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.png index de392dfacb7e8c..9ed47e66f764a4 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.png +++ b/docs/notebooks/237-segment-anything-with-output_files/237-segment-anything-with-output_82_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e810c37a2dc6cd4b05e40a834c024018dc42d1672d5d22a3bfc7fb8f82b748c -size 2434363 +oid sha256:8d8ff846bbc449eb1e1cdb8b7e33b71365c8d514ca55f4221871dfbe45ec5444 +size 2397126 diff --git a/docs/notebooks/237-segment-anything-with-output_files/index.html b/docs/notebooks/237-segment-anything-with-output_files/index.html index 28568c66ba6d18..71c3c02fad6a0e 100644 --- a/docs/notebooks/237-segment-anything-with-output_files/index.html +++ b/docs/notebooks/237-segment-anything-with-output_files/index.html @@ -1,18 +1,18 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/237-segment-anything-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/237-segment-anything-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/237-segment-anything-with-output_files/


../
-237-segment-anything-with-output_21_0.png          16-Aug-2023 01:31              467418
-237-segment-anything-with-output_28_0.png          16-Aug-2023 01:31              468529
-237-segment-anything-with-output_35_0.png          16-Aug-2023 01:31              469443
-237-segment-anything-with-output_39_0.png          16-Aug-2023 01:31              470668
-237-segment-anything-with-output_44_0.png          16-Aug-2023 01:31              468091
-237-segment-anything-with-output_48_0.png          16-Aug-2023 01:31              468088
-237-segment-anything-with-output_53_0.png          16-Aug-2023 01:31              472754
-237-segment-anything-with-output_68_1.jpg          16-Aug-2023 01:31              261936
-237-segment-anything-with-output_68_1.png          16-Aug-2023 01:31             2431859
-237-segment-anything-with-output_80_0.png          16-Aug-2023 01:31              469423
-237-segment-anything-with-output_82_1.jpg          16-Aug-2023 01:31              262791
-237-segment-anything-with-output_82_1.png          16-Aug-2023 01:31             2434363
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/237-segment-anything-with-output_files/


../
+237-segment-anything-with-output_21_0.png          31-Oct-2023 00:35              467418
+237-segment-anything-with-output_28_0.png          31-Oct-2023 00:35              468529
+237-segment-anything-with-output_35_0.png          31-Oct-2023 00:35              469443
+237-segment-anything-with-output_39_0.png          31-Oct-2023 00:35              470668
+237-segment-anything-with-output_44_0.png          31-Oct-2023 00:35              468092
+237-segment-anything-with-output_48_0.png          31-Oct-2023 00:35              468088
+237-segment-anything-with-output_53_0.png          31-Oct-2023 00:35              472756
+237-segment-anything-with-output_68_1.jpg          31-Oct-2023 00:35              262203
+237-segment-anything-with-output_68_1.png          31-Oct-2023 00:35             2409333
+237-segment-anything-with-output_80_0.png          31-Oct-2023 00:35              469432
+237-segment-anything-with-output_82_1.jpg          31-Oct-2023 00:35              262535
+237-segment-anything-with-output_82_1.png          31-Oct-2023 00:35             2397126
 

diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output.rst b/docs/notebooks/238-deep-floyd-if-optimize-with-output.rst new file mode 100644 index 00000000000000..2d704dbf1c2431 --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output.rst @@ -0,0 +1,816 @@ +Post-Training Quantization and Weights Compression of DeepFloyd IF model with NNCF +================================================================================== + +The goal of this tutorial is to demonstrate how to speed up the model by +applying 8-bit post-training quantization and weights compression from +`NNCF `__ (Neural Network +Compression Framework) and infer optimized model via OpenVINO™ Toolkit. + + **NOTE**: you should run + `238-deep-floyd-if-convert <238-deep-floyd-if-convert.ipynb>`__ + notebook first to generate OpenVINO IR model that is used for + optimization. + +The optimization process contains the following steps: 1. Compress +weights of the converted OpenVINO text encoder from +`notebook <238-deep-floyd-if-convert.ipynb>`__ with NNCF. 2. Quantize +the converted stage_1 and stage_2 U-Nets from +`notebook <238-deep-floyd-if-convert.ipynb>`__ with NNCF. 3. Check the +model result using the same input data from the +`notebook <238-deep-floyd-if-convert.ipynb>`__. 4. Compare model size of +converted and optimized models. 5. Compare performance of converted and +optimized models. 
+ +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Compress text encoder + weights <#compress-text-encoder-weights>`__ +- `Create and initialize + quantization <#create-and-initialize-quantization>`__ + + - `Prepare datasets <#prepare-datasets>`__ + - `Quantize first stage + U-Net <#quantize-first-stage-u-net>`__ + - `Quantize second stage + U-Net <#quantize-second-stage-u-net>`__ + +- `Run optimized OpenVINO + models <#run-quantized-openvino-model>`__ + + - `Compare file sizes <#compare-file-sizes>`__ + - `Compare performance time of the converted and optimized + models <#compare-performance-time-of-the-converted-and-optimized-models>`__ + +Prerequisites +------------------------------------------------------- + +.. code:: ipython3 + + %pip install -q datasets "nncf>=2.6.0" + +.. code:: ipython3 + + import nncf + import torch + import openvino as ov + + from diffusers import DiffusionPipeline + from diffusers.utils.pil_utils import pt_to_pil + from pathlib import Path + from typing import Any, List + + from utils import TextEncoder, UnetFirstStage, UnetSecondStage + + checkpoint_variant = 'fp16' + model_dtype = torch.float32 + RANDOM_SEED = 42 + N_DIFFUSION_STEPS = 50 + UNET_2_STEPS = 20 + + core = ov.Core() + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. code:: ipython3 + + MODEL_DIR = Path('./models') + TEXT_ENCODER_IR_PATH = MODEL_DIR / "encoder_ir.xml" + UNET_I_IR_PATH = MODEL_DIR / "unet_ir_I.xml" + UNET_II_IR_PATH = MODEL_DIR / "unet_ir_II.xml" + + if not (TEXT_ENCODER_IR_PATH.exists() and UNET_I_IR_PATH.exists() and UNET_II_IR_PATH.exists()): + raise RuntimeError('This notebook should be run after 238-deep-floyd-if notebook') + +.. code:: ipython3 + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. 
parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +Compress weights +---------------------------------------------------------- + +Text encoder model consumes ~22 GB of disk space. To avoid running out +of memory, we suggest using 8-bit weights compression instead of +quantization. An optimized model will show less speed up than a +quantized model, but this will significantly reduce the model footprint. + +.. code:: ipython3 + + %%time + + text_encoder = core.read_model(TEXT_ENCODER_IR_PATH) + text_encoder_optimized = nncf.compress_weights(text_encoder) + + TEXT_ENCODER_INT8_IR_PATH = Path("_optimized.".join(TEXT_ENCODER_IR_PATH.as_posix().split("."))) + ov.save_model(text_encoder_optimized, TEXT_ENCODER_INT8_IR_PATH) + + +.. parsed-literal:: + + 2023-10-30 08:36:34.384792: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-30 08:36:34.423283: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-30 08:36:35.184200: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. parsed-literal:: + + CPU times: user 3min 16s, sys: 58 s, total: 4min 14s + Wall time: 4min 12s + + +Quantize +-------------------------------------------------- + +Prepare dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +DeepFloyd IF consists of a U-Net model for first and second stages. 
+First stage U-Net generates 64x64 px image based on text prompt, second +stage U-Net generates a 256x256 px image based on image from previous +step. We use a portion of train +`LAION2B `__ dataset +from Hugging Face as calibration data. LAION2B is the English subset of +the `LAION5B `__ dataset, contains over +2 billion objects. + +.. code:: ipython3 + + import numpy as np + from datasets import load_dataset + + np.random.seed(RANDOM_SEED) + + def get_negative_prompt(): + negative_prompts = [ + "amateur", "blurred", "deformed", "disfigured", "disgusting", "jpeg artifacts", "low contrast", + "low quality", "low saturation", "mangled", "morbid", "mutilated", "mutation", + "out of frame", "out of frame", "ugly", "uncentered", "underexposed", "unreal", + ] + num_elements = np.random.randint(2, 6) + random_elements = np.random.choice(negative_prompts, num_elements) + return [" ".join(random_elements)] + + def prepare_calibration_data(dataloader, stage_1): + """ + This function prepares calibration data from a dataloader for a specified number of initialization steps. + It iterates over the dataloader, fetching batches and storing the relevant data. + """ + data = [] + for batch in dataloader: + prompt = batch["TEXT"] + negative_prompt = get_negative_prompt() + prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt, negative_prompt=negative_prompt) + data.append((prompt_embeds, negative_embeds)) + return data + + + def prepare_dataset(stage_1, opt_init_steps=300): + """ + Prepares a text dataset for quantization. + """ + dataset = load_dataset("laion/laion2B-en", streaming=True, split="train") + train_dataset = dataset.shuffle(seed=RANDOM_SEED, buffer_size=1000).take(opt_init_steps) + dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=1) + calibration_data = prepare_calibration_data(dataloader, stage_1) + return calibration_data + +.. 
code:: ipython3 + + %%time + + generator = torch.manual_seed(RANDOM_SEED) + opt_init_steps = 300 + selection_prob = 0.5 + prompts_number = np.ceil(opt_init_steps // (min(N_DIFFUSION_STEPS, UNET_2_STEPS) * selection_prob)) + + stage_1 = DiffusionPipeline.from_pretrained( + "DeepFloyd/IF-I-M-v1.0", + variant=checkpoint_variant, + torch_dtype=model_dtype + ) + encoded_prompts = prepare_dataset(stage_1, int(prompts_number)) + + +.. parsed-literal:: + + safety_checker/model.safetensors not found + + A mixture of fp16 and non-fp16 filenames will be loaded. + Loaded fp16 filenames: + [text_encoder/pytorch_model.fp16-00001-of-00002.bin, unet/diffusion_pytorch_model.fp16.bin, text_encoder/pytorch_model.fp16-00002-of-00002.bin] + Loaded non-fp16 filenames: + [safety_checker/pytorch_model.bin, watermarker/diffusion_pytorch_model.bin + If this behavior is not expected, please check your folder structure. + Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: + ``` + pip install accelerate + ``` + . + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/7 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + /home/ea/work/ov_venv/lib/python3.8/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11080). 
Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.) + return torch._C._cuda_getDeviceCount() > 0 + + + +.. parsed-literal:: + + Loading checkpoint shards: 0%| | 0/2 [00:00= self.prob: + self.data_cache.append(*args) + return super().__call__(*args, **kwargs) + +.. code:: ipython3 + + stage_1.unet = UnetFirstStage( + UNET_I_IR_PATH, + stage_1.unet.config, + dtype=model_dtype, + device=device.value + ) + stage_1.set_progress_bar_config(disable=True) + + stage_1_data_cache = [] + stage_1.unet.unet_openvino = CompiledModelDecorator(stage_1.unet.unet_openvino, prob=selection_prob, data_cache=stage_1_data_cache) + + generator = torch.manual_seed(RANDOM_SEED) + stage_2_inputs = [] # to speed up dataset preparation for stage 2 U-Net we can collect several images below + for data in encoded_prompts: + prompt_embeds, negative_embeds = data + image = stage_1(prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, + generator=generator, output_type="pt", num_inference_steps=N_DIFFUSION_STEPS).images + stage_2_inputs.append((image, prompt_embeds, negative_embeds)) + + if len(stage_1_data_cache) >= opt_init_steps: + break + +Quantize first stage U-Net +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: ipython3 + + %%time + + ov_model = core.read_model(UNET_I_IR_PATH) + stage_1_calibration_dataset = nncf.Dataset(stage_1_data_cache, lambda x: x) + + quantized_model = nncf.quantize( + model=ov_model, + calibration_dataset=stage_1_calibration_dataset, + model_type=nncf.ModelType.TRANSFORMER, + advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.25) + ) + + UNET_I_INT8_PATH = "_optimized.".join(UNET_I_IR_PATH.as_posix().split(".")) + ov.save_model(quantized_model, UNET_I_INT8_PATH) + + +.. parsed-literal:: + + Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [01:35<00:00, 3.14it/s] + Applying Smooth Quant: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 73/73 [00:04<00:00, 17.55it/s] + Statistics collection: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [05:44<00:00, 1.15s/it] + Applying Fast Bias correction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 268/268 [00:35<00:00, 7.50it/s] + + +.. parsed-literal:: + + CPU times: user 1h 8min 46s, sys: 1min 22s, total: 1h 10min 8s + Wall time: 9min 46s + + +.. code:: ipython3 + + %%time + + from tqdm.notebook import tqdm + + start = len(stage_2_inputs) + for i, data in tqdm(enumerate(encoded_prompts[start:])): + prompt_embeds, negative_embeds = data + image = stage_1(prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, + generator=generator, output_type="pt", num_inference_steps=N_DIFFUSION_STEPS).images + stage_2_inputs.append((image, prompt_embeds, negative_embeds)) + + + +.. parsed-literal:: + + 0it [00:00, ?it/s] + + +.. 
parsed-literal:: + + CPU times: user 1h 17min 46s, sys: 44.9 s, total: 1h 18min 31s + Wall time: 4min 46s + + +.. code:: ipython3 + + %%time + + generator = torch.manual_seed(RANDOM_SEED) + opt_init_steps = 300 + + stage_2 = DiffusionPipeline.from_pretrained( + "DeepFloyd/IF-II-M-v1.0", + text_encoder=None, + variant=checkpoint_variant, + torch_dtype=model_dtype + ) + stage_2.set_progress_bar_config(disable=True) + + stage_2.unet = UnetSecondStage( + UNET_II_IR_PATH, + stage_2.unet.config, + dtype=model_dtype, + device=device.value + ) + stage_2_data_cache = [] + stage_2.unet.unet_openvino = CompiledModelDecorator(stage_2.unet.unet_openvino, prob=selection_prob, data_cache=stage_2_data_cache) + + for data in tqdm(stage_2_inputs): + image, prompt_embeds, negative_embeds = data + image = stage_2(image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, + generator=generator, output_type="pt", num_inference_steps=UNET_2_STEPS).images + + if len(stage_2_data_cache) >= opt_init_steps: + break + + +.. parsed-literal:: + + + A mixture of fp16 and non-fp16 filenames will be loaded. + Loaded fp16 filenames: + [text_encoder/model.fp16-00001-of-00002.safetensors, unet/diffusion_pytorch_model.fp16.safetensors, text_encoder/model.fp16-00002-of-00002.safetensors, safety_checker/model.fp16.safetensors] + Loaded non-fp16 filenames: + [watermarker/diffusion_pytorch_model.safetensors + If this behavior is not expected, please check your folder structure. + Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: + ``` + pip install accelerate + ``` + . + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/7 [00:00`__. + +.. 
code:: ipython3 + + prompt = 'ultra close color photo portrait of rainbow owl with deer horns in the woods' + negative_prompt = 'blurred unreal uncentered occluded' + +.. code:: ipython3 + + %%time + + stage_1 = DiffusionPipeline.from_pretrained( + "DeepFloyd/IF-I-M-v1.0", + variant=checkpoint_variant, + torch_dtype=model_dtype + ) + + # Initialize the First Stage U-Net wrapper class + stage_1.unet = UnetFirstStage( + UNET_I_INT8_PATH, + stage_1.unet.config, + dtype=model_dtype, + device=device.value + ) + + stage_1.text_encoder = TextEncoder(TEXT_ENCODER_INT8_IR_PATH, dtype=model_dtype, device=device.value) + print('The model has been loaded') + + # Generate text embeddings + prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt, negative_prompt=negative_prompt) + + # Fix PRNG seed + generator = torch.manual_seed(RANDOM_SEED) + + # Inference + image = stage_1(prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, + generator=generator, output_type="pt", num_inference_steps=N_DIFFUSION_STEPS).images + + # Show the image + pt_to_pil(image)[0] + + +.. parsed-literal:: + + safety_checker/model.safetensors not found + + A mixture of fp16 and non-fp16 filenames will be loaded. + Loaded fp16 filenames: + [text_encoder/pytorch_model.fp16-00001-of-00002.bin, unet/diffusion_pytorch_model.fp16.bin, text_encoder/pytorch_model.fp16-00002-of-00002.bin] + Loaded non-fp16 filenames: + [safety_checker/pytorch_model.bin, watermarker/diffusion_pytorch_model.bin + If this behavior is not expected, please check your folder structure. + Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: + ``` + pip install accelerate + ``` + . + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/7 [00:00`__. 
+ + **NOTE**: For more accurate performance, run ``benchmark_app`` in a + terminal/command prompt after closing other applications. Run + ``benchmark_app --help`` to see an overview of all command-line + options. + +.. code:: ipython3 + + import re + + def get_fps(benchmark_output: str): + parsed_output = [line for line in benchmark_output if 'Throughput:' in line] + fps = re.findall(r"\d+\.\d+", parsed_output[0])[0] + return fps + +Text encoder + +.. code:: ipython3 + + benchmark_output = !benchmark_app -m $TEXT_ENCODER_IR_PATH -d $device.value -api async + original_fps = get_fps(benchmark_output) + print(f"FP16 Text Encoder Throughput: {original_fps} FPS") + + benchmark_output = !benchmark_app -m $TEXT_ENCODER_INT8_IR_PATH -d $device.value -api async + optimized_fps = get_fps(benchmark_output) + print(f"INT8 Text Encoder Throughput: {optimized_fps} FPS") + print(f"Text encoder speed up: {float(optimized_fps) / float(original_fps)}") + + +.. parsed-literal:: + + FP16 Text Encoder Throughput: 0.99 FPS + INT8 Text Encoder Throughput: 2.47 FPS + Text encoder speed up: 2.4949494949494953 + + +First stage UNet + +.. code:: ipython3 + + benchmark_output = !benchmark_app -m $UNET_I_IR_PATH -d $device.value -api async + original_fps = get_fps(benchmark_output) + print(f"FP16 1 stage U-Net Throughput: {original_fps} FPS") + + benchmark_output = !benchmark_app -m $UNET_I_INT8_PATH -d $device.value -api async + optimized_fps = get_fps(benchmark_output) + print(f"INT8 1 stage U-Net Throughput: {optimized_fps} FPS") + print(f"1 stage U-Net speed up: {float(optimized_fps) / float(original_fps)}") + + +.. parsed-literal:: + + FP16 1 stage U-Net Throughput: 4.65 FPS + INT8 1 stage U-Net Throughput: 12.06 FPS + 1 stage U-Net speed up: 2.593548387096774 + + +Second stage UNet + +.. 
code:: ipython3 + + benchmark_output = !benchmark_app -m $UNET_II_IR_PATH -d $device.value -api async + original_fps = get_fps(benchmark_output) + print(f"FP16 2 stage U-Net Throughput: {original_fps} FPS") + + benchmark_output = !benchmark_app -m $UNET_II_INT8_PATH -d $device.value -api async + optimized_fps = get_fps(benchmark_output) + print(f"INT8 2 stage U-Net Throughput: {optimized_fps} FPS") + print(f"2 stage U-Net speed up: {float(optimized_fps) / float(original_fps)}") + + +.. parsed-literal:: + + FP16 2 stage U-Net Throughput: 0.28 FPS + INT8 2 stage U-Net Throughput: 0.92 FPS + 2 stage U-Net speed up: 3.2857142857142856 + diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.jpg b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.jpg new file mode 100644 index 00000000000000..6366c21515ae39 --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811dbf6cfc44f8eb6babf8771a3a98edb04aab2a92626add147b09205858dbae +size 2577 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.png b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.png new file mode 100644 index 00000000000000..e3cbc9ce7c746a --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_23_6.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8734c254db111a957ea26abd818829ae86fc04d0a007320bc978e4baf2d60913 +size 11369 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.jpg 
b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.jpg new file mode 100644 index 00000000000000..e4317f1326d785 --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2292f0150cb59cb15ecc6719ad2f260b3d366d768c1ced1d3b96327a029a776a +size 23341 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.png b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.png new file mode 100644 index 00000000000000..9d485b5b5703fb --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_24_5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94733cc071e9db841a5b317e4a91919e608b238ca613df63d6bf26064ff9e0f +size 164221 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.jpg b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.jpg new file mode 100644 index 00000000000000..66547cbf23c790 --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17806ca0d95899c5a82007a9719ef58a43a84ac1843beda3c4825a5d031ed57c +size 210327 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.png b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.png new file mode 100644 index 00000000000000..de070b06159343 --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/238-deep-floyd-if-optimize-with-output_25_1.png @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a8d612863d32d45ed612993997e6c0d773e540b0f7eb619c543156d0231e29 +size 1937722 diff --git a/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/index.html b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/index.html new file mode 100644 index 00000000000000..a18ad0334b502d --- /dev/null +++ b/docs/notebooks/238-deep-floyd-if-optimize-with-output_files/index.html @@ -0,0 +1,12 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/238-deep-floyd-if-optimize-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/238-deep-floyd-if-optimize-with-output_files/


../
+238-deep-floyd-if-optimize-with-output_23_6.jpg    31-Oct-2023 00:35                2577
+238-deep-floyd-if-optimize-with-output_23_6.png    31-Oct-2023 00:35               11369
+238-deep-floyd-if-optimize-with-output_24_5.jpg    31-Oct-2023 00:35               23341
+238-deep-floyd-if-optimize-with-output_24_5.png    31-Oct-2023 00:35              164221
+238-deep-floyd-if-optimize-with-output_25_1.jpg    31-Oct-2023 00:35              210327
+238-deep-floyd-if-optimize-with-output_25_1.png    31-Oct-2023 00:35             1937722
+

+ diff --git a/docs/notebooks/238-deep-floyd-if-with-output.rst b/docs/notebooks/238-deep-floyd-if-with-output.rst deleted file mode 100644 index 4f46b3c9ead026..00000000000000 --- a/docs/notebooks/238-deep-floyd-if-with-output.rst +++ /dev/null @@ -1,884 +0,0 @@ -Image generation with DeepFloyd IF and OpenVINO™ -================================================ - -DeepFloyd IF is an advanced open-source text-to-image model that -delivers remarkable photorealism and language comprehension. DeepFloyd -IF consists of a frozen text encoder and three cascaded pixel diffusion -modules: a base model that creates 64x64 pixel images based on text -prompts and two super-resolution models, each designed to generate -images with increasing resolution: 256x256 pixel and 1024x1024 pixel. -All stages of the model employ a frozen text encoder, built on the T5 -transformer, to derive text embeddings, which are then passed to a UNet -architecture enhanced with cross-attention and attention pooling. - -Text encoder impact -~~~~~~~~~~~~~~~~~~~ - -- **Profound text prompt comprehension.** The generation pipeline - leverages the T5-XXL-1.1 Large Language Model (LLM) as a text - encoder. Its intelligence is backed by a substantial number of - text-image cross-attention layers, this ensures superior alignment - between the prompt and the generated image. - -- **Realistic text in generated images.** Capitalizing on the - capabilities of the T5 model, DeepFloyd IF produces readable text - depictions alongside objects with distinct attributes, which have - typically been a challenge for most existing text-to-image models. - -DeepFloyd IF Distinctive Features -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -First of all, it is **Modular**. DeepFloyd IF pipeline is a consecutive -inference of several neural networks. - -Which makes it **Cascaded**. The base model generates low-resolution -samples, then super-resolution models upsample the images to produce -high-resolution results. 
The models were individually trained at -different resolutions. - -DeepFloyd IF employs **Diffusion** models. Diffusion models are machine -learning systems that are trained to denoise random Gaussian noise step -by step, to get to a sample of interest, such as an image. Diffusion -models have been shown to achieve state-of-the-art results for -generating image data. - -And finally, DeepFloyd IF operates in **Pixel** space. Unlike latent -diffusion models (Stable Diffusion for instance), the diffusion is -implemented on a pixel level. - -.. figure:: https://github.com/deep-floyd/IF/raw/develop/pics/deepfloyd_if_scheme.jpg - :alt: deepfloyd_if_scheme - - deepfloyd_if_scheme - -The graph above depicts the three-stage generation pipeline: A text -prompt is passed through the frozen T5-XXL LLM to convert it into a -vector in embedded space. - -1. Stage 1: The first diffusion model in the cascade transforms the - embedding vector into a 64x64 image. The DeepFloyd team has trained - **three versions** of the base model, each with different parameters: - IF-I 400M, IF-I 900M, and IF-I 4.3B. The smallest one is used by - default, but users are free to change the checkpoint name to - `“DeepFloyd/IF-I-L-v1.0” `__ - or - `“DeepFloyd/IF-I-XL-v1.0” `__ - -2. Stage 2: To upscale the image, two text-conditional super-resolution - models (Efficient U-Net) are applied to the output of the first - diffusion model. The first of these upscales the sample from 64x64 - pixel to 256x256 pixel resolution. Again, several versions of this - model are available: IF-II 400M (default) and IF-II 1.2B (checkpoint - name “DeepFloyd/IF-II-L-v1.0”). - -3. Stage 3: Follows the same path as Stage 2 and upscales the image to - 1024x1024 pixel resolution. It is not released yet, so we will use a - conventional Super Resolution network to get hi-res results. - - - - -.. 
_top: - -**Table of contents**: - -- `Prerequisites <#prerequisites>`__ - - - `Authentication <#authentication>`__ - -- `DeepFloyd IF in Diffusers library <#deepfloyd-if-in-diffusers-library>`__ -- `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ -- `Convert Text Encoder <#convert-text-encoder>`__ -- `Convert the first Pixel Diffusion module’s UNet <#convert-the-first-pixel-diffusion-modules-unet>`__ -- `Convert the second pixel diffusion module <#convert-the-second-pixel-diffusion-module>`__ -- `Prepare Inference pipeline <#prepare-inference-pipeline>`__ -- `Run Text-to-Image generation <#run-text-to-image-generation>`__ - - - `Text Encoder inference <#text-encoder-inference>`__ - - `First Stage diffusion block inference <#first-stage-diffusion-block-inference>`__ - - `Second Stage diffusion block inference <#second-stage-diffusion-block-inference>`__ - - `Third Stage diffusion block <#third-stage-diffusion-block>`__ - - `Upscale the generated image using a Super Resolution network <#upscale-the-generated-image-using-a-super-resolution-network>`__ - - - `Download the Super Resolution model weights <#download-the-super-resolution-model-weights>`__ - - `Reshape the model’s inputs <#reshape-the-models-inputs>`__ - - `Prepare the input images and run the model <#prepare-the-input-images-and-run-the-model>`__ - - `Display the result <#display-the-result>`__ - -.. note:: - - - *This example requires the download of roughly 27 GB of model - checkpoints, which could take some time depending on your internet - connection speed. Additionally, the converted models will consume - another 27 GB of disk space.* - - *Please be aware that a minimum of 32 GB of RAM is necessary to - convert and run inference on the models. There may be instances - where the notebook appears to freeze or stop responding.* - - *To access the model checkpoints, you’ll need a Hugging Face - account. 
You’ll also be prompted to explicitly accept the*\ `model - license `__\ *.* - -Prerequisites `⇑ <#top>`__ -############################################################################################################################### - -Install required packages. - -.. code:: ipython3 - - # Set up requirements - - !pip install -q --upgrade pip - !pip install -q "diffusers>=0.16.1" accelerate transformers safetensors sentencepiece huggingface_hub - !pip install -q "openvino-dev>=2023.0.0" - -.. code:: ipython3 - - from collections import namedtuple - import gc - from pathlib import Path - from typing import Union, Tuple - - import diffusers - from diffusers import DiffusionPipeline - from diffusers.utils import pt_to_pil - from openvino.runtime import Core, PartialShape, serialize - from openvino.tools import mo - from openvino.tools.mo.convert import InputCutInfo - import torch - - -.. parsed-literal:: - - 2023-05-29 11:26:42.788524: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-05-29 11:26:42.825669: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-05-29 11:26:43.383859: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. 
code:: ipython3 - - checkpoint_variant = 'fp16' - model_dtype = torch.float32 - ir_input_type = 'f32' - compress_to_fp16 = False - - models_dir = Path('./models') - models_dir.mkdir(exist_ok=True) - - encoder_ir_path = models_dir / 'encoder_ir.xml' - first_stage_unet_ir_path = models_dir / 'unet_ir_I.xml' - second_stage_unet_ir_path = models_dir / 'unet_ir_II.xml' - -Authentication `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -In order to access IF checkpoints, users need to provide an authentication token. - -If you already have a token, you can input it into the provided form in -the next cell. If not, please proceed according to the following -instructions: - -1. Make sure to have a `Hugging Face `__ - account and be logged in -2. Accept the license on the model card of - `DeepFloyd/IF-I-M-v1.0 `__ -3. To generate a token, proceed to `this - page `__ - -Uncheck the ``Add token as git credential?`` box. - -.. code:: ipython3 - - from huggingface_hub import login - - # Execute this cell to access the authentication form - login() - - - -.. parsed-literal:: - - VBox(children=(HTML(value='
`__ -############################################################################################################################### - -To work with IF by DeepFloyd Lab, we will use `Hugging Face Diffusers -package `__. Diffusers package -exposes the ``DiffusionPipeline`` class, simplifying experiments with -diffusion models. The code below demonstrates how to create a -``DiffusionPipeline`` using IF configs: - -.. code:: ipython3 - - %%time - - # Downloading the model weights may take some time. The approximate total checkpoints size is 27GB. - stage_1 = DiffusionPipeline.from_pretrained( - "DeepFloyd/IF-I-M-v1.0", - variant=checkpoint_variant, - torch_dtype=model_dtype - ) - - stage_2 = DiffusionPipeline.from_pretrained( - "DeepFloyd/IF-II-M-v1.0", - text_encoder=None, - variant=checkpoint_variant, - torch_dtype=model_dtype - ) - - -.. parsed-literal:: - - safety_checker/model.safetensors not found - - A mixture of fp16 and non-fp16 filenames will be loaded. - Loaded fp16 filenames: - [unet/diffusion_pytorch_model.fp16.bin, text_encoder/pytorch_model.fp16-00002-of-00002.bin, text_encoder/pytorch_model.fp16-00001-of-00002.bin] - Loaded non-fp16 filenames: - [watermarker/diffusion_pytorch_model.bin, safety_checker/pytorch_model.bin - If this behavior is not expected, please check your folder structure. - The config attributes {'lambda_min_clipped': -5.1} were passed to DDPMScheduler, but are not expected and will be ignored. Please verify your scheduler_config.json configuration file. - - - -.. parsed-literal:: - - Loading checkpoint shards: 0%| | 0/2 [00:00`__ -############################################################################################################################### - -Model conversion API enables direct conversion of PyTorch -models. We will utilize the ``mo.convert_model`` method to acquire -OpenVINO IR versions of the models. This requires providing a model -object, input data for model tracing, and other relevant parameters. 
The -``use_legacy_frontend=True`` parameter instructs model conversion API to -employ the ONNX model format as an intermediate step, as opposed to -using the PyTorch JIT compiler, which is not optimal for our situation. - -The pipeline consists of three important parts: - -- A Text Encoder that translates user prompts to vectors in the latent - space that the Diffusion model can understand. -- A Stage 1 U-Net for step-by-step denoising latent image - representation. -- A Stage 2 U-Net that takes low resolution output from the previous - step and the latent representations to upscale the resulting image. - -Let us convert each part. - -1. Convert Text Encoder `⇑ <#top>`__ -############################################################################################################################### - - -The text encoder is responsible for converting the input prompt, such as -“ultra close-up color photo portrait of rainbow owl with deer horns in -the woods” into an embedding space that can be fed to the next stage’s -U-Net. Typically, it is a transformer-based encoder that maps a sequence -of input tokens to a sequence of text embeddings. - -The input for the text encoder consists of a tensor ``input_ids``, which -contains token indices from the text processed by the tokenizer and -padded to the maximum length accepted by the model. - -*Note* the ``input`` argument passed to the ``convert_model`` method. -The ``convert_model`` can be called with the ``input shape`` argument -and/or the PyTorch-specific ``example_input`` argument. However, in this -case, the ``InputCutInfo`` class was utilized to describe the model -input and provide it as the ``input`` argument. Using the -``InputCutInfo`` class offers a framework-agnostic solution and enables -the definition of complex inputs. It allows specifying the input name, -shape, type, and value within a single argument, providing greater -flexibility. - -To learn more, refer to this -`page `__ - -.. 
code:: ipython3 - - %%time - - if not encoder_ir_path.exists(): - encoder_ir = mo.convert_model( - stage_1.text_encoder, - input=[InputCutInfo(shape=PartialShape([1,77]), type='i64'),], - compress_to_fp16=compress_to_fp16, - ) - - # Serialize the IR model to disk, we will load it at inference time - serialize(encoder_ir, encoder_ir_path) - del encoder_ir - - del stage_1.text_encoder - gc.collect(); - - -.. parsed-literal:: - - CPU times: user 306 ms, sys: 1.05 s, total: 1.36 s - Wall time: 1.37 s - - -Convert the first Pixel Diffusion module’s UNet `⇑ <#top>`__ -############################################################################################################################### - - -U-Net model gradually denoises latent image representation guided by -text encoder hidden state. - -U-Net model has three inputs: - -``sample`` - latent image sample from previous step. Generation process -has not been started yet, so you will use random noise. ``timestep`` - -current scheduler step. ``encoder_hidden_state`` - hidden state of text -encoder. Model predicts the sample state for the next step. - -The first Diffusion module in the cascade generates 64x64 pixel low -resolution images. - -.. code:: ipython3 - - %%time - - if not first_stage_unet_ir_path.exists(): - unet_1_ir = mo.convert_model( - stage_1.unet, - input=[InputCutInfo(shape=PartialShape([2, 3, 64, 64]), type=ir_input_type), - InputCutInfo(shape=PartialShape([]), type='i32'), - InputCutInfo(shape=PartialShape([2, 77, 4096]), type=ir_input_type)], - compress_to_fp16=compress_to_fp16, - ) - - serialize(unet_1_ir, first_stage_unet_ir_path) - - del unet_1_ir - - stage_1_config = stage_1.unet.config - del stage_1.unet - gc.collect(); - - -.. 
parsed-literal:: - - CPU times: user 282 ms, sys: 16.7 ms, total: 298 ms - Wall time: 298 ms - - -Convert the second pixel diffusion module `⇑ <#top>`__ -############################################################################################################################### - - -The second Diffusion module in the cascade generates 256x256 pixel -images. - -The second stage pipeline will use bilinear interpolation to upscale the -64x64 image that was generated in the previous stage to a higher 256x256 -resolution. Then it will denoise the image taking into account the -encoded user prompt. - -.. code:: ipython3 - - %%time - - if not second_stage_unet_ir_path.exists(): - unet_2_ir = mo.convert_model( - stage_2.unet, - input=[InputCutInfo(shape=PartialShape([2, 6, 256, 256]), type=ir_input_type), - InputCutInfo(shape=PartialShape([]), type='i32'), - InputCutInfo(shape=PartialShape([2, 77, 4096]), type=ir_input_type), - InputCutInfo(shape=PartialShape([2]), type='i32'),], - compress_to_fp16=compress_to_fp16, - ) - - serialize(unet_2_ir, second_stage_unet_ir_path) - - del unet_2_ir - - stage_2_config = stage_2.unet.config - del stage_2.unet - gc.collect(); - - -.. parsed-literal:: - - CPU times: user 240 ms, sys: 33 ms, total: 273 ms - Wall time: 273 ms - - -Prepare Inference pipeline `⇑ <#top>`__ -############################################################################################################################### - - -The original pipeline from the source repository will be reused in this -example. In order to achieve this, adapter classes were created to -enable OpenVINO models to replace Pytorch models and integrate -seamlessly into the pipeline. - -.. code:: ipython3 - - core = Core() - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: - -.. 
code:: ipython3 - - import ipywidgets as widgets - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value='AUTO', - description='Device:', - disabled=False, - ) - - device - -.. code:: ipython3 - - class TextEncoder: - """ - Text Encoder Adapter Class. - - This class is designed to seamlessly integrate the OpenVINO compiled model - into the `stage_1.encode_prompt` routine. - """ - - def __init__(self, ir_path: Union[str, Path], dtype: torch.dtype, device: str = 'CPU') -> None: - """ - Init the adapter with the IR model path. - - Parameters: - ir_path (str, Path): text encoder IR model path - dtype (torch.dtype): result dtype - device (str): inference device - Returns: - None - """ - self.ir_path = ir_path - self.dtype = dtype - self.encoder_openvino = core.compile_model(self.ir_path, device) - - def __call__(self, input_ids: torch.LongTensor, attention_mask: torch.FloatTensor = None): - """Adapt the network call.""" - result = self.encoder_openvino(input_ids) - result_numpy = result[self.encoder_openvino.outputs[0]] - return [torch.tensor(result_numpy, dtype=self.dtype)] - -.. code:: ipython3 - - # The pipelines for Stages 1 and 2 expect the UNet models to return an object containing a sample attribute. - result_tuple = namedtuple('result', 'sample') - - - class UnetFirstStage: - """ - IF Stage-1 Unet Adapter Class. - - This class is designed to seamlessly integrate the OpenVINO compiled model into - the `stage_1` diffusion pipeline. - """ - - def __init__(self, unet_ir_path: Union[str, Path], - config: diffusers.configuration_utils.FrozenDict, - dtype: torch.dtype, - device: str = 'CPU' - ) -> None: - """ - Init the adapter with the IR model path and model config. 
- - Parameters: - unet_ir_path (str, Path): unet IR model path - config (diffusers.configuration_utils.FrozenDict): original model config - dtype (torch.dtype): result dtype - device (str): inference device - Returns: - None - """ - self.unet_openvino = core.compile_model(unet_ir_path, device) - self.config = config - self.dtype = dtype - - def __call__(self, - sample: torch.FloatTensor, - timestamp: int, - encoder_hidden_states: torch.Tensor, - class_labels: torch.Tensor = None, - cross_attention_kwargs: int = None - ) -> Tuple: - """ - Adapt the network call. - - To learn more abould the model parameters please refer to - its source code: https://github.com/huggingface/diffusers/blob/7200985eab7126801fffcf8251fd149c1cf1f291/src/diffusers/models/unet_2d_condition.py#L610 - """ - result = self.unet_openvino([sample, timestamp, encoder_hidden_states]) - result_numpy = result[self.unet_openvino.outputs[0]] - return result_tuple(torch.tensor(result_numpy, dtype=self.dtype)) - - - class UnetSecondStage: - """ - IF Stage-2 Unet Adapter Class. - - This class is designed to seamlessly integrate the OpenVINO compiled model into - the `stage_2` diffusion pipeline. - """ - - def __init__(self, unet_ir_path: Union[str, Path], - config: diffusers.configuration_utils.FrozenDict, - dtype: torch.dtype, - device: str = 'CPU' - ) -> None: - """ - Init the adapter with the IR model path and model config. - - Parameters: - unet_ir_path (str, Path): unet IR model path - config (diffusers.configuration_utils.FrozenDict): original model config - dtype (torch.dtype): result dtype - device (str): inference device - Returns: - None - """ - self.unet_openvino = core.compile_model(unet_ir_path, device) - self.config = config - self.dtype = dtype - - def __call__(self, - sample: torch.FloatTensor, - timestamp: int, - encoder_hidden_states: torch.Tensor, - class_labels: torch.Tensor = None, - cross_attention_kwargs: int = None - ) -> Tuple: - """ - Adapt the network call. 
- - To learn more abould the model parameters please refer to - its source code: https://github.com/huggingface/diffusers/blob/7200985eab7126801fffcf8251fd149c1cf1f291/src/diffusers/models/unet_2d_condition.py#L610 - """ - result = self.unet_openvino([sample, timestamp, encoder_hidden_states, class_labels]) - result_numpy = result[self.unet_openvino.outputs[0]] - return result_tuple(torch.tensor(result_numpy, dtype=self.dtype)) - -Run Text-to-Image generation `⇑ <#top>`__ -############################################################################################################################### - - -Now, we can set a text prompt for image generation and execute the -inference pipeline. Optionally, you can also modify the random generator -seed for latent state initialization and adjust the number of images to -be generated for the given prompt. - -Text Encoder inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. code:: ipython3 - - %%time - - prompt = 'ultra close color photo portrait of rainbow owl with deer horns in the woods' - negative_prompt = 'blurred unreal uncentered occluded' - - # Initialize TextEncoder wrapper class - stage_1.text_encoder = TextEncoder(encoder_ir_path, dtype=model_dtype, device=device.value) - print('The model has been loaded') - - # Generate text embeddings - prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt, negative_prompt=negative_prompt) - - # Delete the encoder to free up memory - del stage_1.text_encoder.encoder_openvino - gc.collect() - - -.. parsed-literal:: - - The model has been loaded - - -.. parsed-literal:: - - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/configuration_utils.py:135: FutureWarning: Accessing config attribute `unet` directly via 'IFPipeline' object attribute is deprecated. Please access 'unet' over 'IFPipeline's config object instead, e.g. 
'scheduler.config.unet'. - deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) - - -.. parsed-literal:: - - CPU times: user 52.8 s, sys: 38.2 s, total: 1min 31s - Wall time: 30.2 s - - - - -.. parsed-literal:: - - 0 - - - -First Stage diffusion block inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. code:: ipython3 - - %%time - - # Changing the following parameters will affect the model output - # Note that increasing the number of diffusion steps will increase the inference time linearly. - RANDOM_SEED = 42 - N_DIFFUSION_STEPS = 50 - - # Initialize the First Stage UNet wrapper class - stage_1.unet = UnetFirstStage( - first_stage_unet_ir_path, - stage_1_config, - dtype=model_dtype, - device=device.value - ) - print('The model has been loaded') - - # Fix PRNG seed - generator = torch.manual_seed(RANDOM_SEED) - - # Inference - image = stage_1(prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, - generator=generator, output_type="pt", num_inference_steps=N_DIFFUSION_STEPS).images - - # Delete the model to free up memory - del stage_1.unet.unet_openvino - gc.collect() - - # Show the image - pt_to_pil(image)[0] - - -.. parsed-literal:: - - The model has been loaded - - - -.. parsed-literal:: - - 0%| | 0/50 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -.. 
code:: ipython3 - - %%time - - # Initialize the Second Stage UNet wrapper class - stage_2.unet = UnetSecondStage( - second_stage_unet_ir_path, - stage_2_config, - dtype=model_dtype, - device=device.value - ) - print('The model has been loaded') - - image = stage_2( - image=image, prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, - generator=generator, output_type="pt", num_inference_steps=20).images - - # Delete the model to free up memory - del stage_2.unet.unet_openvino - gc.collect() - - # Show the image - pil_image = pt_to_pil(image)[0] - pil_image - - -.. parsed-literal:: - - The model has been loaded - - - -.. parsed-literal:: - - 0%| | 0/20 [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -The final block, which -upscales images to a higher resolution (1024x1024 px), has not been -released by DeepFloyd yet. Stay tuned! - -Upscale the generated image using a Super Resolution network. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -Though the third stage has not been officially released, we’ll employ -the Super Resolution network from `Example -#202 `__ -to enhance our low-resolution result! - -Note, this step will be substituted with the Third IF stage upon its -release! - -.. code:: ipython3 - - # Temporary requirement - !pip install -q matplotlib - -Download the Super Resolution model weights `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. 
code:: ipython3 - - import sys - sys.path.append("../utils") - - import cv2 - import numpy as np - from PIL import Image - - from notebook_utils import download_file - - # 1032: 4x superresolution, 1033: 3x superresolution - model_name = 'single-image-super-resolution-1032' - - sr_model_xml_name = f'{model_name}.xml' - sr_model_bin_name = f'{model_name}.bin' - - sr_model_xml_path = models_dir / sr_model_xml_name - sr_model_bin_path = models_dir / sr_model_bin_name - - if not sr_model_xml_path.exists(): - base_url = f'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{model_name}/FP16/' - model_xml_url = base_url + sr_model_xml_name - model_bin_url = base_url + sr_model_bin_name - - download_file(model_xml_url, sr_model_xml_name, models_dir) - download_file(model_bin_url, sr_model_bin_name, models_dir) - else: - print(f'{model_name} already downloaded to {models_dir}') - - -.. parsed-literal:: - - single-image-super-resolution-1032 already downloaded to models - - -Reshape the model’s inputs `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - -We need to reshape the inputs for the model. This is necessary because the IR model was converted with -a different target input resolution. The Second IF stage returns 256x256 -pixel images. Using the 4x Super Resolution model makes our target image -size 1024x1024 pixel. - -.. code:: ipython3 - - model = core.read_model(model=sr_model_xml_path) - model.reshape({ - 0: [1, 3, 256, 256], - 1: [1, 3, 1024, 1024] - }) - compiled_model = core.compile_model(model=model, device_name=device.value) - -Prepare the input images and run the model `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. 
code:: ipython3 - - original_image = np.array(pil_image) - bicubic_image = cv2.resize( - src=original_image, dsize=(1024, 1024), interpolation=cv2.INTER_CUBIC - ) - - # Reshape the images from (H,W,C) to (N,C,H,W) as expected by the model. - input_image_original = np.expand_dims(original_image.transpose(2, 0, 1), axis=0) - input_image_bicubic = np.expand_dims(bicubic_image.transpose(2, 0, 1), axis=0) - - # Model Inference - result = compiled_model( - [input_image_original, input_image_bicubic] - )[compiled_model.output(0)] - -Display the result `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - - -.. code:: ipython3 - - def convert_result_to_image(result) -> np.ndarray: - """ - Convert network result of floating point numbers to image with integer - values from 0-255. Values outside this range are clipped to 0 and 255. - - :param result: a single superresolution network result in N,C,H,W shape - """ - result = 255 * result.squeeze(0).transpose(1, 2, 0) - result[result < 0] = 0 - result[result > 255] = 255 - return Image.fromarray(result.astype(np.uint8), 'RGB') - - img = convert_result_to_image(result) - img - - - - -.. 
image:: 238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_41_0.png - - diff --git a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_29_3.png b/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_29_3.png deleted file mode 100644 index b1b76dc3db8fbd..00000000000000 --- a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_29_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e80d41d97270f5a468796b6f875e4f43a3c0155337bf1ad0ea413eb86f78c0fe -size 10886 diff --git a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_31_3.png b/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_31_3.png deleted file mode 100644 index bfdeb60f610802..00000000000000 --- a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_31_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09c6abed08c84b93c2262a50412c09c273e5403b4f0a25ad17d6420deefe990a -size 129672 diff --git a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_41_0.png b/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_41_0.png deleted file mode 100644 index a75b4408af9b78..00000000000000 --- a/docs/notebooks/238-deep-floyd-if-with-output_files/238-deep-floyd-if-with-output_41_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddebbb6fdf6635a072175059c0c39580d132138b120bc037f32f1bb5cccf74f4 -size 1349174 diff --git a/docs/notebooks/238-deep-floyd-if-with-output_files/index.html b/docs/notebooks/238-deep-floyd-if-with-output_files/index.html deleted file mode 100644 index cac17a9f83aff0..00000000000000 --- a/docs/notebooks/238-deep-floyd-if-with-output_files/index.html +++ /dev/null @@ -1,9 +0,0 @@ - -Index of 
/projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/238-deep-floyd-if-with-output_files/ - -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/238-deep-floyd-if-with-output_files/


../
-238-deep-floyd-if-with-output_29_3.png             16-Aug-2023 01:31               10886
-238-deep-floyd-if-with-output_31_3.png             16-Aug-2023 01:31              129672
-238-deep-floyd-if-with-output_41_0.png             16-Aug-2023 01:31             1349174
-

- diff --git a/docs/notebooks/239-image-bind-convert-with-output.rst b/docs/notebooks/239-image-bind-convert-with-output.rst index a9354866a28de0..4c8123cbf8306c 100644 --- a/docs/notebooks/239-image-bind-convert-with-output.rst +++ b/docs/notebooks/239-image-bind-convert-with-output.rst @@ -1,8 +1,6 @@ Binding multimodal data using ImageBind and OpenVINO ==================================================== - - Exploring the surrounding world, people get information using multiple senses, for example, seeing a busy street and hearing the sounds of car engines. ImageBind introduces an approach that brings machines one step @@ -69,52 +67,45 @@ represented on the image below: In this tutorial, we consider how to use ImageBind for multimodal zero-shot classification. -.. _top: +**Table of contents:** -**Table of contents**: -- `Prerequisites <#prerequisites>`__ -- `Instantiate PyTorch model <#instantiate-pytorch-model>`__ -- `Prepare input data <#prepare-input-data>`__ -- `Convert Model to OpenVINO Intermediate Representation (IR) format <#convert-model-to-openvino-intermediate-representation-ir-format>`__ +- `Prerequisites <#prerequisites>`__ +- `Instantiate PyTorch model <#instantiate-pytorch-model>`__ +- `Prepare input data <#prepare-input-data>`__ +- `Convert Model to OpenVINO Intermediate Representation (IR) + format <#convert-model-to-openvino-intermediate-representation-ir-format>`__ - - `Select inference device <#select-inference-device>`__ + - `Select inference device <#select-inference-device>`__ -- `Zero-shot classification using ImageBind and OpenVINO <#zero-shot-classification-using-imagebind-and-openvino>`__ +- `Zero-shot classification using ImageBind and + OpenVINO <#zero-shot-classification-using-imagebind-and-openvino>`__ - - `Text-Image classification <#text-image-classification>`__ - - `Text-Audio classification <#text-audio-classification>`__ - - `Image-Audio classification <#image-audio-classification>`__ + - `Text-Image classification 
<#text-image-classification>`__ + - `Text-Audio classification <#text-audio-classification>`__ + - `Image-Audio + classification <#image-audio-classification>`__ -- `Next Steps <#next-steps>`__ - -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Next Steps <#next-steps>`__ +Prerequisites +------------------------------------------------------- .. code:: ipython3 import sys - !pip install -q soundfile pytorchvideo ftfy "timm==0.6.7" einops fvcore + %pip install -q soundfile pytorchvideo ftfy "timm==0.6.7" einops fvcore "openvino>=2023.1.0" if sys.version_info.minor < 8: - !pip install -q "decord" + %pip install -q "decord" else: - !pip install -q "eva-decord" + %pip install -q "eva-decord" - if sys.platform == "darwin": - !pip install -q "torchaudio==0.13.1" --find-links https://download.pytorch.org/whl/torch_stable.html + if sys.platform != "linux": + %pip install -q "torch==2.0.1" "torchvision==0.15.2" "torchaudio==2.0.2" else: - !pip install -q "torchaudio==0.13.1+cpu" --find-links https://download.pytorch.org/whl/torch_stable.html - - -.. parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + %pip install -q "torch==2.0.1" "torchvision==0.15.2" "torchaudio==2.0.2" --index-url https://download.pytorch.org/whl/cpu .. code:: ipython3 @@ -130,19 +121,11 @@ Prerequisites `⇑ <#top>`__ .. parsed-literal:: - Cloning into 'ImageBind'... - remote: Enumerating objects: 112, done. - remote: Counting objects: 100% (60/60), done. - remote: Compressing objects: 100% (26/26), done. - remote: Total 112 (delta 43), reused 34 (delta 34), pack-reused 52 - Receiving objects: 100% (112/112), 2.64 MiB | 3.81 MiB/s, done. - Resolving deltas: 100% (50/50), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/239-image-bind/ImageBind + /home/ea/work/openvino_notebooks/notebooks/239-image-bind/ImageBind -Instantiate PyTorch model `⇑ <#top>`__ -############################################################################################################################### - +Instantiate PyTorch model +------------------------------------------------------------------- To start work with the model, we should instantiate the PyTorch model class. ``imagebind_model.imagebind_huge(pretrained=True)`` downloads @@ -151,8 +134,6 @@ Currently, there is only one ImageBind model available for downloading, ``imagebind_huge``, more details about it can be found in `model card `__. -.. note:: - Please note, depending on internet connection speed, the model downloading process can take some time. It also requires at least 5 GB of free space on disk for saving model checkpoint. 
@@ -171,26 +152,16 @@ card `__. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + /home/ea/work/ov_venv/lib/python3.8/site-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + /home/ea/work/ov_venv/lib/python3.8/site-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( + /home/ea/work/ov_venv/lib/python3.8/site-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. warnings.warn( -.. parsed-literal:: - - Downloading imagebind weights to .checkpoints/imagebind_huge.pth ... - - - -.. 
parsed-literal:: - - 0%| | 0.00/4.47G [00:00`__ -############################################################################################################################### - +Prepare input data +------------------------------------------------------------ ImageBind works with data across 6 different modalities. Each of them requires its steps for preprocessing. ``data`` module is responsible for @@ -221,26 +192,15 @@ data reading and preprocessing for each modality. ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, "cpu"), } -Convert Model to OpenVINO Intermediate Representation (IR) format. `⇑ <#top>`__ -############################################################################################################################### - -OpenVINO supports PyTorch through export to the ONNX format. You will -use the ``torch.onnx.export`` function for obtaining the ONNX model. You -can learn more in the `PyTorch -documentation `__. You need -to provide a model object, input data for model tracing, and a path for -saving the model. Optionally, you can provide the target onnx opset for -conversion and other parameters specified in the documentation (for -example, input and output names or dynamic shapes). - -While ONNX models are directly supported by OpenVINO™ runtime, it can be -useful to convert them to IR format to take advantage of advanced -OpenVINO optimization tools and features. You will use `model conversion -Python -API `__ -to convert model to IR format and compress weights to ``FP16`` format. -The ``mo.convert_model`` function returns OpenVINO Model class instance -ready to load on a device or save on a disk for next loading. +Convert Model to OpenVINO Intermediate Representation (IR) format +----------------------------------------------------------------------------------------------------------- + +OpenVINO supports PyTorch through Model Conversion API. You will use +`model conversion Python +API `__ +to convert model to IR format. 
The ``ov.convert_model`` function returns +OpenVINO Model class instance ready to load on a device or save on a +disk for next loading using ``ov.save_model``. ImageBind accepts data that represents different modalities simultaneously in any combinations, however, their processing is @@ -262,16 +222,14 @@ embeddings. .. code:: ipython3 - from openvino.tools import mo - from openvino.runtime import serialize, Core + import openvino as ov - core = Core() + core = ov.Core() -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 @@ -291,7 +249,7 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') @@ -304,16 +262,12 @@ Select device from dropdown list for running inference using OpenVINO: export_dir = Path(f"image-bind-{modality}") file_name = f"image-bind-{modality}" export_dir.mkdir(exist_ok=True) - onnx_path = export_dir / "onnx" / f"{file_name}.onnx" - onnx_path.parent.mkdir(exist_ok=True) - ir_path = export_dir / onnx_path.name.replace(".onnx", ".xml") + ir_path = export_dir / f"{file_name}.xml" if not ir_path.exists(): - if not onnx_path.exists(): - exportable_model = ModelExporter(model, modality) - model_input = inputs[modality] - torch.onnx.export(exportable_model, model_input, str(onnx_path), input_names=["input"], dynamic_axes={"input": {0: "batch_size"}}) - ov_model = mo.convert_model(onnx_path, compress_to_fp16=True) - serialize(ov_model, str(ir_path)) + exportable_model = 
ModelExporter(model, modality) + model_input = inputs[modality] + ov_model = ov.convert_model(exportable_model, example_input=model_input) + ov.save_model(ov_model, ir_path) else: ov_model = core.read_model(ir_path) ov_modality_models[modality] = core.compile_model(ov_model, device.value) @@ -321,25 +275,22 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:5408: UserWarning: Exporting aten::index operator of advanced indexing in opset 14 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results. - warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) 
- _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:433: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + /home/ea/work/openvino_notebooks/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:433: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x.shape[self.time_dim] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:259: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /home/ea/work/openvino_notebooks/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:259: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert tokens.shape[2] == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /home/ea/work/openvino_notebooks/notebooks/239-image-bind/ImageBind/imagebind/models/multimodal_preprocessors.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch_per_img == N: -Zero-shot classification using ImageBind and OpenVINO `⇑ <#top>`__ -############################################################################################################################### - +Zero-shot classification using ImageBind and OpenVINO +----------------------------------------------------------------------------------------------- In zero-shot classification, a piece of data is embedded and fed to the model to retrieve a label that corresponds with the contents of the @@ -399,9 +350,8 @@ they represent the same object. 
image_list = [img.split('/')[-1] for img in image_paths] audio_list = [audio.split('/')[-1] for audio in audio_paths] -Text-Image classification `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Text-Image classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -414,9 +364,8 @@ Text-Image classification `⇑ <#top>`__ .. image:: 239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_20_0.png -Text-Audio classification `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Text-Audio classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -429,9 +378,8 @@ Text-Audio classification `⇑ <#top>`__ .. image:: 239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_22_0.png -Image-Audio classification `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Image-Audio classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -491,7 +439,7 @@ Putting all together, we can match text, image, and sound for our data. .. parsed-literal:: Predicted label: A bird - probability for image - 0.987 + probability for image - 0.986 probability for audio - 1.000 @@ -543,9 +491,9 @@ Putting all together, we can match text, image, and sound for our data. 
-Next Steps `⇑ <#top>`__ -############################################################################################################################### +Next Steps +---------------------------------------------------- -Open the `239-image-bind-quantize <239-image-bind-quantize.ipynb>`__ notebook to -quantize the IR model with the Post-training Quantization API of NNCF -and compare ``FP16`` and ``INT8`` models. +Open the `239-image-bind-quantize <239-image-bind-quantize.ipynb>`__ +notebook to quantize the IR model with the Post-training Quantization +API of NNCF and compare ``FP16`` and ``INT8`` models. diff --git a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_20_0.png b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_20_0.png index b2cd5d1a62153c..df016b4362f6d9 100644 --- a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_20_0.png +++ b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_20_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6272b9c2c4d9d0d72663a333ce0e40b0d8937b4c16fab955f0d7440a40d7067 -size 15385 +oid sha256:548de0a5d78bfff78d1e2bf1928ad661e1710adbc67a6c5c06217b58da7254c9 +size 15474 diff --git a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_22_0.png b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_22_0.png index 9c96df507c350f..d5c4c11fe3f778 100644 --- a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_22_0.png +++ b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c40bd21ededa826754bafea3e53d0e15c3f1eda409c41182b267d5dcd5064146 +oid 
sha256:f1515a23118e88f3ebd077d54a5a4f9aa2646ed900ccf359bd26fedcfd5280ef size 13795 diff --git a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_24_0.png b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_24_0.png index 7e80bd97a6e4be..c3e978ac9a14d2 100644 --- a/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_24_0.png +++ b/docs/notebooks/239-image-bind-convert-with-output_files/239-image-bind-convert-with-output_24_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81ff26fa579513e912e907ecec52388d40e407c707e973e863ba0bcedccaf033 -size 18633 +oid sha256:a8d6f6ea71a02f96c438341ffb8cf967653068350a1fb70fe5775f775a9d4f38 +size 18151 diff --git a/docs/notebooks/239-image-bind-convert-with-output_files/index.html b/docs/notebooks/239-image-bind-convert-with-output_files/index.html index 651c95060158cd..3e1c221be0c6b9 100644 --- a/docs/notebooks/239-image-bind-convert-with-output_files/index.html +++ b/docs/notebooks/239-image-bind-convert-with-output_files/index.html @@ -1,15 +1,15 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/239-image-bind-convert-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/239-image-bind-convert-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/239-image-bind-convert-with-output_files/


../
-239-image-bind-convert-with-output_20_0.png        16-Aug-2023 01:31               15385
-239-image-bind-convert-with-output_22_0.png        16-Aug-2023 01:31               13795
-239-image-bind-convert-with-output_24_0.png        16-Aug-2023 01:31               18633
-239-image-bind-convert-with-output_26_1.jpg        16-Aug-2023 01:31               36700
-239-image-bind-convert-with-output_26_1.png        16-Aug-2023 01:31              341289
-239-image-bind-convert-with-output_27_1.jpg        16-Aug-2023 01:31               71448
-239-image-bind-convert-with-output_27_1.png        16-Aug-2023 01:31              839471
-239-image-bind-convert-with-output_28_1.jpg        16-Aug-2023 01:31               54208
-239-image-bind-convert-with-output_28_1.png        16-Aug-2023 01:31              658748
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/239-image-bind-convert-with-output_files/


../
+239-image-bind-convert-with-output_20_0.png        31-Oct-2023 00:35               15474
+239-image-bind-convert-with-output_22_0.png        31-Oct-2023 00:35               13795
+239-image-bind-convert-with-output_24_0.png        31-Oct-2023 00:35               18151
+239-image-bind-convert-with-output_26_1.jpg        31-Oct-2023 00:35               36700
+239-image-bind-convert-with-output_26_1.png        31-Oct-2023 00:35              341289
+239-image-bind-convert-with-output_27_1.jpg        31-Oct-2023 00:35               71448
+239-image-bind-convert-with-output_27_1.png        31-Oct-2023 00:35              839471
+239-image-bind-convert-with-output_28_1.jpg        31-Oct-2023 00:35               54208
+239-image-bind-convert-with-output_28_1.png        31-Oct-2023 00:35              658748
 

diff --git a/docs/notebooks/240-dolly-2-instruction-following-with-output.rst b/docs/notebooks/240-dolly-2-instruction-following-with-output.rst index 7dda0634e62c7a..c8f4ccd7a71d73 100644 --- a/docs/notebooks/240-dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/240-dolly-2-instruction-following-with-output.rst @@ -1,8 +1,6 @@ Instruction following using Databricks Dolly 2.0 and OpenVINO ============================================================= - - The instruction following is one of the cornerstones of the current generation of large language models(LLMs). Reinforcement learning with human preferences (`RLHF `__) and @@ -40,6 +38,8 @@ The tutorial consists of the following steps: - Download and convert the model from a public source using the `OpenVINO integration with Hugging Face Optimum `__. +- Compress model weights to INT8 with `OpenVINO + NNCF `__ - Create an instruction-following inference pipeline - Run instruction-following pipeline @@ -81,29 +81,34 @@ dataset can be found in `Databricks blog post `__ and `repo `__ +**Table of contents:** -.. 
_top: - -**Table of contents**: - -- `Prerequisites <#prerequisites>`__ - - `Select inference device <#select-inference-device>`__ +- `Prerequisites <#prerequisites>`__ -- `Download and Convert Model <#download-and-convert-model>`__ -- `Create an instruction-following inference pipeline <#create-an-instruction-following-inference-pipeline>`__ + - `Select inference device <#select-inference-device>`__ - - `Setup imports <#setup-imports>`__ - - `Prepare template for user prompt <#prepare-template-for-user-prompt>`__ - - `Helpers for output parsing <#helpers-for-output-parsing>`__ - - `Main generation function <#main-generation-function>`__ - - `Helpers for application <#helpers-for-application>`__ +- `Download and Convert + Model <#download-and-convert-model>`__ +- `NNCF model weights + compression <#nncf-model-weights-compression>`__ +- `Create an instruction-following inference + pipeline <#create-an-instruction-following-inference-pipeline>`__ -- `Run instruction-following pipeline <#run-instruction-following-pipeline>`__ + - `Setup imports <#setup-imports>`__ + - `Prepare template for user + prompt <#prepare-template-for-user-prompt>`__ + - `Helpers for output + parsing <#helpers-for-output-parsing>`__ + - `Main generation + function <#main-generation-function>`__ + - `Helpers for application <#helpers-for-application>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Run instruction-following + pipeline <#run-instruction-following-pipeline>`__ +Prerequisites +-------------------------------------------------------- First, we should install the `Hugging Face Optimum `__ library @@ -115,36 +120,24 @@ documentation `__. .. 
code:: ipython3 - !pip install -q "diffusers>=0.16.1" "transformers>=4.28.0" - !pip install -q "git+https://github.com/huggingface/optimum-intel.git" datasets onnx onnxruntime gradio + %pip install -q "diffusers>=0.16.1" "transformers>=4.28.0" "openvino==2023.2.0.dev20230922" "nncf>=2.6.0" datasets onnx onnxruntime gradio + %pip install -q --upgrade "git+https://github.com/huggingface/optimum-intel.git" +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], - value='AUTO', + value='CPU', description='Device:', disabled=False, ) @@ -156,13 +149,12 @@ Select device from dropdown list for running inference using OpenVINO: .. 
parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') -Download and Convert Model `⇑ <#top>`__ -############################################################################################################################### - +Download and Convert Model +--------------------------------------------------------------------- Optimum Intel can be used to load optimized models from the `Hugging Face Hub `__ and @@ -186,9 +178,10 @@ Below is an example of the Dolly model Model class initialization starts with calling ``from_pretrained`` method. When downloading and converting Transformers model, the -parameter ``from_transformers=True`` should be added. We can save the -converted model for the next usage with the ``save_pretrained`` method. -Tokenizer class and pipelines API are compatible with Optimum models. +parameter ``export=True`` should be added. We +can save the converted model for the next usage with the +``save_pretrained`` method. Tokenizer class and pipelines API are +compatible with Optimum models. .. code:: ipython3 @@ -203,82 +196,111 @@ Tokenizer class and pipelines API are compatible with Optimum models. current_device = device.value + ov_config = {'PERFORMANCE_HINT': 'LATENCY', 'NUM_STREAMS': '1', "CACHE_DIR": ""} + if model_path.exists(): - ov_model = OVModelForCausalLM.from_pretrained(model_path, device=current_device) + ov_model = OVModelForCausalLM.from_pretrained(model_path, device=current_device, ov_config=ov_config) else: - ov_model = OVModelForCausalLM.from_pretrained(model_id, device=current_device, from_transformers=True) + ov_model = OVModelForCausalLM.from_pretrained(model_id, device=current_device, export=True, ov_config=ov_config, load_in_8bit=False) + ov_model.half() ov_model.save_pretrained(model_path) .. 
parsed-literal:: - 2023-07-17 14:47:00.308996: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-17 14:47:00.348466: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + 2023-10-09 11:07:22.234444: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-09 11:07:22.273745: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-17 14:47:01.039895: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-09 11:07:22.903943: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Compiling the model to CPU ... + + +NNCF model weights compression +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +NNCF `Weights Compression +algorithm `__ +compresses weights of a model to ``INT8``. This is an alternative to +`Quantization +algorithm `__ +that compresses both weights and activations. 
Weight compression is +effective in optimizing footprint and performance of large models where +the size of weights is significantly larger than the size of +activations, for example, in Large Language Models (LLMs) such as Dolly +2.0. Additionally, Weight Compression usually leads to almost no +accuracy drop. + +.. code:: ipython3 + + to_compress = widgets.Checkbox( + value=True, + description='INT8 Compression', + disabled=False, + ) + print("Click on checkbox for enabling / disabling weights compression") + to_compress .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + Click on checkbox for enabling / disabling weights compression -.. code:: - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - comet_ml is installed but `COMET_API_KEY` is not set. - The argument `from_transformers` is deprecated, and will be removed in optimum 2.0. Use `export` instead - Framework not specified. Using pt to export to ONNX. - Using framework PyTorch: 1.13.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:504: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert batch_size > 0, "batch_size has to be defined and > 0" - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:270: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if seq_len > self.max_seq_len_cached: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - op1 = operator(*args, **kwargs) - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. 
See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. 
See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode - Saving external data to one file... - Compiling the model... - Set CACHE_DIR to /tmp/tmpndw8_20n/model_cache - - -Create an instruction-following inference pipeline `⇑ <#top>`__ -############################################################################################################################### + +.. parsed-literal:: + + Checkbox(value=True, description='INT8 Compression') + + + +.. 
code:: ipython3 + + import gc + from optimum.intel import OVQuantizer + + compressed_model_path = Path(f'{model_path}_compressed') + + def calculate_compression_rate(model_path_ov, model_path_ov_compressed): + model_size_original = model_path_ov.with_suffix(".bin").stat().st_size / 2 ** 20 + model_size_compressed = model_path_ov_compressed.with_suffix(".bin").stat().st_size / 2 ** 20 + print(f"* Original IR model size: {model_size_original:.2f} MB") + print(f"* Compressed IR model size: {model_size_compressed:.2f} MB") + print(f"* Model compression rate: {model_size_original / model_size_compressed:.3f}") + + if to_compress.value: + if not compressed_model_path.exists(): + ov_model = OVModelForCausalLM.from_pretrained(model_id, device=current_device, export=True, ov_config=ov_config) + quantizer = OVQuantizer.from_pretrained(ov_model) + quantizer.quantize(save_directory=compressed_model_path, weights_only=True) + del quantizer + gc.collect() + + calculate_compression_rate(model_path / 'openvino_model.xml', compressed_model_path / 'openvino_model.xml') + ov_model = OVModelForCausalLM.from_pretrained(compressed_model_path, device=current_device, ov_config=ov_config) + + +.. parsed-literal:: + + * Original IR model size: 5297.21 MB + * Compressed IR model size: 2660.29 MB + * Model compression rate: 1.991 + + +.. parsed-literal:: + + Compiling the model to CPU ... +Create an instruction-following inference pipeline +--------------------------------------------------------------------------------------------- + The ``run_generation`` function accepts user-provided text input, tokenizes it, and runs the generation process. Text generation is an iterative process, where each next token depends on previously generated @@ -380,9 +402,8 @@ generated tokens without waiting until when the whole generation is finished using Streaming API, it adds a new token to the output queue and then prints them when they are ready. 
-Setup imports `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Setup imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -393,9 +414,8 @@ Setup imports `⇑ <#top>`__ from transformers import AutoTokenizer, TextIteratorStreamer import numpy as np -Prepare template for user prompt `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare template for user prompt +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For effective generation, model expects to have input in specific format. The code below prepare template for passing user instruction @@ -425,12 +445,11 @@ into model with providing additional context. response_key=RESPONSE_KEY, ) -Helpers for output parsing `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Helpers for output parsing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Model was retrained to finish generation using special token ``### End``. -The code below find its id for using it as generation stop-criteria. +Model was retrained to finish generation using special token ``### End`` +the code below find its id for using it as generation stop-criteria. .. code:: ipython3 @@ -466,9 +485,8 @@ The code below find its id for using it as generation stop-criteria. except ValueError: pass -Main generation function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main generation function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As it was discussed above, ``run_generation`` function is the entry point for starting generation. 
It gets provided input instruction as @@ -527,9 +545,8 @@ parameter and returns model response. start = perf_counter() return model_output, perf_text -Helpers for application `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Helpers for application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For making interactive user interface we will use Gradio library. The code bellow provides useful functions used for communication with UI @@ -594,9 +611,8 @@ elements. ov_model.compile() return current_text -Run instruction-following pipeline `⇑ <#top>`__ -############################################################################################################################### - +Run instruction-following pipeline +----------------------------------------------------------------------------- Now, we are ready to explore model capabilities. This demo provides a simple interface that allows communication with a model using text @@ -618,7 +634,7 @@ generation parameters: .. code:: ipython3 - available_devices = Core().available_devices + ["AUTO"] + available_devices = ov.Core().available_devices + ["AUTO"] examples = [ "Give me recipe for pizza with pineapple", @@ -681,7 +697,7 @@ generation parameters: .. parsed-literal:: - /tmp/ipykernel_1272681/896135151.py:57: GradioDeprecationWarning: The `enable_queue` parameter has been deprecated. Please use the `.queue()` method instead. + /tmp/ipykernel_709262/2332051390.py:57: GradioDeprecationWarning: The `enable_queue` parameter has been deprecated. Please use the `.queue()` method instead. demo.launch(enable_queue=True, share=False, height=800) @@ -695,5 +711,5 @@ generation parameters: .. .. raw:: html -..
+..
diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output.rst b/docs/notebooks/241-riffusion-text-to-music-with-output.rst index 121ec4aa61f53b..27952900e7624d 100644 --- a/docs/notebooks/241-riffusion-text-to-music-with-output.rst +++ b/docs/notebooks/241-riffusion-text-to-music-with-output.rst @@ -1,8 +1,6 @@ Text-to-Music generation using Riffusion and OpenVINO ===================================================== - - `Riffusion `__ is a latent text-to-image diffusion model capable of generating spectrogram images given any text input. These spectrograms can be converted into @@ -76,59 +74,31 @@ The STFT is invertible, so the original audio can be reconstructed from a spectrogram. This idea is a behind approach to using Riffusion for audio generation. -.. _top: - -**Table of contents**: - -- `Prerequisites <#prerequisites>`__ -- `Stable Diffusion pipeline in Optimum Intel <#stable-diffusion-pipeline-in-optimum-intel>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Prepare postprocessing for reconstruction audio from spectrogram image <#prepare-postprocessing-for-reconstruction-audio-from-spectrogram-image>`__ -- `Run Inference pipeline <#run-inference-pipeline>`__ -- `Interactive demo <#interactive-demo>`__ - -Prerequisites `⇑ <#top>`__ -############################################################################################################################### - +**Table of contents:** -.. code:: ipython3 - - !pip install -q "diffusers>=0.16.1" "transformers>=4.28.0" - !pip install -q "git+https://github.com/huggingface/optimum-intel.git" onnx onnxruntime "gradio>=3.34.0" +- `Prerequisites <#prerequisites>`__ +- `Stable Diffusion pipeline in Optimum + Intel <#stable-diffusion-pipeline-in-optimum-intel>`__ -.. 
parsed-literal:: + - `Select inference device <#select-inference-device>`__ - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip +- `Prepare postprocessing for reconstruction audio from spectrogram + image <#prepare-postprocessing-for-reconstruction-audio-from-spectrogram-image>`__ +- `Run Inference pipeline <#run-inference-pipeline>`__ +- `Interactive demo <#interactive-demo>`__ +Prerequisites +------------------------------------------------------- .. code:: ipython3 - import sys - - if sys.platform == "darwin": - !pip install -q "torchaudio==0.13.1" --find-links https://download.pytorch.org/whl/torch_stable.html - else: - !pip install -q "torchaudio==0.13.1+cpu" --find-links https://download.pytorch.org/whl/torch_stable.html - - -.. parsed-literal:: - - - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Stable Diffusion pipeline in Optimum Intel `⇑ <#top>`__ -############################################################################################################################### + %pip install -q "diffusers>=0.16.1" "transformers>=4.28.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchaudio + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" onnx onnxruntime "gradio>=3.34.0" "openvino>=2023.1.0" +Stable Diffusion pipeline in Optimum Intel +------------------------------------------------------------------------------------ As the riffusion model architecture is the same as Stable Diffusion, we can use it with the Stable Diffusion pipeline for text-to-image @@ -164,11 +134,10 @@ running. 
MODEL_ID = "riffusion/riffusion-model-v1" MODEL_DIR = Path("riffusion_pipeline") -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -209,14 +178,6 @@ Select device from dropdown list for running inference using OpenVINO: pipe = OVStableDiffusionPipeline.from_pretrained(MODEL_DIR, device=DEVICE, compile=False) -.. parsed-literal:: - - 2023-07-17 16:22:33.905103: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-17 16:22:33.943298: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-17 16:22:34.567997: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -225,167 +186,16 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - comet_ml is installed but `COMET_API_KEY` is not set. - - - -.. 
parsed-literal:: - - Downloading (…)ain/model_index.json: 0%| | 0.00/541 [00:00= 64: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:977: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not return_dict: - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) - _C._jit_pass_onnx_graph_shape_type_inference( - Saving external data to one file... 
- Using framework PyTorch: 1.13.1+cpu - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_graph_shape_type_inference( - /home/ea/work/notebooks_convert/notebooks_conv_env/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) - _C._jit_pass_onnx_graph_shape_type_inference( - Using framework PyTorch: 1.13.1+cpu - - -Prepare postprocessing for reconstruction audio from spectrogram image. `⇑ <#top>`__ -############################################################################################################################### +Prepare postprocessing for reconstruction audio from spectrogram image +---------------------------------------------------------------------------------------------------------------- The riffusion model generates an audio spectrogram image, which can be used to reconstruct audio. However, the spectrogram images from the @@ -551,9 +361,8 @@ from a spectrogram image using Griffin-Lim Algorithm. 
return waveform -Run Inference pipeline `⇑ <#top>`__ -############################################################################################################################### - +Run Inference pipeline +---------------------------------------------------------------- The diagram below briefly describes the workflow of our pipeline @@ -606,9 +415,10 @@ reconstructed audio. .. parsed-literal:: - Compiling the text_encoder... Compiling the vae_decoder... Compiling the unet... + Compiling the vae_encoder... + Compiling the text_encoder... Now, we can test our generation. Function generate accepts text input @@ -626,12 +436,28 @@ without the other. More explanation of how it works can be found in this spectrogram, wav_path = generate("Techno beat") +.. parsed-literal:: + + `height` was set to 256 but the static model will output images of height 512.To fix the height, please reshape your model accordingly using the `.reshape()` method. + `width` was set to 256 but the static model will output images of width 512.To fix the width, please reshape your model accordingly using the `.reshape()` method. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:559: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + outputs = self.request(inputs, shared_memory=True) + + .. parsed-literal:: 0%| | 0/21 [00:00 - + Your browser does not support the audio element. -Interactive demo `⇑ <#top>`__ -############################################################################################################################### - +Interactive demo +---------------------------------------------------------- .. code:: ipython3 @@ -737,15 +562,15 @@ Interactive demo `⇑ <#top>`__ .. parsed-literal:: - /tmp/ipykernel_1282292/2438576232.py:56: GradioDeprecationWarning: The `style` method is deprecated. 
Please set these arguments in the constructor instead. + /tmp/ipykernel_180612/330468370.py:56: GradioDeprecationWarning: The `style` method is deprecated. Please set these arguments in the constructor instead. spectrogram_output.style(height=256) - /tmp/ipykernel_1282292/2438576232.py:63: GradioDeprecationWarning: The `enable_queue` parameter has been deprecated. Please use the `.queue()` method instead. + /tmp/ipykernel_180612/330468370.py:63: GradioDeprecationWarning: The `enable_queue` parameter has been deprecated. Please use the `.queue()` method instead. demo.launch(enable_queue=True, height=800) .. parsed-literal:: - Running on local URL: http://127.0.0.1:7861 + Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. @@ -753,5 +578,5 @@ Interactive demo `⇑ <#top>`__ .. .. raw:: html -..
+..
diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.jpg b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.jpg new file mode 100644 index 00000000000000..4a09c8bc4f4b96 --- /dev/null +++ b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a31b8b6658c858732a376cb99705c80ef2b851ae60b17582cca830b9095954 +size 61095 diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.png b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.png new file mode 100644 index 00000000000000..f73070696526be --- /dev/null +++ b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_14_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f983a7505aaca44b18d477608d8859f85e4fbc41f80216986b41e8f8115a55 +size 524399 diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.jpg b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.jpg deleted file mode 100644 index 605e464bf827a2..00000000000000 --- a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eab8042875afdad8eb30bf5edd079896eb72338a3c65d682dc7d6aac489bc354 -size 55481 diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.png b/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.png deleted file mode 100644 index 
39b7e78b64a411..00000000000000 --- a/docs/notebooks/241-riffusion-text-to-music-with-output_files/241-riffusion-text-to-music-with-output_15_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41d293c15a42032bfa5b9f22f808f31b127276aae59afdd1fd36320a11e747ba -size 493898 diff --git a/docs/notebooks/241-riffusion-text-to-music-with-output_files/index.html b/docs/notebooks/241-riffusion-text-to-music-with-output_files/index.html index 495e93bde31e03..357065094ecfa4 100644 --- a/docs/notebooks/241-riffusion-text-to-music-with-output_files/index.html +++ b/docs/notebooks/241-riffusion-text-to-music-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/241-riffusion-text-to-music-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/241-riffusion-text-to-music-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/241-riffusion-text-to-music-with-output_files/


../
-241-riffusion-text-to-music-with-output_15_0.jpg   16-Aug-2023 01:31               55481
-241-riffusion-text-to-music-with-output_15_0.png   16-Aug-2023 01:31              493898
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/241-riffusion-text-to-music-with-output_files/


../
+241-riffusion-text-to-music-with-output_14_0.jpg   31-Oct-2023 00:35               61095
+241-riffusion-text-to-music-with-output_14_0.png   31-Oct-2023 00:35              524399
 

diff --git a/docs/notebooks/242-freevc-voice-conversion-with-output.rst b/docs/notebooks/242-freevc-voice-conversion-with-output.rst index 3e8ac5bdaaff0d..2fc050361e0c88 100644 --- a/docs/notebooks/242-freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/242-freevc-voice-conversion-with-output.rst @@ -1,8 +1,6 @@ High-Quality Text-Free One-Shot Voice Conversion with FreeVC and OpenVINO™ ========================================================================== - - `FreeVC `__ allows alter the voice of a source speaker to a target style, while keeping the linguistic content unchanged, without text annotation. @@ -30,54 +28,56 @@ devices. It consists of the following steps: - Convert models to OpenVINO Intermediate Representation. - Inference using only OpenVINO’s IR models. -.. _top: - -**Table of contents**: - -- `Prerequisites <#prerequisites>`__ -- `Imports and settings <#imports-and-settings>`__ -- `Convert Modes to OpenVINO Intermediate Representation <#convert-modes-to-openvino-intermediate-representation>`__ - - - `Convert Prior Encoder. <#convert-prior-encoder>`__ - - `Convert SpeakerEncoder <#convert-speakerencoder>`__ - - `Convert Decoder <#convert-decoder>`__ - -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +**Table of contents:** -This steps can be done manually or will be performed automatically during the execution of the notebook, but in -minimum necessary scope. -1. Clone this repo: +- `Pre-requisites <#pre-requisites>`__ +- `Imports and settings <#imports-and-settings>`__ +- `Convert Modes to OpenVINO Intermediate + Representation <#convert-modes-to-openvino-intermediate-representation>`__ -.. code-block:: sh + - `Convert Prior Encoder. 
<#convert-prior-encoder>`__ + - `Convert SpeakerEncoder <#convert-speakerencoder>`__ + - `Convert Decoder <#convert-decoder>`__ - git clone https://github.com/OlaWod/FreeVC.git +Pre-requisites +-------------------------------------------------------- -2. Download `WavLM-Large `__ - and put it under directory ``FreeVC/wavlm/``. -3. You can download the `VCTK `__ dataset. For - this example we download only two of them from - `Hugging Face FreeVC example `__. -4. Download `pretrained models `__ - and put it under directory ‘checkpoints’ (for current example only - ``freevc.pth`` are required). +This steps can be done manually or will be performed automatically +during the execution of the notebook, but in minimum necessary scope. 1. +Clone this repo: git clone https://github.com/OlaWod/FreeVC.git. 2. +Download +`WavLM-Large `__ +and put it under directory ``FreeVC/wavlm/``. 3. You can download the +`VCTK `__ dataset. For +this example we download only two of them from `Hugging Face FreeVC +example `__. 4. +Download `pretrained +models `__ +and put it under directory ‘checkpoints’ (for current example only +``freevc.pth`` are required). Install extra requirements .. code:: ipython3 - !pip install -q "librosa>=0.8.1" - !pip install -q "webrtcvad==2.0.10" - !pip install -q gradio + %pip install -q "librosa>=0.8.1" + %pip install -q "webrtcvad==2.0.10" + %pip install -q "openvino>=2023.1.0" + %pip install -q gradio .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
+ DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + Check if FreeVC is installed and append its path to ``sys.path`` @@ -98,10 +98,10 @@ Check if FreeVC is installed and append its path to ``sys.path`` Cloning into 'FreeVC'... remote: Enumerating objects: 131, done. - remote: Counting objects: 100% (61/61), done. - remote: Compressing objects: 100% (40/40), done. - remote: Total 131 (delta 36), reused 21 (delta 21), pack-reused 70 - Receiving objects: 100% (131/131), 15.28 MiB | 4.14 MiB/s, done. + remote: Counting objects: 100% (65/65), done. + remote: Compressing objects: 100% (41/41), done. + remote: Total 131 (delta 39), reused 24 (delta 24), pack-reused 66 + Receiving objects: 100% (131/131), 15.28 MiB | 3.52 MiB/s, done. Resolving deltas: 100% (43/43), done. @@ -171,9 +171,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` p226_002.wav: 0%| | 0.00/135k [00:00`__ -############################################################################################################################### - +Imports and settings +-------------------------------------------------------------- .. code:: ipython3 @@ -187,9 +186,7 @@ Imports and settings `⇑ <#top>`__ from scipy.io.wavfile import write from tqdm import tqdm - from openvino.runtime import Core, serialize - from openvino.runtime.ie_api import CompiledModel - from openvino.tools import mo + import openvino as ov import utils from models import SynthesizerTrn @@ -232,7 +229,7 @@ Models initialization .. parsed-literal:: - Loaded the voice encoder model on cpu in 0.01 seconds. 
+ Loaded the voice encoder model on cpu in 0.00 seconds. Reading dataset settings @@ -272,29 +269,27 @@ Inference .. parsed-literal:: - 2it [00:01, 1.27it/s] + 2it [00:01, 1.30it/s] Result audio files should be available in ‘outputs/freevc’ -Convert Modes to OpenVINO Intermediate Representation `⇑ <#top>`__ -#################################################################### +Convert Modes to OpenVINO Intermediate Representation +=============================================================================================== Convert each model to ONNX format and then use the model conversion Python API to convert the ONNX model to OpenVINO IR, with FP16 -precision. The ``mo.convert_model`` function accepts the path to a model +precision. The ``ov.convert_model`` function accepts the path to a model and returns the OpenVINO Model class instance which represents this model. The obtained model is ready to use and to be loaded on a device using ``compile_model`` or can be saved on a disk using the -``serialize`` function. The ``read_model`` method loads a saved model -from a disk. For more information about model conversion, see this -`page `__. - -Convert Prior Encoder. `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -First we convert WavLM model, as a part of Convert Prior Encoder, to the ONNX format, then to OpenVINO’s IR -format. We keep the original name of the model in code: ``cmodel``. +``ov.save_model`` function. The ``read_model`` method loads a saved +model from a disk. For more information about model conversion, see this +`page `__. +### Convert Prior Encoder. +First we convert WavLM model, as a part of Convert Prior Encoder, to the +ONNX format, then to OpenVINO’s IR format. We keep the original name of +the model in code: ``cmodel``. .. code:: ipython3 @@ -336,19 +331,19 @@ Convert ``cmodel`` to ONNX. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if mask: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:372: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:372: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
feature = res["features"] if ret_conv else res["x"] - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/242-freevc-voice-conversion/FreeVC/wavlm/WavLM.py:373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ret_layer_results: @@ -356,11 +351,11 @@ Converting to OpenVINO’s IR format. .. code:: ipython3 - core = Core() + core = ov.Core() if not ir_cmodel_path.exists(): - ir_cmodel = mo.convert_model(onnx_cmodel_path, compress_to_fp16=True) - serialize(ir_cmodel, str(ir_cmodel_path)) + ir_cmodel = ov.convert_model(onnx_cmodel_path) + ov.save_model(ir_cmodel, ir_cmodel_path) else: ir_cmodel = core.read_model(ir_cmodel_path) @@ -393,8 +388,8 @@ Select device from dropdown list for running inference using OpenVINO compiled_cmodel = core.compile_model(ir_cmodel, device.value) -Convert ``SpeakerEncoder`` `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert ``SpeakerEncoder`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Converting to ONNX format. @@ -427,13 +422,13 @@ Converting to ONNX format. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4315: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4315: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) 
_C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( @@ -442,8 +437,8 @@ Converting to OpenVINO’s IR format. .. 
code:: ipython3 if not ir_smodel_path.exists(): - ir_smodel = mo.convert_model(onnx_smodel_path, compress_to_fp16=True) - serialize(ir_smodel, str(ir_smodel_path)) + ir_smodel = ov.convert_model(onnx_smodel_path) + ov.save_model(ir_smodel, ir_smodel_path) else: ir_smodel = core.read_model(ir_smodel_path) @@ -509,7 +504,7 @@ based on ``speaker_encoder.voice_encoder.SpeakerEncoder`` class methods return wav_slices, mel_slices - def embed_utterance(wav: np.ndarray, smodel: CompiledModel, return_partials=False, rate=1.3, min_coverage=0.75): + def embed_utterance(wav: np.ndarray, smodel: ov.CompiledModel, return_partials=False, rate=1.3, min_coverage=0.75): """ Computes an embedding for a single utterance. The utterance is divided in partial utterances and an embedding is computed for each. The complete utterance embedding is the @@ -577,9 +572,8 @@ Then compile model. compiled_smodel = core.compile_model(ir_smodel, device.value) -Convert Decoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Convert Decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the same way export ``SynthesizerTrn`` model, that implements decoder function, to ONNX format and convert it to OpenVINO IR format. @@ -613,8 +607,8 @@ function, to ONNX format and convert it to OpenVINO IR format. torch.onnx.export(net_g, (dummy_input_1, dummy_input_2), onnx_net_g_path, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes) if not ir_net_g_path.exists(): - ir_net_g_model = mo.convert_model(onnx_net_g_path, compress_to_fp16=True) - serialize(ir_net_g_model, str(ir_net_g_path)) + ir_net_g_model = ov.convert_model(onnx_net_g_path) + ov.save_model(ir_net_g_model, ir_net_g_path) else: ir_net_g_model = core.read_model(ir_net_g_path) @@ -673,7 +667,7 @@ And now we can check inference using only IR models. .. 
parsed-literal:: - 2it [00:02, 1.45s/it] + 2it [00:02, 1.39s/it] Result audio files should be available in ‘outputs/freevc’ and you can @@ -734,7 +728,7 @@ Result audio: @@ -780,15 +774,15 @@ inference. Use rate corresponding to the value of .. parsed-literal:: - /tmp/ipykernel_2082705/3932271335.py:4: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components + /tmp/ipykernel_1186436/3932271335.py:4: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components audio1 = gr.inputs.Audio(label="Source Audio", type='filepath') - /tmp/ipykernel_2082705/3932271335.py:4: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect + /tmp/ipykernel_1186436/3932271335.py:4: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect audio1 = gr.inputs.Audio(label="Source Audio", type='filepath') - /tmp/ipykernel_2082705/3932271335.py:5: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components + /tmp/ipykernel_1186436/3932271335.py:5: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components audio2 = gr.inputs.Audio(label="Reference Audio", type='filepath') - /tmp/ipykernel_2082705/3932271335.py:5: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect + /tmp/ipykernel_1186436/3932271335.py:5: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect audio2 = gr.inputs.Audio(label="Reference Audio", type='filepath') - /tmp/ipykernel_2082705/3932271335.py:6: GradioDeprecationWarning: Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components 
from gradio.components + /tmp/ipykernel_1186436/3932271335.py:6: GradioDeprecationWarning: Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components outputs = gr.outputs.Audio(label="Output Audio", type='filepath') @@ -802,7 +796,7 @@ inference. Use rate corresponding to the value of .. .. raw:: html -..
+..
.. code:: ipython3 diff --git a/docs/notebooks/243-tflite-selfie-segmentation-with-output.rst b/docs/notebooks/243-tflite-selfie-segmentation-with-output.rst index f989c47a6e0bd8..5bf7ab1f87b2aa 100644 --- a/docs/notebooks/243-tflite-selfie-segmentation-with-output.rst +++ b/docs/notebooks/243-tflite-selfie-segmentation-with-output.rst @@ -1,8 +1,6 @@ Selfie Segmentation using TFLite and OpenVINO ============================================= - - The Selfie segmentation pipeline allows developers to easily separate the background from users within a scene and focus on what matters. Adding cool effects to selfies or inserting your users into interesting @@ -36,44 +34,49 @@ The tutorial consists of following steps: 2. Run inference on the image. 3. Run interactive background blurring demo on video. -.. _top: - -**Table of contents**: +**Table of contents:** -- `Prerequisites <#prerequisites>`__ - - `Install required dependencies <#install-required-dependencies>`__ - - `Download pre-trained model and test image <#download-pre-trained-model-and-test-image>`__ +- `Prerequisites <#prerequisites>`__ -- `Convert Tensorflow Lite model to OpenVINO IR format <#convert-tensorflow-lite-model-to-openvino-ir-format>`__ -- `Run OpenVINO model inference on image <#run-openvino-model-inference-on-image>`__ + - `Install required + dependencies <#install-required-dependencies>`__ + - `Download pretrained model and test + image <#download-pretrained-model-and-test-image>`__ - - `Load model <#load-model>`__ - - `Prepare input image <#prepare-input-image>`__ - - `Run model inference <#run-model-inference>`__ - - `Postprocess and visualize inference results <#postprocess-and-visualize-inference-results>`__ +- `Convert Tensorflow Lite model to OpenVINO IR + format <#convert-tensorflow-lite-model-to-openvino-ir-format>`__ +- `Run OpenVINO model inference on + image <#run-openvino-model-inference-on-image>`__ -- `Interactive background blurring demo on video 
<#interactive-background-blurring-demo-on-video>`__ + - `Load model <#load-model>`__ + - `Prepare input image <#prepare-input-image>`__ + - `Run model inference <#run-model-inference>`__ + - `Postprocess and visualize inference + results <#postprocess-and-visualize-inference-results>`__ - - `Run Live Background Blurring <#run-live-background-blurring>`__ +- `Interactive background blurring demo on + video <#interactive-background-blurring-demo-on-video>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### + - `Run Live Background + Blurring <#run-live-background-blurring>`__ +Prerequisites +------------------------------------------------------- -Install required dependencies `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Install required dependencies +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" "matplotlib" "opencv-python" + %pip install -q "openvino>=2023.1.0" "matplotlib" "opencv-python" .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -83,9 +86,8 @@ Install required dependencies `⇑ <#top>`__ filename='notebook_utils.py' ); -Download pretrained model and test image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Download pretrained model and test image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -108,13 +110,12 @@ Download pretrained model and test image `⇑ <#top>`__ .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/243-tflite-selfie-segmentation/selfie_multiclass_256x256.tflite') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/243-tflite-selfie-segmentation/selfie_multiclass_256x256.tflite') -Convert Tensorflow Lite model to OpenVINO IR format `⇑ <#top>`__ -############################################################################################################################### - +Convert Tensorflow Lite model to OpenVINO IR format +--------------------------------------------------------------------------------------------- Starting from the 2023.0.0 release, OpenVINO supports TFLite model conversion. However TFLite model format can be directly passed in @@ -126,31 +127,27 @@ capabilities <002-openvino-api-with-output.html>`__), it is recommended to convert model to OpenVINO Intermediate Representation format to apply additional optimizations (e.g. weights compression to FP16 format). To convert the TFLite model to OpenVINO IR, model -conversion Python API can be used. The ``mo.convert_model`` function +conversion Python API can be used. 
The ``ov.convert_model`` function accepts a path to the TFLite model and returns the OpenVINO Model class instance which represents this model. The obtained model is ready to use and to be loaded on the device using ``compile_model`` or can be saved -on a disk using the ``serialize`` function reducing loading time for the -next running. Optionally, we can apply compression to the FP16 model -weights, using the ``compress_to_fp16=True`` option and integrate -preprocessing, using this approach. For more information about model -conversion, see this -`page `__. +on a disk using the ``ov.save_model`` function reducing loading time for +the next running. For more information about model conversion, see this +`page `__. For TensorFlow Lite, refer to the `models -support `__. +support `__. .. code:: ipython3 - from openvino.tools import mo - from openvino.runtime import Core, serialize + import openvino as ov - core = Core() + core = ov.Core() ir_model_path = tflite_model_path.with_suffix(".xml") if not ir_model_path.exists(): - ov_model = mo.convert_model(tflite_model_path, compress_to_fp16=True) - serialize(ov_model, ir_model_path) + ov_model = ov.convert_model(tflite_model_path) + ov.save_model(ov_model, ir_model_path) else: ov_model = core.read_model(ir_model_path) @@ -192,17 +189,15 @@ and others. Each value in the output tensor represents of probability that the pixel belongs to the specified class. We can use the ``argmax`` operation to get the label with the highest probability for each pixel. -Run OpenVINO model inference on image `⇑ <#top>`__ -############################################################################################################################### - +Run OpenVINO model inference on image +------------------------------------------------------------------------------- Let’s see the model in action. For running the inference model with OpenVINO we should load the model on the device first. 
Please use the next dropdown list for the selection inference device. -Load model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -230,9 +225,8 @@ Load model `⇑ <#top>`__ compiled_model = core.compile_model(ov_model, device.value) -Prepare input image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Prepare input image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model accepts an image with size 256x256, we need to resize our input image to fit it in the model input tensor. Usually, segmentation @@ -286,17 +280,15 @@ Additionally, the input image is represented as an RGB image in UINT8 # Convert input data from uint8 [0, 255] to float32 [0, 1] range and add batch dimension normalized_img = np.expand_dims(padded_img.astype(np.float32) / 255, 0) -Run model inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 out = compiled_model(normalized_img)[0] -Postprocess and visualize inference results `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Postprocess and visualize inference results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The model predicts segmentation probabilities mask with the size 256 x 256, we need to apply postprocessing to get labels with the highest @@ -401,9 +393,8 @@ Visualize obtained result .. 
image:: 243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_25_0.png -Interactive background blurring demo on video `⇑ <#top>`__ -############################################################################################################################### - +Interactive background blurring demo on video +--------------------------------------------------------------------------------------- The following code runs model inference on a video: @@ -413,13 +404,12 @@ The following code runs model inference on a video: import time from IPython import display from typing import Union - from openvino.runtime import Model from notebook_utils import VideoPlayer # Main processing function to run background blurring - def run_background_blurring(source:Union[str, int] = 0, flip:bool = False, use_popup:bool = False, skip_first_frames:int = 0, model:Model = ov_model, device:str = "CPU"): + def run_background_blurring(source:Union[str, int] = 0, flip:bool = False, use_popup:bool = False, skip_first_frames:int = 0, model:ov.Model = ov_model, device:str = "CPU"): """ Function for running background blurring inference on video Parameters: @@ -427,7 +417,7 @@ The following code runs model inference on a video: flip (bool, *optional*, False): flip output video, used for front-camera video processing use_popup (bool, *optional*, False): use popup window for avoid flickering skip_first_frames (int, *optional*, 0): specified number of frames will be skipped in video processing - model (Model): OpenVINO model for inference + model (ov.Model): OpenVINO model for inference device (str): inference device Returns: None @@ -525,9 +515,8 @@ The following code runs model inference on a video: if use_popup: cv2.destroyAllWindows() -Run Live Background Blurring `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Live Background Blurring 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use a webcam as the video input. By default, the primary webcam is set with \ ``source=0``. If you have multiple webcams, each one will be @@ -536,16 +525,13 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set \ ``use_popup=True``. -.. note:: - - To use this notebook with a webcam, you need to run the + **NOTE**: To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. If you run the notebook on a remote server (for example, in Binder or Google Colab service), the webcam will not work. By default, the lower cell will run model inference on a video file. If you want to try to live inference on your webcam set ``WEBCAM_INFERENCE = True`` - .. code:: ipython3 WEBCAM_INFERENCE = False diff --git a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_25_0.png b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_25_0.png index 7d78a79936fce2..84c4b4e4d02638 100644 --- a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_25_0.png +++ b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39d0f6720de285c9bf1abf39f5c51b0889eda8aa2ddbb62fb613fa8c248a049e -size 512588 +oid sha256:41a285eebb6f6f42b73d47fd6d08f942f3c09c1e8a50e2498ef8be0bfb488d95 +size 512563 diff --git a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_33_0.png b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_33_0.png index a35b90fd78831d..894e801d44cbcf 100644 --- 
a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_33_0.png +++ b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/243-tflite-selfie-segmentation-with-output_33_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b6e654bff9d785a50a4a40d4ae7b06168a9439fda6d2e77ed927030cb7a8f68 -size 14356 +oid sha256:02e707e438961cb298edeea4712458f9b6ba0a7edc2ec6c1b6d3b8f2883fffb8 +size 14279 diff --git a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/index.html b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/index.html index 6bb3be94ceeddc..c3b7f01c7db1fe 100644 --- a/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/index.html +++ b/docs/notebooks/243-tflite-selfie-segmentation-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/243-tflite-selfie-segmentation-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/243-tflite-selfie-segmentation-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/243-tflite-selfie-segmentation-with-output_files/


../
-243-tflite-selfie-segmentation-with-output_25_0..> 16-Aug-2023 01:31              512588
-243-tflite-selfie-segmentation-with-output_33_0..> 16-Aug-2023 01:31               14356
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/243-tflite-selfie-segmentation-with-output_files/


../
+243-tflite-selfie-segmentation-with-output_25_0..> 31-Oct-2023 00:35              512563
+243-tflite-selfie-segmentation-with-output_33_0..> 31-Oct-2023 00:35               14279
 

diff --git a/docs/notebooks/244-named-entity-recognition-with-output.rst b/docs/notebooks/244-named-entity-recognition-with-output.rst index 9d49188f04a6f7..320bac5661a9cf 100644 --- a/docs/notebooks/244-named-entity-recognition-with-output.rst +++ b/docs/notebooks/244-named-entity-recognition-with-output.rst @@ -1,8 +1,6 @@ Named entity recognition with OpenVINO™ ======================================= - - The Named Entity Recognition(NER) is a natural language processing method that involves the detecting of key information in the unstructured text and categorizing it into pre-defined categories. These @@ -27,53 +25,32 @@ To simplify the user experience, the `Hugging Face Optimum `__ library is used to convert the model to OpenVINO™ IR format and quantize it. -.. _top: - -**Table of contents**: - +**Table of contents:** +--- - `Prerequisites <#prerequisites>`__ -- `Download the NER model <#download-the-ner-model>`__ -- `Quantize the model, using Hugging Face Optimum API <#quantize-the-model-using-hugging-face-optimum-api>`__ -- `Prepare demo for Named Entity Recognition OpenVINO Runtime <#prepare-demo-for-named-entity-recognition-openvino-runtime>`__ -- `Compare the Original and Quantized Models <#compare-the-original-and-quantized-models>`__ - - `Compare performance <#compare-performance>`__ - - `Compare size of the models <#compare-size-of-the-models>`__ +- `Download the NER model <#download-the-ner-model>`__ +- `Quantize the model, using Hugging Face Optimum + API <#quantize-the-model-using-hugging-face-optimum-api>`__ +- `Prepare demo for Named Entity Recognition OpenVINO + Runtime <#prepare-demo-for-named-entity-recognition-openvino-runtime>`__ +- `Compare the Original and Quantized + Models <#compare-the-original-and-quantized-models>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### + - `Compare performance <#compare-performance>`__ + - `Compare 
size of the + models <#compare-size-of-the-models>`__ +Prerequisites +------------------------------------------------------- .. code:: ipython3 - !pip install -q "diffusers>=0.17.1" "openvino-dev>=2023.0.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "optimum-intel>=1.9.1" "transformers>=4.31.0" - - -.. parsed-literal:: - - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - audiocraft 0.0.2a2 requires xformers, which is not installed. - audiocraft 0.0.2a2 requires torch>=2.0.0, but you have torch 1.13.1+cpu which is incompatible. - audiocraft 0.0.2a2 requires torchaudio>=2.0.0, but you have torchaudio 0.13.1+cpu which is incompatible. - deepfloyd-if 1.0.2rc0 requires accelerate~=0.15.0, but you have accelerate 0.22.0.dev0 which is incompatible. - deepfloyd-if 1.0.2rc0 requires diffusers~=0.16.0, but you have diffusers 0.18.2 which is incompatible. - deepfloyd-if 1.0.2rc0 requires transformers~=4.25.1, but you have transformers 4.30.2 which is incompatible. - paddleclas 2.5.1 requires faiss-cpu==1.7.1.post2, but you have faiss-cpu 1.7.4 which is incompatible. - paddleclas 2.5.1 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - ppgan 2.1.0 requires librosa==0.8.1, but you have librosa 0.9.2 which is incompatible. - ppgan 2.1.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.7.0.72 which is incompatible. - pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 3.20.3 which is incompatible. - spacy 3.5.2 requires pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4, but you have pydantic 2.0.3 which is incompatible. - thinc 8.1.10 requires pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4, but you have pydantic 2.0.3 which is incompatible. - visualdl 2.5.2 requires gradio==3.11.0, but you have gradio 3.36.1 which is incompatible. 
- - [notice] A new release of pip is available: 23.1.2 -> 23.2 - [notice] To update, run: pip install --upgrade pip - - -Download the NER model `⇑ <#top>`__ -############################################################################################################################### + %pip install -q "diffusers>=0.17.1" "openvino>=2023.1.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "transformers>=4.31.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" +Download the NER model +---------------------------------------------------------------- We load the `distilbert-base-cased-finetuned-conll03-english `__ @@ -98,39 +75,16 @@ method. tokenizer = AutoTokenizer.from_pretrained(model_id) - .. parsed-literal:: - Downloading (…)lve/main/config.json: 0%| | 0.00/954 [00:00`__ -############################################################################################################################### +Quantize the model, using Hugging Face Optimum API +-------------------------------------------------------------------------------------------- Post-training static quantization introduces an additional calibration step where data is fed through the network in order to compute the @@ -187,14 +141,6 @@ corresponding ``OVModelForXxx`` class. So we use optimized_model = OVModelForTokenClassification.from_pretrained(quantized_ner_model_dir) -.. parsed-literal:: - - 2023-07-17 14:40:49.402855: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-17 14:40:49.442756: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-17 14:40:50.031065: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -203,65 +149,16 @@ corresponding ``OVModelForXxx`` class. So we use .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - comet_ml is installed but `COMET_API_KEY` is not set. - - - -.. parsed-literal:: - - Downloading builder script: 0%| | 0.00/9.57k [00:00`__ -############################################################################################################################### - +Prepare demo for Named Entity Recognition OpenVINO Runtime +---------------------------------------------------------------------------------------------------- As the Optimum Inference models are API compatible with Hugging Face Transformers models, we can just use ``pipleine()`` from `Hugging Face @@ -421,9 +290,9 @@ text. if __name__ == "__main__": try: - demo.launch(debug=True) + demo.launch(debug=False) except Exception: - demo.launch(share=True, debug=True) + demo.launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ @@ -431,8 +300,6 @@ text. .. parsed-literal:: - - Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. @@ -441,7 +308,7 @@ text. .. .. raw:: html -..
+..
.. parsed-literal:: @@ -449,18 +316,16 @@ text. Keyboard interruption in main thread... closing server. -Compare the Original and Quantized Models `⇑ <#top>`__ -############################################################################################################################### - +Compare the Original and Quantized Models +----------------------------------------------------------------------------------- Compare the original `distilbert-base-cased-finetuned-conll03-english `__ model with quantized and converted to OpenVINO IR format models to see the difference. -Compare performance `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Compare performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -495,12 +360,12 @@ Compare performance `⇑ <#top>`__ .. parsed-literal:: - Median inference time of quantized model: 0.008888308017048985 + Median inference time of quantized model: 0.008145123501890339 + Median inference time of original model: 0.09339697850373341 -Compare size of the models `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Compare size of the models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -508,3 +373,10 @@ Compare size of the models `⇑ <#top>`__ print(f'Size of original model in Bytes is {Path(original_ner_model_dir, "pytorch_model.bin").stat().st_size}') print(f'Size of quantized model in Bytes is {Path(quantized_ner_model_dir, "openvino_model.bin").stat().st_size}') + + +.. 
parsed-literal:: + + Size of original model in Bytes is 260824741 + Size of quantized model in Bytes is 133539000 + diff --git a/docs/notebooks/245-typo-detector-with-output.rst b/docs/notebooks/245-typo-detector-with-output.rst index a9248929307134..42ebf79363741a 100644 --- a/docs/notebooks/245-typo-detector-with-output.rst +++ b/docs/notebooks/245-typo-detector-with-output.rst @@ -32,15 +32,60 @@ positives and false negatives. The model has been pretrained on the `NeuSpell `__ dataset. -Imports -~~~~~~~ +**Table of contents:** + + +- `Imports <#imports>`__ +- `Methods <#methods>`__ + + - `1. Using the Hugging Face Optimum + library <#-using-the-hugging-face-optimum-library>`__ + - `2. Converting the model to OpenVINO + IR <#-converting-the-model-to-openvino-ir>`__ + +- `Select inference device <#select-inference-device>`__ +- `1. Hugging Face Optimum Intel + library <#-hugging-face-optimum-intel-library>`__ + + - `Load the model <#load-the-model>`__ + - `Load the tokenizer <#load-the-tokenizer>`__ + +- `2. Converting the model to OpenVINO + IR <#-converting-the-model-to-openvino-ir>`__ + + - `Load the Pytorch model <#load-the-pytorch-model>`__ + - `Converting to OpenVINO IR <#converting-to-openvino-ir>`__ + - `Inference <#inference>`__ + +- `Helper Functions <#helper-functions>`__ + +.. code:: ipython3 + + %pip install -q "diffusers>=0.17.1" "openvino>=2023.1.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "transformers>=4.31.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.24.4 which is incompatible. + pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 4.24.4 which is incompatible. + tensorflow 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.8.0 which is incompatible. + tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 4.24.4 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 from transformers import AutoConfig, AutoTokenizer, AutoModelForTokenClassification, pipeline from pathlib import Path import numpy as np - import torch import re from typing import List, Dict import time @@ -48,14 +93,14 @@ Imports .. parsed-literal:: - 2023-08-16 01:01:23.631663: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2023-08-16 01:01:23.665285: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-31 00:01:48.550893: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:01:48.584584: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-16 01:01:24.208556: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-31 00:01:49.140201: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Methods -~~~~~~~ +Methods +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The notebook provides two methods to run the inference of typo detector with OpenVINO runtime, so that you can experience both calling the API @@ -63,8 +108,8 @@ of Optimum with OpenVINO Runtime included, and loading models in other frameworks, converting them to OpenVINO IR format, and running inference with OpenVINO Runtime. -1. Using the `Hugging Face Optimum `__ library -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' +1. Using the `Hugging Face Optimum `__ library +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' The Hugging Face Optimum API is a high-level API that allows us to convert models from the Hugging Face Transformers library to the @@ -72,14 +117,11 @@ OpenVINO™ IR format. 
Compiled models in OpenVINO IR format can be loaded using Optimum. Optimum allows the use of optimization on targeted hardware. -2. Converting the model to ONNX and then to OpenVINO IR -''''''''''''''''''''''''''''''''''''''''''''''''''''''' +2. Converting the model to OpenVINO IR +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' -First the Pytorch model is converted to the ONNX format and then the -`Model -Optimizer `__ -tool will be used to convert to `OpenVINO IR -format `__. This +The Pytorch model is converted to `OpenVINO IR +format `__. This method provides much more insight to how to set up a pipeline from model loading to model converting, compiling and running inference with OpenVINO, so that you could conveniently use OpenVINO to optimize and @@ -95,8 +137,8 @@ methods | Load models from Optimum, an | Load model from transformers | | extension of transformers | | +-----------------------------------+----------------------------------+ -| Load the model in OpenVINO IR | Convert to ONNX and then to | -| format on the fly | OpenVINO IR | +| Load the model in OpenVINO IR | Convert to OpenVINO IR | +| format on the fly | | +-----------------------------------+----------------------------------+ | Load the compiled model by | Compile the OpenVINO IR and run | | default | inference with OpenVINO Runtime | @@ -105,17 +147,17 @@ methods | inference with OpenVINO Runtime | | +-----------------------------------+----------------------------------+ -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. 
code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -135,8 +177,8 @@ Select device from dropdown list for running inference using OpenVINO: -1. Hugging Face Optimum Intel library -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +1. Hugging Face Optimum Intel library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For this method, we need to install the ``Hugging Face Optimum Intel library`` accelerated by OpenVINO @@ -150,16 +192,6 @@ Face Transformers models. This means we need just replace ``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` class. -.. code:: ipython3 - - !pip install -q "diffusers>=0.17.1" "openvino-dev>=2023.0.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "optimum-intel>=1.9.1" "transformers>=4.31.0" - - -.. parsed-literal:: - - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - - Import required model class .. code:: ipython3 @@ -175,10 +207,12 @@ Import required model class .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. 
Please import deepspeed modules directly from transformers.integrations + warnings.warn( -Load the model -'''''''''''''' +Load the model +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' From the ``OVModelForTokenCLassification`` class we will import the relevant pre-trained model. To load a Transformers model and convert it @@ -200,18 +234,30 @@ your model. model.save_pretrained(model_dir) -.. code:: +.. parsed-literal:: + + Framework not specified. Using pt to export to ONNX. + Using the export variant default. Available variants are: + - default: The default ONNX variant. + Using framework PyTorch: 1.13.1+cpu + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: - Framework not specified. Using pt to export to ONNX. - Using framework PyTorch: 1.13.1+cpu - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - op1 = operator(*args, **kwargs) - Compiling the model... - Set CACHE_DIR to /tmp/tmpmevydbbe/model_cache + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + op1 = operator(\*args, \*\*kwargs) + Compiling the model to AUTO ... + Set CACHE_DIR to /tmp/tmpuz_oy32n/model_cache -Load the tokenizer -'''''''''''''''''' +Load the tokenizer +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Text Preprocessing cleans the text-based input data so it can be fed into the model. Tokenization splits paragraphs and sentences into @@ -316,14 +362,14 @@ Let’s run a demo using the Hugging Face Optimum API. [Input]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. [Detected]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. ---------------------------------------------------------------------------------------------------------------------------------- - Time elapsed: 0.20883584022521973 + Time elapsed: 0.20258617401123047 -2. Converting the model to ONNX and then to OpenVINO IR -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +2. Converting the model to OpenVINO IR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Load the Pytorch model -'''''''''''''''''''''' +Load the Pytorch model +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Use the ``AutoModelForTokenClassification`` class to load the pretrained pytorch model. @@ -343,79 +389,34 @@ pytorch model. model = AutoModelForTokenClassification.from_pretrained(model_id, config=config) model.save_pretrained(model_dir) -Converting to `ONNX `__ -''''''''''''''''''''''''''''''''''''''''' - -``ONNX`` is an open format built to represent machine learning models. 
-ONNX defines a common set of operators - the building blocks of machine -learning and deep learning models - and a common file format to enable -AI developers to use models with a variety of frameworks, tools, -runtimes, and compilers. We need to convert our model from PyTorch to -ONNX. In order to perform the operation, we use the torch.onnx.export -function to `convert a Hugging Face -model `__ -to its respective ONNX format. +Converting to OpenVINO IR +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' .. code:: ipython3 - onnx_model = "typo_detect.onnx" - - onnx_model_path = Path(model_dir) / onnx_model + ov_model_path = Path(model_dir) / "typo_detect.xml" dummy_model_input = tokenizer("This is a sample", return_tensors="pt") - - torch.onnx.export( - model, - tuple(dummy_model_input.values()), - f=onnx_model_path, - input_names=['input_ids', 'attention_mask'], - output_names=['logits'], - dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}, - 'attention_mask': {0: 'batch_size', 1: 'sequence'}, - 'logits': {0: 'batch_size', 1: 'sequence'}}, - ) - -Model Optimizer -''''''''''''''' + ov_model = ov.convert_model(model, example_input=dict(dummy_model_input)) + ov.save_model(ov_model, ov_model_path) -`Model -Optimizer `__ -is a cross-platform command-line tool that facilitates the transition -between training and deployment environments, performs static model -analysis, and adjusts deep learning models for optimal execution on -end-point target devices. Model Optimizer converts the model to the -OpenVINO Intermediate Representation format (IR), which you can infer -later with `OpenVINO -runtime `__. - -.. code:: ipython3 - - from openvino.tools.mo import convert_model - - ov_model = convert_model(onnx_model_path) - -Inference -''''''''' +Inference +''''''''''''''''''''''''''''''''''''''''''''''''''' OpenVINO™ Runtime Python API is used to compile the model in OpenVINO IR -format. 
The -`Core `__ -class from the ``openvino.runtime`` module is imported first. This class -provides access to the OpenVINO Runtime API. The ``core`` object, which -is an instance of the ``Core`` class, represents the API and it is used -to compile the model. The output layer is extracted from the compiled -model as it is needed for inference. +format. The Core class from the ``openvino`` module is imported first. +This class provides access to the OpenVINO Runtime API. The ``core`` +object, which is an instance of the ``Core`` class, represents the API +and it is used to compile the model. The output layer is extracted from +the compiled model as it is needed for inference. .. code:: ipython3 - from openvino.runtime import Core - - core = Core() compiled_model = core.compile_model(ov_model, device.value) output_layer = compiled_model.output(0) -Helper Functions -~~~~~~~~~~~~~~~~ +Helper Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -589,5 +590,5 @@ Let’s run a demo using the converted OpenVINO IR model. [Input]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. [Detected]: I have been stuying for my math exam all week, but I'm stil not very confidet that I will pass it, because there are so many formuals to remeber. ---------------------------------------------------------------------------------------------------------------------------------- - Time elapsed: 0.1267991065979004 + Time elapsed: 0.10554790496826172 diff --git a/docs/notebooks/246-depth-estimation-videpth-with-output.rst b/docs/notebooks/246-depth-estimation-videpth-with-output.rst index 98c52ee17fe864..bf81d6d2110846 100644 --- a/docs/notebooks/246-depth-estimation-videpth-with-output.rst +++ b/docs/notebooks/246-depth-estimation-videpth-with-output.rst @@ -64,12 +64,54 @@ repository `__ for the pre-processing, model transformations and basic utility code. 
A part of it has already been kept as it is in the `utils `__ directory. At the same time we will learn how to perform `model -conversion `__ +conversion `__ for converting a model in a different format to the standard OpenVINO™ IR model representation *via* another format. -Imports -~~~~~~~ +**Table of contents:** + + +- `Imports <#imports>`__ +- `Loading models and + checkpoints <#loading-models-and-checkpoints>`__ + + - `Cleaning up the model + directory <#cleaning-up-the-model-directory>`__ + +- `Transformation of models <#transformation-of-models>`__ + + - `Dummy input creation <#dummy-input-creation>`__ + - `Conversion of depth model to OpenVINO™ IR + format <#conversion-of-depth-model-to-openvino-ir-format>`__ + + - `Select inference device <#select-inference-device>`__ + - `Compilation of depth + model <#compilation-of-depth-model>`__ + - `Computation of scale and shift + parameters <#computation-of-scale-and-shift-parameters>`__ + + - `Conversion of Scale Map Learner model to OpenVINO™ IR + format <#conversion-of-scale-map-learner-model-to-openvino-ir-format>`__ + + - `Select inference device <#select-inference-device>`__ + - `Compilation of the ScaleMapLearner(SML) + model <#compilation-of-the-scalemaplearnersml-model>`__ + + - `Storing and visualizing dummy results + obtained <#storing-and-visualizing-dummy-results-obtained>`__ + +- `Running inference on a test + image <#running-inference-on-a-test-image>`__ +- `Store and visualize Inference + results <#store-and-visualize-inference-results>`__ + + - `Cleaning up the data + directory <#cleaning-up-the-data-directory>`__ + +- `Concluding notes <#concluding-notes>`__ + +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -81,27 +123,30 @@ Imports # Download the correct version of the PyTorch deep learning library associated with image models # alongside the lightning module - !pip install -q lightning timm==0.6.12 + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "pytorch-lightning" "timm==0.6.12" "openvino>=2023.1.0" .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - onnx 1.14.0 requires protobuf>=3.20.2, but you have protobuf 3.20.1 which is incompatible. - paddlepaddle 2.5.0rc0 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.20.1 which is incompatible. - tensorflow 2.12.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.1 which is incompatible. - + onnx 1.15.0 requires protobuf>=3.20.2, but you have protobuf 3.20.1 which is incompatible. + onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 3.20.1 which is incompatible. 
+ paddlepaddle 2.5.2 requires protobuf>=3.20.2; platform_system != "Windows", but you have protobuf 3.20.1 which is incompatible. + tensorflow 2.13.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 3.20.1 which is incompatible. + tensorflow 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.8.0 which is incompatible. + tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 3.20.1 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 import matplotlib.pyplot as plt import matplotlib.image as mpimg import numpy as np - import openvino + import openvino as ov import torch import torchvision - from openvino.runtime import Core from pathlib import Path from shutil import rmtree from typing import Optional, Tuple @@ -122,8 +167,8 @@ Imports # Ability to display images inline %matplotlib inline -Loading models and checkpoints -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Loading models and checkpoints +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The complete pipeline here requires only two models: one for depth estimation and a ScaleMapLearner model which is responsible for @@ -141,26 +186,26 @@ ScaleMapLearner model. *Interestingly*, the ScaleMapLearner decides the depth prediction model as you will see. 
-+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| Depth Predictor | SML on VOID 150 | SML on VOID 500 | SML on VOID 1500 | -+==================+=================================================================================================================================+==================================================================================================================================+===================================================================================================================================+ -| DPT-BEiT-Large | `model `__ | `model `__ | `model `__ | -+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| DPT-SwinV2-Large | `model `__ | `model `__ | `model `__ | -+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| DPT-Large | `model `__ | `model `__ | `model `__ | 
-+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| DPT-Hybrid | `model `__ \* | `model `__ | `model `__ | -+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| DPT-SwinV2-Tiny | `model `__ | `model `__ | `model `__ | -+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| DPT-LeViT | `model `__ | `model `__ | `model `__ | -+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ -| MiDaS-small | `model `__ | `model `__ | `model `__ | 
-+------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - - -\* Also available with pre-training on TartanAir: ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| Depth Predictor | SML on VOID 150 | SML on VOID 500 | SML on VOID 1500 | ++==================+=================================================================================================================================+=================================================================================================================================+==================================================================================================================================+ +| DPT-BEiT-Large | `model `__ | `model `__ | `model `__ | ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| DPT-SwinV2-Large | `model `__ | `model `__ | `model `__ | 
++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| DPT-Large | `model `__ | `model `__ | `model `__ | ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| DPT-Hybrid | `model `__\ \* | `model `__ | `model `__ | ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| DPT-SwinV2-Tiny | `model `__ | `model `__ | `model `__ | ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| DPT-LeViT | `model `__ | `model `__ | `model `__ | 
++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ +| MiDaS-small | `model `__ | `model `__ | `model `__ | ++------------------+---------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ + + +\*Also available with pre-training on TartanAir: `model `__ .. code:: ipython3 @@ -239,7 +284,7 @@ depth prediction model as you will see. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/hub.py:267: UserWarning: You are about to download and run code from an untrusted repository. In a future release, this won't be allowed. To add the repository to your trusted list, change the command to {calling_fn}(..., trust_repo=False) and a command prompt will appear asking for an explicit confirmation of trust, or load(..., trust_repo=True), which will assume that the prompt is to be answered with 'yes'. You can also use load(..., trust_repo='check') which will only prompt for confirmation if the repo is not already trusted. This will eventually be the default behaviour + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/hub.py:267: UserWarning: You are about to download and run code from an untrusted repository. 
In a future release, this won't be allowed. To add the repository to your trusted list, change the command to {calling_fn}(..., trust_repo=False) and a command prompt will appear asking for an explicit confirmation of trust, or load(..., trust_repo=True), which will assume that the prompt is to be answered with 'yes'. You can also use load(..., trust_repo='check') which will only prompt for confirmation if the repo is not already trusted. This will eventually be the default behaviour warnings.warn( Downloading: "https://github.com/rwightman/gen-efficientnet-pytorch/zipball/master" to model/master.zip Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth" to model/checkpoints/tf_efficientnet_lite3-b733e338.pth @@ -252,8 +297,8 @@ depth prediction model as you will see. 0%| | 0.00/81.8M [00:00`__ @@ -273,8 +318,8 @@ process. if list_file.is_file(): list_file.unlink() -Transformation of models -~~~~~~~~~~~~~~~~~~~~~~~~ +Transformation of models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each of the models need an appropriate transformation which can be invoked by the ``get_model_transforms`` function. It needs only the @@ -286,7 +331,7 @@ model are always in direct correspondence with each other. # Define important custom types type_transform_compose = torchvision.transforms.transforms.Compose - type_compiled_model = openvino.runtime.ie_api.CompiledModel + type_compiled_model = ov.CompiledModel .. code:: ipython3 @@ -308,11 +353,11 @@ model are always in direct correspondence with each other. depth_model_transform, scale_map_learner_transform = get_model_transforms(depth_predictor='midas_small', nsamples=NSAMPLES) -Dummy input creation -^^^^^^^^^^^^^^^^^^^^ +Dummy input creation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Dummy inputs are necessary for `PyTorch to -ONNX `__ +ONNX `__ conversion. 
Although `torch.onnx.export `__ accepts any dummy input for a single pass through the model and thereby @@ -392,8 +437,8 @@ dataset # Transform the dummy input image for the depth model transformed_dummy_image = transform_image_for_depth(input_image=dummy_input, depth_model_transform=depth_model_transform) -Conversion of depth model to OpenVINO™ IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Conversion of depth model to OpenVINO IR format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The OpenVINO™ toolkit doesn’t provide any direct method of converting PyTorch models to the intermediate representation format. To have a @@ -404,7 +449,7 @@ we shall follow the following steps: models and checkpoints* stage. 2. Export the model to ``.onnx`` format using the transformed dummy input created earlier. -3. Use the serialize function from OpenVINO to create equivalent +3. Use the save model function from OpenVINO to create equivalent ``.xml`` and ``.bin`` files and obtain compiled models in the same step. Alternatively serialization procedure may be avoided and compiled model may be obtained by directly using OpenVINO’s @@ -423,29 +468,29 @@ we shall follow the following steps: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/246-depth-estimation-videpth/model/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py:47: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/246-depth-estimation-videpth/model/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py:47: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. 
(Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( -Select inference device -''''''''''''''''''''''' +Select inference device +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 import ipywidgets as widgets - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -465,8 +510,8 @@ Select device from dropdown list for running inference using OpenVINO: -Compilation of depth model -'''''''''''''''''''''''''' +Compilation of depth model +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Now we can go ahead and compile our depth models from the ``.onnx`` file path. We will not perform serialization because we don’t plan to re-read @@ -476,7 +521,6 @@ depth estimation model as it is. .. code:: ipython3 # Initialize OpenVINO Runtime. 
- core = Core() depth_model = core.read_model(MODEL_DIR / 'depth_model.onnx') compiled_depth_model = core.compile_model(model=depth_model, device_name=device.value) @@ -525,8 +569,8 @@ depth estimation model as it is. depth_pred_dummy = run_depth_model(input_image_h=IMAGE_H, input_image_w=IMAGE_W, transformed_image=transformed_dummy_image, compiled_depth_model=compiled_depth_model) -Computation of scale and shift parameters -''''''''''''''''''''''''''''''''''''''''' +Computation of scale and shift parameters +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Computation of these parameters required the depth estimation model output from the previous step. These are the regression based parameters @@ -633,8 +677,8 @@ purpose has already been created. scale_map_learner_transform=scale_map_learner_transform, int_depth=d_depth, int_scales=d_scales) -Conversion of Scale Map Learner model to OpenVINO™ IR format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Conversion of Scale Map Learner model to OpenVINO IR format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The OpenVINO™ toolkit doesn’t provide any direct method of converting PyTorch models to the intermediate representation format. To have the @@ -646,7 +690,7 @@ it, we shall follow the following steps: passing the downloaded checkpoint earlier as an argument. 2. Export the model to ``.onnx`` format using the transformed dummy inputs created earlier. -3. Use the serialize function from OpenVINO to create equivalent +3. Use the save model function from OpenVINO to create equivalent ``.xml`` and ``.bin`` files and obtain compiled models in the same step. 
Alternatively serialization procedure may be avoided and compiled model may be obtained by directly using OpenVINO’s @@ -655,14 +699,14 @@ it, we shall follow the following steps: If the name of the ``.ckpt`` file is too much to handle, here is the common format of all checkpoint files from the model releases. -- sml_model.dpredictor..nsamples..ckpt -- Replace and with the depth estimation - model name and the no. of levels of depth density the SML model - has been trained on -- E.g. sml_model.dpredictor.dpt_hybrid.nsamples.500.ckpt will be the - file name corresponding to the SML model based on the dpt_hybrid - depth predictor and has been trained on 500 points of the density - level on the depth map + - sml_model.dpredictor..nsamples..ckpt + - Replace and with the depth estimation + model name and the no. of levels of depth density the SML model + has been trained on + - E.g. sml_model.dpredictor.dpt_hybrid.nsamples.500.ckpt will be the + file name corresponding to the SML model based on the dpt_hybrid + depth predictor and has been trained on 500 points of the density + level on the depth map .. code:: ipython3 @@ -710,23 +754,23 @@ common format of all checkpoint files from the model releases. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/246-depth-estimation-videpth/model/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py:47: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/246-depth-estimation-videpth/model/rwightman_gen-efficientnet-pytorch_master/geffnet/conv2d_layers.py:47: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. 
(Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.) _C._jit_pass_onnx_graph_shape_type_inference( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) _C._jit_pass_onnx_graph_shape_type_inference( -Select inference device -''''''''''''''''''''''' +Select inference device +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -741,8 +785,8 @@ Select device from dropdown list for running inference using OpenVINO: -Compilation of the ScaleMapLearner(SML) model -''''''''''''''''''''''''''''''''''''''''''''' +Compilation of the ScaleMapLearner(SML) model +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Now we can go ahead and compile our SML model from the ``.onnx`` file path. We will not perform serialization because we don’t plan to re-read @@ -803,8 +847,8 @@ SML model as it is. 
transformed_image_for_depth_scale=transformed_dummy_image_scale, compiled_scale_map_learner=compiled_scale_map_learner) -Storing and visualizing dummy results obtained -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Storing and visualizing dummy results obtained +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -858,8 +902,8 @@ Storing and visualizing dummy results obtained .. image:: 246-depth-estimation-videpth-with-output_files/246-depth-estimation-videpth-with-output_48_2.png -Running inference on a test image -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Running inference on a test image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now role of both the dummy inputs i.e. the dummy image as well as its associated depth map is now over. Since we have access to the compiled @@ -949,8 +993,8 @@ present*\ `here `__. diff --git a/docs/notebooks/246-depth-estimation-videpth-with-output_files/index.html b/docs/notebooks/246-depth-estimation-videpth-with-output_files/index.html index 2e55e6f38e22a3..599ce48df64586 100644 --- a/docs/notebooks/246-depth-estimation-videpth-with-output_files/index.html +++ b/docs/notebooks/246-depth-estimation-videpth-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/246-depth-estimation-videpth-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/246-depth-estimation-videpth-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/246-depth-estimation-videpth-with-output_files/


../
-246-depth-estimation-videpth-with-output_48_2.png  16-Aug-2023 01:31              215788
-246-depth-estimation-videpth-with-output_53_2.png  16-Aug-2023 01:31              190117
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/246-depth-estimation-videpth-with-output_files/


../
+246-depth-estimation-videpth-with-output_48_2.png  31-Oct-2023 00:35              215788
+246-depth-estimation-videpth-with-output_53_2.png  31-Oct-2023 00:35              190117
 

diff --git a/docs/notebooks/247-code-language-id-with-output.rst b/docs/notebooks/247-code-language-id-with-output.rst index 2d0c9d3019b418..22c7854ab78af0 100644 --- a/docs/notebooks/247-code-language-id-with-output.rst +++ b/docs/notebooks/247-code-language-id-with-output.rst @@ -4,20 +4,58 @@ Programming Language Classification with OpenVINO Overview -------- -This tutorial will be divided in 2 parts: - -1. Create a simple inference pipeline with a pre-trained model using the OpenVINO™ IR format. -2. Conduct `post-training quantization `__ - on a pre-trained model using Hugging Face Optimum and benchmark performance. +This tutorial will be divided in 2 parts: 1. Create a simple inference +pipeline with a pre-trained model using the OpenVINO™ IR format. 2. +Conduct `post-training +quantization `__ +on a pre-trained model using Hugging Face Optimum and benchmark +performance. Feel free to use the notebook outline in Jupyter or your IDE for easy navigation. -Introduction ------------- +**Table of contents:** + + +- `Introduction <#introduction>`__ + + - `Task <#task>`__ + - `Model <#model>`__ + +- `Part 1: Inference pipeline with + OpenVINO <#part--inference-pipeline-with-openvino>`__ + + - `Install prerequisites <#install-prerequisites>`__ + - `Imports <#imports>`__ + - `Setting up HuggingFace + cache <#setting-up-huggingface-cache>`__ + - `Select inference device <#select-inference-device>`__ + - `Download resources <#download-resources>`__ + - `Create inference pipeline <#create-inference-pipeline>`__ + - `Inference on new input <#inference-on-new-input>`__ -Task -~~~~ +- `Part 2: OpenVINO post-training quantization with HuggingFace + Optimum <#part--openvino-post-training-quantization-with-huggingface-optimum>`__ + + - `Define constants and + functions <#define-constants-and-functions>`__ + - `Load resources <#load-resources>`__ + - `Load calibration dataset <#load-calibration-dataset>`__ + - `Quantize model <#quantize-model>`__ + - `Load quantized model 
<#load-quantized-model>`__ + - `Inference on new input using quantized + model <#inference-on-new-input-using-quantized-model>`__ + - `Load evaluation set <#load-evaluation-set>`__ + - `Evaluate model <#evaluate-model>`__ + +- `Additional resources <#additional-resources>`__ +- `Clean up <#clean-up>`__ + +Introduction +------------------------------------------------------ + +Task +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ **Programming language classification** is the task of identifying which programming language is used in an arbitrary code snippet. This can be @@ -42,8 +80,8 @@ formal, their symbols, syntax, and grammar can be revised and updated. For example, the walrus operator (``:=``) was a symbol distinctively used in Golang, but was later introduced in Python 3.8. -Model -~~~~~ +Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The classification model that will be used in this notebook is `CodeBERTa-language-id `__ @@ -57,17 +95,18 @@ dataset (Husain, 2019). It supports 6 programming languages: - Go - Java - JavaScript - PHP - Python - Ruby -Part 1: Inference pipeline with OpenVINO ----------------------------------------- +Part 1: Inference pipeline with OpenVINO +---------------------------------------------------------------------------------- -For this section, we will use the `HuggingFace Optimum `__ library, which +For this section, we will use the `HuggingFace +Optimum `__ library, which aims to optimize inference on specific hardware and integrates with the -OpenVINO toolkit. The code will be very similar to the -`HuggingFace Transformers `__, but +OpenVINO toolkit. The code will be very similar to the `HuggingFace +Transformers `__, but will allow to automatically convert models to the OpenVINO™ IR format. -Install prerequisites -~~~~~~~~~~~~~~~~~~~~~ +Install prerequisites +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First, complete the `repository installation steps <../notebooks_installation.html>`__. 
@@ -76,18 +115,25 @@ OpenVINO support - HuggingFace Evaluate to benchmark results .. code:: ipython3 - !pip install -q "diffusers>=0.17.1" "openvino-dev>=2023.0.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "optimum-intel>=1.9.1" "transformers>=4.31.0" "evaluate" + %pip install -q "diffusers>=0.17.1" "openvino>=2023.1.0" "nncf>=2.5.0" "gradio" "onnx>=1.11.0" "onnxruntime>=1.14.0" "transformers>=4.31.0" "evaluate" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 4.24.0 which is incompatible. - + onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 4.24.4 which is incompatible. + pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 4.24.4 which is incompatible. + tensorflow 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.8.0 which is incompatible. 
+ tf2onnx 1.15.1 requires protobuf~=3.20.2, but you have protobuf 4.24.4 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + -Imports -~~~~~~~ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The import ``OVModelForSequenceClassification`` from Optimum is equivalent to ``AutoModelForSequenceClassification`` from Transformers @@ -108,10 +154,10 @@ equivalent to ``AutoModelForSequenceClassification`` from Transformers .. parsed-literal:: - 2023-08-16 01:03:40.095980: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-16 01:03:40.129769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-31 00:04:18.151817: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:04:18.186093: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-16 01:03:40.709247: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-31 00:04:18.771332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -122,10 +168,12 @@ equivalent to ``AutoModelForSequenceClassification`` from Transformers .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( -Setting up HuggingFace cache -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Setting up HuggingFace cache +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Resources from HuggingFace will be downloaded in the local folder ``./model`` (next to this notebook) instead of the device global cache @@ -138,17 +186,17 @@ for easy cleanup. Learn more MODEL_ID = f"huggingface/{MODEL_NAME}" MODEL_LOCAL_PATH = Path("./model").joinpath(MODEL_NAME) -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. 
code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -168,8 +216,8 @@ Select device from dropdown list for running inference using OpenVINO: -Download resources -~~~~~~~~~~~~~~~~~~ +Download resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -199,37 +247,42 @@ Download resources .. parsed-literal:: Framework not specified. Using pt to export to ONNX. - Some weights of the model checkpoint at huggingface/CodeBERTa-language-id were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] + Some weights of the model checkpoint at huggingface/CodeBERTa-language-id were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). + Using the export variant default. Available variants are: + - default: The default ONNX variant. Using framework PyTorch: 1.13.1+cpu Overriding 1 configuration item(s) - use_cache -> False - Compiling the model... - Set CACHE_DIR to /tmp/tmpsl_db7y_/model_cache .. parsed-literal:: - Ressources cached locally at: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/247-code-language-id/model/CodeBERTa-language-id + WARNING:tensorflow:Please fix your imports. 
Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. -Create inference pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code:: ipython3 +.. parsed-literal:: - code_classification_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer) + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + Compiling the model to AUTO ... + Set CACHE_DIR to /tmp/tmpbwk74vw4/model_cache .. parsed-literal:: - Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers - pip install xformers. + Ressources cached locally at: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/247-code-language-id/model/CodeBERTa-language-id -Inference on new input -~~~~~~~~~~~~~~~~~~~~~~ +Create inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + code_classification_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer) + +Inference on new input +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -251,19 +304,21 @@ Inference on new input Predicted score: 0.81 -Part 2: OpenVINO post-training quantization with HuggingFace Optimum --------------------------------------------------------------------- +Part 2: OpenVINO post-training quantization with HuggingFace Optimum +-------------------------------------------------------------------------------------------------------------- In this section, we will quantize a trained model. At a high-level, this process consists of using lower precision numbers in the model, which results in a smaller model size and faster inference at the cost of a -potential marginal performance degradation. `Learn more `__. 
+potential marginal performance degradation. `Learn +more `__. The HuggingFace Optimum library supports post-training quantization for -OpenVINO. `Learn more `__. +OpenVINO. `Learn +more `__. -Define constants and functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Define constants and functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -302,13 +357,11 @@ Define constants and functions return Dataset.from_list(examples) -Load resources -~~~~~~~~~~~~~~ - -.. note:: - - The base model is loaded using ``AutoModelForSequenceClassification`` from ``Transformers``. +Load resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NOTE: The base model is loaded using +``AutoModelForSequenceClassification`` from ``Transformers``. .. code:: ipython3 @@ -321,21 +374,19 @@ Load resources .. parsed-literal:: - Some weights of the model checkpoint at huggingface/CodeBERTa-language-id were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] + Some weights of the model checkpoint at huggingface/CodeBERTa-language-id were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-Load calibration dataset -~~~~~~~~~~~~~~~~~~~~~~~~ +Load calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``get_dataset_sample()`` function will sample up to ``num_samples``, with an equal number of examples across the 6 programming languages. -.. note:: - - Uncomment the method below to download and use the full dataset (5+ Gb). - +NOTE: Uncomment the method below to download and use the full dataset +(5+ Gb). .. code:: ipython3 @@ -365,8 +416,8 @@ with an equal number of examples across the 6 programming languages. Map: 0%| | 0/120 [00:00 False .. parsed-literal:: - WARNING:nncf:You are setting `forward` on an NNCF-processed model object. - NNCF relies on custom-wrapping the `forward` call in order to function properly. - Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour. - If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling: - model.nncf.set_original_unbound_forward(fn) - if `fn` has an unbound 0-th `self` argument, or - with model.nncf.temporary_bound_original_forward(fn): ... - if `fn` already had 0-th `self` argument bound or never had it in the first place. - WARNING:nncf:You are setting `forward` on an NNCF-processed model object. - NNCF relies on custom-wrapping the `forward` call in order to function properly. - Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour. 
- If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling: - model.nncf.set_original_unbound_forward(fn) - if `fn` has an unbound 0-th `self` argument, or - with model.nncf.temporary_bound_original_forward(fn): ... - if `fn` already had 0-th `self` argument bound or never had it in the first place. + INFO:nncf:Finished loading torch extension: quantized_functions_cpu .. parsed-literal:: + Using framework PyTorch: 1.13.1+cpu + Overriding 1 configuration item(s) + - use_cache -> False Configuration saved in model/CodeBERTa-language-id-quantized/openvino_config.json -Load quantized model -~~~~~~~~~~~~~~~~~~~~ - -.. note:: +Load quantized model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The argument ``export=True`` is not required since the quantized model is already in the OpenVINO format. +NOTE: the argument ``export=True`` is not required since the quantized +model is already in the OpenVINO format. .. code:: ipython3 @@ -507,12 +537,12 @@ Load quantized model .. parsed-literal:: - Compiling the model... + Compiling the model to AUTO ... Set CACHE_DIR to model/CodeBERTa-language-id-quantized/model_cache -Inference on new input using quantized model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Inference on new input using quantized model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -530,16 +560,14 @@ Inference on new input using quantized model df['speed'] = df.distance / df.time Predicted label: python - Predicted score: 0.82 - + Predicted score: 0.84 -Load evaluation set -~~~~~~~~~~~~~~~~~~~ -.. note:: - - Uncomment the method below to download and use the full dataset (5+ Gb). 
+Load evaluation set +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NOTE: Uncomment the method below to download and use the full dataset +(5+ GB). .. code:: ipython3 @@ -547,8 +575,8 @@ Load evaluation set # validation_sample = load_dataset(DATASET_NAME, split="validation") -Evaluate model -~~~~~~~~~~~~~~ +Evaluate model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -645,16 +673,16 @@ displayed.
base 1.02.32259351.6663920.0193552.24639353.4189810.018720
quantized 1.02.64746645.3263570.0220623.09006138.8341820.025751
@@ -662,16 +690,16 @@ displayed. -Additional resources --------------------- +Additional resources +-------------------------------------------------------------- - `Grammatical Error Correction with OpenVINO `__ - `Quantize a Hugging Face Question-Answering Model with OpenVINO `__\ \*\* -Clean up --------- +Clean up +-------------------------------------------------- Uncomment and run cell below to delete all resources cached locally in ./model diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output.rst b/docs/notebooks/248-stable-diffusion-xl-with-output.rst index 0c0451bd8aaee4..f868ad02114de1 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output.rst +++ b/docs/notebooks/248-stable-diffusion-xl-with-output.rst @@ -1,8 +1,6 @@ Image generation with Stable Diffusion XL and OpenVINO ====================================================== - - Stable Diffusion XL or SDXL is the latest image generation model that is tailored towards more photorealistic outputs with more detailed imagery and composition compared to previous Stable Diffusion models, including @@ -62,43 +60,47 @@ The tutorial consists of the following steps: Optimum `__. - Run 2-stages Stable Diffusion XL pipeline -.. note:: +.. - Some demonstrated models can require at least 64GB RAM for + **Note**: Some demonstrated models can require at least 64GB RAM for conversion and running. -.. 
_top: - -**Table of contents**: +**Table of contents:** -- `Install Prerequisites <#install-prerequisites>`__ -- `SDXL Base model <#sdxl-base-model>`__ - - `Select inference device <#select-inference-device>`__ - - `Run Text2Image generation pipeline <#run-text2image-generation-pipeline>`__ - - `Text2image Generation Interactive Demo <#text2image-generation-interactive-demo>`__ - - `Run Image2Image generation pipeline <#run-image2image-generation-pipeline>`__ - - `Image2Image Generation Interactive Demo <#image2image-generation-interactive-demo>`__ +- `Install Prerequisites <#install-prerequisites>`__ +- `SDXL Base model <#sdxl-base-model>`__ -- `SDXL Refiner model <#sdxl-refiner-model>`__ + - `Select inference device SDXL Base + model <#select-inference-device-sdxl-base-model>`__ + - `Run Text2Image generation + pipeline <#run-textimage-generation-pipeline>`__ + - `Text2image Generation Interactive + Demo <#textimage-generation-interactive-demo>`__ + - `Run Image2Image generation + pipeline <#run-imageimage-generation-pipeline>`__ + - `Image2Image Generation Interactive + Demo <#imageimage-generation-interactive-demo>`__ - - `Select inference device <#select-inference-device>`__ - - `Run Text2Image generation with Refinement <#run-text2image-generation-with-refinement>`__ +- `SDXL Refiner model <#sdxl-refiner-model>`__ -Install prerequisites\ `⇑ <#top>`__ -############################################################################################################################### + - `Select inference device SDXL Refiner + model <#select-inference-device-sdxl-refiner-model>`__ + - `Run Text2Image generation with + Refinement <#run-textimage-generation-with-refinement>`__ +Install prerequisites +--------------------------------------------------------------- .. 
code:: ipython3 - !pip install -q "git+https://github.com/huggingface/optimum-intel.git" - !pip install -q "openvino-dev==2023.1.0.dev20230728" - !pip install -q --upgrade-strategy eager "diffusers>=0.18.0" "invisible-watermark>=0.2.0" "transformers>=4.30.2" "accelerate" "onnx" "onnxruntime" - !pip install -q gradio - -SDXL Base model\ `⇑ <#top>`__ -############################################################################################################################### + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -q "openvino>=2023.1.0" + %pip install -q --upgrade-strategy eager "diffusers>=0.18.0" "invisible-watermark>=0.2.0" "transformers>=4.30.2" "accelerate" "onnx" "onnxruntime" + %pip install -q gradio +SDXL Base model +--------------------------------------------------------- We will start with the base model part, which is responsible for the generation of images of the desired output size. @@ -126,14 +128,6 @@ You can save the model on disk using the ``save_pretrained`` method. model_dir = Path("openvino-sd-xl-base-1.0") -.. parsed-literal:: - - 2023-08-06 20:21:05.073866: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-06 20:21:05.114013: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-06 20:21:05.843627: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -142,20 +136,25 @@ You can save the model on disk using the ``save_pretrained`` method. .. parsed-literal:: No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + 2023-09-19 18:52:15.570335: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-19 18:52:15.609718: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-19 18:52:16.242994: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( -Select inference device\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Select inference device SDXL Base model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. 
code:: ipython3 import ipywidgets as widgets - from openvino.runtime import Core + import openvino as ov - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -190,14 +189,13 @@ Select device from dropdown list for running inference using OpenVINO: Compiling the vae_decoder... Compiling the unet... - Compiling the text_encoder_2... Compiling the text_encoder... Compiling the vae_encoder... + Compiling the text_encoder_2... -Run Text2Image generation pipeline\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Text2Image generation pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, we can run the model for the generation of images using text prompts. To speed up evaluation and reduce the required memory we @@ -218,7 +216,7 @@ numpy random state with a specific seed for results reproducibility. .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:552: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:559: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -230,9 +228,9 @@ numpy random state with a specific seed for results reproducibility. .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:583: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. 
Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:590: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:599: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:606: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -242,9 +240,8 @@ numpy random state with a specific seed for results reproducibility. -Text2image Generation Interactive Demo\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Text2image Generation Interactive Demo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -294,7 +291,7 @@ Text2image Generation Interactive Demo\ `⇑ <#top>`__ .. .. raw:: html -..
+..
.. code:: ipython3 @@ -309,19 +306,18 @@ Text2image Generation Interactive Demo\ `⇑ <#top>`__ Closing server running on port: 7860 -Run Image2Image generation pipeline\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Image2Image generation pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We can reuse the already converted model for running the Image2Image generation pipeline. For that, we should replace ``OVStableDiffusionXLPipeline`` with ``OVStableDiffusionXLImage2ImagePipeline``. -Select inference device -^^^^^^^^^^^^^^^^^^^^^^^ +Select inference device SDXL Refiner model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -347,9 +343,9 @@ Select device from dropdown list for running inference using OpenVINO: Compiling the vae_decoder... Compiling the unet... - Compiling the text_encoder... - Compiling the vae_encoder... Compiling the text_encoder_2... + Compiling the vae_encoder... + Compiling the text_encoder... .. code:: ipython3 @@ -362,11 +358,9 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:552: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:559: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. 
outputs = self.request(inputs, shared_memory=True) - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/pipelines/diffusers/pipeline_utils.py:64: FutureWarning: The preprocess method is deprecated and will be removed in a future version. Please use VaeImageProcessor.preprocess instead - warnings.warn( - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:615: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:622: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -378,9 +372,9 @@ Select device from dropdown list for running inference using OpenVINO: .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:583: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:590: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:599: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. 
Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:606: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -390,9 +384,8 @@ Select device from dropdown list for running inference using OpenVINO: -Image2Image Generation Interactive Demo\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Image2Image Generation Interactive Demo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -447,7 +440,7 @@ Image2Image Generation Interactive Demo\ `⇑ <#top>`__ .. .. raw:: html -..
+..
.. code:: ipython3 @@ -466,13 +459,12 @@ Image2Image Generation Interactive Demo\ `⇑ <#top>`__ .. parsed-literal:: - 312 - + 280 -SDXL Refiner model\ `⇑ <#top>`__ -############################################################################################################################### +SDXL Refiner model +------------------------------------------------------------ As we discussed above, Stable Diffusion XL can be used in a 2-stages approach: first, the base model is used to generate latents of the @@ -501,11 +493,10 @@ prompt for improving generated image. del refiner gc.collect() -Select inference device\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 @@ -520,9 +511,8 @@ Select device from dropdown list for running inference using OpenVINO: -Run Text2Image generation with Refinement\ `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Text2Image generation with Refinement +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -542,9 +532,9 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ Compiling the vae_decoder... Compiling the unet... Compiling the text_encoder_2... - Compiling the vae_encoder... Compiling the text_encoder... - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:552: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + Compiling the vae_encoder... 
+ /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:559: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -556,7 +546,7 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:583: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:590: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) @@ -564,7 +554,7 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ .. parsed-literal:: - 244 + 240 @@ -573,6 +563,7 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ refiner = OVStableDiffusionXLImg2ImgPipeline.from_pretrained(refiner_model_dir, device=device.value) + .. parsed-literal:: Compiling the vae_decoder... @@ -583,18 +574,12 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ .. code:: ipython3 - image = refiner(prompt=prompt, image=latents[None, :], num_inference_steps=15, generator=np.random.RandomState(314)).images[0] + image = refiner(prompt=prompt, image=np.transpose(latents[None, :], (0, 2, 3, 1)), num_inference_steps=15, generator=np.random.RandomState(314)).images[0] image.save("cat_refined.png") image -.. 
parsed-literal:: - - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/pipelines/diffusers/pipeline_utils.py:64: FutureWarning: The preprocess method is deprecated and will be removed in a future version. Please use VaeImageProcessor.preprocess instead - warnings.warn( - - .. parsed-literal:: @@ -603,12 +588,12 @@ Run Text2Image generation with Refinement\ `⇑ <#top>`__ .. parsed-literal:: - /home/ea/work/ov_notebooks_env/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:599: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_diffusion.py:606: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. outputs = self.request(inputs, shared_memory=True) -.. image:: 248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.png +.. 
image:: 248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.png diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.jpg b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.jpg index 6eda11270c7742..86240095fe078c 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.jpg +++ b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4a6a4de38aac5e4730954c4c3110dc72e9e46c9f7c31ca4f0a24279081c3eb2 -size 21518 +oid sha256:57d1afd29b87fe98bdf49632c67a7c5224d9ecaa7436ae7d6ba19a213a32b8ff +size 21574 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.png b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.png index d609857b46b509..f0367047735f49 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.png +++ b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_10_3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c42ad9703b6a72cc6cc03259dce4706a29379ffe82ac13e20071cb5ac7c104b7 -size 439606 +oid sha256:544a18338099ec1ef80a52a61dc4ec2ca643a6dffc0140b1471874fd80e3037f +size 440317 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.jpg b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.jpg index 05415d178f20c4..aff048c78649b8 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.jpg +++ 
b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:270e1ce2eed1d1d1b4737a9bed4efa3a1b7570327c881db26b916dfee2462a7d -size 22630 +oid sha256:10dc1639f80d5d9c5aed44552b6ac6e8d9d806233c7ba5cad884552ad54828b9 +size 22767 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.png b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.png index 0169a0acea451d..b84838abd4071e 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.png +++ b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_18_3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53b3ed2a3d791c0e1e3cec7e589403ccf999077dd5b0e042c01b44f953720113 -size 448218 +oid sha256:31e323d1bdfadfc4a949dbd1932d158535af90e79a661d80ed66cf2657fccdb0 +size 439143 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.jpg b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.jpg new file mode 100644 index 00000000000000..f80cd63ac6559c --- /dev/null +++ b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4404aac377efb0377fa7d08c7c02f358008acc19c9aa82aded7ce39e4091be33 +size 29349 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.png b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.png new file mode 100644 index 00000000000000..5cbe5394eef85b --- /dev/null +++ 
b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9c84579d8b0ab9f1dfcfb8b94d4d55fb4acfc4b528cab7a2061634dcb0f8fe +size 432689 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.jpg b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.jpg deleted file mode 100644 index 039836421859a3..00000000000000 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0538c463a20e6d3d3f2940fad93343a986e809c7ea277a2b47e47727655dc5a9 -size 29603 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.png b/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.png deleted file mode 100644 index 43d94cfa41c6ef..00000000000000 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/248-stable-diffusion-xl-with-output_29_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f690e7122d0b3fd12c4bf19aa7332dbb50aa71050134eb39c7a888d7b6f3d3c7 -size 454022 diff --git a/docs/notebooks/248-stable-diffusion-xl-with-output_files/index.html b/docs/notebooks/248-stable-diffusion-xl-with-output_files/index.html index e913f32f97c15c..45534552f3d0de 100644 --- a/docs/notebooks/248-stable-diffusion-xl-with-output_files/index.html +++ b/docs/notebooks/248-stable-diffusion-xl-with-output_files/index.html @@ -1,12 +1,12 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/248-stable-diffusion-xl-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/248-stable-diffusion-xl-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/248-stable-diffusion-xl-with-output_files/


../
-248-stable-diffusion-xl-with-output_10_3.jpg       16-Aug-2023 01:31               21518
-248-stable-diffusion-xl-with-output_10_3.png       16-Aug-2023 01:31              439606
-248-stable-diffusion-xl-with-output_18_3.jpg       16-Aug-2023 01:31               22630
-248-stable-diffusion-xl-with-output_18_3.png       16-Aug-2023 01:31              448218
-248-stable-diffusion-xl-with-output_29_3.jpg       16-Aug-2023 01:31               29603
-248-stable-diffusion-xl-with-output_29_3.png       16-Aug-2023 01:31              454022
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/248-stable-diffusion-xl-with-output_files/


../
+248-stable-diffusion-xl-with-output_10_3.jpg       31-Oct-2023 00:35               21574
+248-stable-diffusion-xl-with-output_10_3.png       31-Oct-2023 00:35              440317
+248-stable-diffusion-xl-with-output_18_3.jpg       31-Oct-2023 00:35               22767
+248-stable-diffusion-xl-with-output_18_3.png       31-Oct-2023 00:35              439143
+248-stable-diffusion-xl-with-output_29_2.jpg       31-Oct-2023 00:35               29349
+248-stable-diffusion-xl-with-output_29_2.png       31-Oct-2023 00:35              432689
 

diff --git a/docs/notebooks/249-oneformer-segmentation-with-output.rst b/docs/notebooks/249-oneformer-segmentation-with-output.rst index 905f00c17ec7da..066bdb90bf9ae2 100644 --- a/docs/notebooks/249-oneformer-segmentation-with-output.rst +++ b/docs/notebooks/249-oneformer-segmentation-with-output.rst @@ -6,7 +6,9 @@ This tutorial demonstrates how to use the with OpenVINO. It describes how to download weights and create PyTorch model using Hugging Face transformers library, then convert model to OpenVINO Intermediate Representation format (IR) using OpenVINO Model -Optimizer API and run model inference +Optimizer API and run model inference. Additionally, +`NNCF `__ quantization is +applied to improve OneFormer segmentation speed. |image0| @@ -22,15 +24,45 @@ of increased latency, however. .. |image0| image:: https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/oneformer_architecture.png -Install required libraries --------------------------- +Table of contents: +~~~~~~~~~~~~~~~~~~ + +- `Install required + libraries <#install-required-libraries>`__ +- `Prepare the environment <#prepare-the-environment>`__ +- `Load OneFormer fine-tuned on COCO for universal + segmentation <#load-oneformer-fine-tuned-on-coco-for-universal-segmentation>`__ +- `Convert the model to OpenVINO IR + format <#convert-the-model-to-openvino-ir-format>`__ +- `Select inference device <#select-inference-device>`__ +- `Choose a segmentation + task <#choose-a-segmentation-task>`__ +- `Inference <#inference>`__ +- `Quantization <#quantization>`__ + + - `Preparing calibration + dataset <#preparing-calibration-dataset>`__ + - `Run quantization <#run-quantization>`__ + - `Compare model size and + performance <#compare-model-size-and-performance>`__ + +- `Interactive Demo <#interactive-demo>`__ + +Install required libraries +--------------------------------------------------------------------- .. 
code:: ipython3 - !pip install -q "transformers>=4.26.0" "openvino==2023.1.0.dev20230728" gradio torch scipy ipywidgets Pillow matplotlib + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.26.0" "openvino>=2023.1.0" "nncf>=2.6.0" gradio torch scipy ipywidgets Pillow matplotlib -Prepare the environment ------------------------ + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + + +Prepare the environment +------------------------------------------------------------------ Import all required packages and set paths for models and constant variables. @@ -55,22 +87,13 @@ variables. sys.path.append("../utils") from notebook_utils import download_file - -.. parsed-literal:: - - 2023-08-13 20:13:13.033722: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-13 20:13:13.205781: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-13 20:13:14.052205: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. 
code:: ipython3 IR_PATH = Path("oneformer.xml") OUTPUT_NAMES = ['class_queries_logits', 'masks_queries_logits'] -Load OneFormer fine-tuned on COCO for universal segmentation ------------------------------------------------------------- +Load OneFormer fine-tuned on COCO for universal segmentation +------------------------------------------------------------------------------------------------------- Here we use the ``from_pretrained`` method of ``OneFormerForUniversalSegmentation`` to load the `HuggingFace OneFormer @@ -89,18 +112,28 @@ images and post-process model outputs for visualization. ) id2label = model.config.id2label + +.. parsed-literal:: + + 2023-10-06 14:00:53.306851: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-06 14:00:53.342792: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-06 14:00:53.913248: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/oneformer/image_processing_oneformer.py:427: FutureWarning: The `reduce_labels` argument is deprecated and will be removed in v4.27. Please use `do_reduce_labels` instead. + warnings.warn( + + .. 
code:: ipython3 task_seq_length = processor.task_seq_length shape = (800, 800) dummy_input = { "pixel_values": torch.randn(1, 3, *shape), - "task_inputs": torch.randn(1, task_seq_length), - "pixel_mask": torch.randn(1, *shape), + "task_inputs": torch.randn(1, task_seq_length) } -Convert the model to OpenVINO IR format ---------------------------------------- +Convert the model to OpenVINO IR format +---------------------------------------------------------------------------------- Convert the PyTorch model to IR format to take advantage of OpenVINO optimization tools and features. The ``openvino.convert_model`` python @@ -124,8 +157,19 @@ should provide PyTorch model instance and example input to model = openvino.convert_model(model, example_input=dummy_input) openvino.save_model(model, IR_PATH, compress_to_fp16=False) -Select inference device ------------------------ + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + + +Select inference device +------------------------------------------------------------------ Select device from dropdown list for running inference using OpenVINO @@ -149,7 +193,7 @@ Select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') @@ -209,7 +253,7 @@ the inference results. 
def predicted_semantic_map_to_figure(predicted_map): segmentation = predicted_map[0] # get the used color map - viridis = plt.get_cmap('viridis', torch.max(segmentation)) + viridis = plt.get_cmap('viridis', max(1, torch.max(segmentation))) # get all the unique numbers labels_ids = torch.unique(segmentation).tolist() fig, ax = plt.subplots() @@ -229,7 +273,7 @@ the inference results. segmentation = predicted_map[0]['segmentation'] segments_info = predicted_map[0]['segments_info'] # get the used color map - viridis = plt.get_cmap('viridis', torch.max(segmentation)) + viridis = plt.get_cmap('viridis', max(torch.max(segmentation), 1)) fig, ax = plt.subplots() ax.imshow(segmentation) ax.set_axis_off() @@ -254,7 +298,7 @@ the inference results. segmentation = predicted_map[0]['segmentation'] segments_info = predicted_map[0]['segments_info'] # get the used color map - viridis = plt.get_cmap('viridis', torch.max(segmentation)) + viridis = plt.get_cmap('viridis', max(torch.max(segmentation), 1)) fig, ax = plt.subplots() ax.imshow(segmentation) ax.set_axis_off() @@ -273,10 +317,19 @@ the inference results. fig_legend = Visualizer.extract_legend(handles) fig.tight_layout() return fig, fig_legend + + @staticmethod + def figures_to_images(fig, fig_legend, name_suffix=""): + seg_filename, leg_filename = f"segmentation{name_suffix}.png", f"legend{name_suffix}.png" + fig.savefig(seg_filename, bbox_inches="tight") + fig_legend.savefig(leg_filename, bbox_inches="tight") + segmentation = Image.open(seg_filename) + legend = Image.open(leg_filename) + return segmentation, legend .. code:: ipython3 - def segment(img: Image.Image, task: str): + def segment(model, img: Image.Image, task: str): """ Apply segmentation on an image. @@ -289,7 +342,7 @@ the inference results. 
if img is None: raise gr.Error("Please load the image or use one from the examples list") inputs = prepare_inputs(img, task) - outputs = compiled_model(inputs) + outputs = model(inputs) hf_output = process_output(outputs) predicted_map = getattr(processor, f"post_process_{task}_segmentation")( hf_output, target_sizes=[img.size[::-1]] @@ -311,12 +364,12 @@ the inference results. -.. image:: 249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_22_1.png +.. image:: 249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_23_1.png -Choose a segmentation task --------------------------- +Choose a segmentation task +--------------------------------------------------------------------- .. code:: ipython3 @@ -334,8 +387,8 @@ Choose a segmentation task -Inference ---------- +Inference +---------------------------------------------------- .. code:: ipython3 @@ -348,32 +401,315 @@ Inference res.paste(img2, (img1.width, 0)) return res - result, legend = segment(image, task.value) + segmentation_fig, legend_fig = segment(compiled_model, image, task.value) + segmentation_image, legend_image = Visualizer.figures_to_images(segmentation_fig, legend_fig) + plt.close("all") + prediction = stack_images_horizontally(segmentation_image, legend_image) + prediction + + + + +.. image:: 249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.png + + + +Quantization +------------------------------------------------------- + +`NNCF `__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +The optimization process contains the following steps: 1. Create a +calibration dataset for quantization. 2. Run ``nncf.quantize()`` to +obtain quantized model. 
3. Serialize the ``INT8`` model using the +``openvino.save_model()`` function. + + Note: Quantization is a time- and memory-consuming operation. Running + the quantization code below may take some time. + +Please select below whether you would like to run quantization to +improve model inference speed. + +.. code:: ipython3 + + compiled_quantized_model = None + + to_quantize = widgets.Checkbox( + value=False, + description='Quantization', + disabled=False, + ) - result.savefig("result.jpg", bbox_inches="tight") - legend.savefig("legend.jpg", bbox_inches="tight") - result = Image.open("result.jpg") - legend = Image.open("legend.jpg") - stack_images_horizontally(result, legend) + to_quantize + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + -.. image:: 249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.png +Let’s load the skip magic extension to skip quantization if ``to_quantize`` is +not selected. + +.. code:: ipython3 + + import sys + sys.path.append("../utils") + + %load_ext skip_kernel_extension +Preparing calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Interactive Demo ----------------- +We use images from +`COCO128 `__ +dataset as calibration samples. ..
code:: ipython3 + %%skip not $to_quantize.value + + import nncf + import torch.utils.data as data + + from zipfile import ZipFile + + DATA_URL = "https://ultralytics.com/assets/coco128.zip" + OUT_DIR = Path('.') + + + class COCOLoader(data.Dataset): + def __init__(self, images_path): + self.images = list(Path(images_path).iterdir()) + + def __getitem__(self, index): + image = Image.open(self.images[index]) + if image.mode == 'L': + rgb_image = Image.new("RGB", image.size) + rgb_image.paste(image) + image = rgb_image + return image + + def __len__(self): + return len(self.images) + + + def download_coco128_dataset(): + download_file(DATA_URL, directory=OUT_DIR, show_progress=True) + if not (OUT_DIR / "coco128/images/train2017").exists(): + with ZipFile('coco128.zip' , "r") as zip_ref: + zip_ref.extractall(OUT_DIR) + coco_dataset = COCOLoader(OUT_DIR / 'coco128/images/train2017') + return coco_dataset + + + def transform_fn(image): + # We quantize model in panoptic mode because it produces optimal results for both semantic and instance segmentation tasks + inputs = prepare_inputs(image, "panoptic") + return inputs + + + coco_dataset = download_coco128_dataset() + calibration_dataset = nncf.Dataset(coco_dataset, transform_fn) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + + +.. 
parsed-literal:: + + coco128.zip: 0%| | 0.00/6.66M [00:00 + diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_22_1.jpg b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_23_1.jpg similarity index 100% rename from docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_22_1.jpg rename to docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_23_1.jpg diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_22_1.png b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_23_1.png similarity index 100% rename from docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_22_1.png rename to docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_23_1.png diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.jpg b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.jpg deleted file mode 100644 index 1ec3f109377f2c..00000000000000 --- a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2dcdca03618093fc7ea734421b9d76eff27fc00af3faa58f987fa9b6cb9dd89a -size 29211 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.png b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.png deleted file mode 100644 index 265a13053edbb3..00000000000000 --- 
a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_26_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a4b780b4ddebc0313fc0ffcf147eb132f6a61919999ef2c1321c05b25ef8454 -size 172371 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.jpg b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.jpg new file mode 100644 index 00000000000000..473f124ad40ee1 --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fcdb580521370b27153113b596a25aed7207e120554d6e66731211d20f9dc6 +size 29349 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.png b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.png new file mode 100644 index 00000000000000..79e39c5dcfe36f --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_27_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd5b28e53b5b782723b3739f2a4e025c15a6e4a945585327aad3c8932b05346 +size 40137 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.jpg b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.jpg new file mode 100644 index 00000000000000..473f124ad40ee1 --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fcdb580521370b27153113b596a25aed7207e120554d6e66731211d20f9dc6 +size 29349 diff --git 
a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.png b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.png new file mode 100644 index 00000000000000..79e39c5dcfe36f --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd5b28e53b5b782723b3739f2a4e025c15a6e4a945585327aad3c8932b05346 +size 40137 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.jpg b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.jpg new file mode 100644 index 00000000000000..2d1a4b947bb2fc --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7d95c0b9243af4fc2428f8d8318afe4b6c0e447a0c57bc0a75253313e694d1 +size 30421 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.png b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.png new file mode 100644 index 00000000000000..f09d352bc5eaa4 --- /dev/null +++ b/docs/notebooks/249-oneformer-segmentation-with-output_files/249-oneformer-segmentation-with-output_39_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e195b2737cb6032189f99e4b49f0c131e2cde3eefa7c12abb81bc44a9bf5bd64 +size 45063 diff --git a/docs/notebooks/249-oneformer-segmentation-with-output_files/index.html b/docs/notebooks/249-oneformer-segmentation-with-output_files/index.html index 598795f8f4f2b6..2c4ebb24e90bea 100644 --- a/docs/notebooks/249-oneformer-segmentation-with-output_files/index.html +++ 
b/docs/notebooks/249-oneformer-segmentation-with-output_files/index.html @@ -1,10 +1,14 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/249-oneformer-segmentation-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/249-oneformer-segmentation-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/249-oneformer-segmentation-with-output_files/


../
-249-oneformer-segmentation-with-output_22_1.jpg    16-Aug-2023 01:31               64470
-249-oneformer-segmentation-with-output_22_1.png    16-Aug-2023 01:31              514894
-249-oneformer-segmentation-with-output_26_0.jpg    16-Aug-2023 01:31               29211
-249-oneformer-segmentation-with-output_26_0.png    16-Aug-2023 01:31              172371
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/249-oneformer-segmentation-with-output_files/


../
+249-oneformer-segmentation-with-output_23_1.jpg    31-Oct-2023 00:35               64470
+249-oneformer-segmentation-with-output_23_1.png    31-Oct-2023 00:35              514894
+249-oneformer-segmentation-with-output_27_0.jpg    31-Oct-2023 00:35               29349
+249-oneformer-segmentation-with-output_27_0.png    31-Oct-2023 00:35               40137
+249-oneformer-segmentation-with-output_39_1.jpg    31-Oct-2023 00:35               29349
+249-oneformer-segmentation-with-output_39_1.png    31-Oct-2023 00:35               40137
+249-oneformer-segmentation-with-output_39_3.jpg    31-Oct-2023 00:35               30421
+249-oneformer-segmentation-with-output_39_3.png    31-Oct-2023 00:35               45063
 

diff --git a/docs/notebooks/250-music-generation-with-output.rst b/docs/notebooks/250-music-generation-with-output.rst index 564fe33f99f9e1..52bc565acaa909 100644 --- a/docs/notebooks/250-music-generation-with-output.rst +++ b/docs/notebooks/250-music-generation-with-output.rst @@ -1,8 +1,6 @@ Controllable Music Generation with MusicGen and OpenVINO ======================================================== - - MusicGen is a single-stage auto-regressive Transformer model capable of generating high-quality music samples conditioned on text descriptions or audio prompts. The text prompt is passed to a text encoder model (T5) @@ -32,9 +30,8 @@ We will use a model implementation from the `Hugging Face Transformers `__ library. -.. _top: - -**Table of contents**: +**Table of contents:** +--- - `Requirements and Imports <#prerequisites>`__ - `Original Pipeline Inference <#musicgen-in-hf-transformers>`__ @@ -45,20 +42,31 @@ library. - `Embedding the Converted Models into the Pipeline <#embedding-the-converted-models-into-the-original-pipeline>`__ - `Run Gradio App <#try-out-the-converted-pipeline>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +Prerequisites +------------- -Install requirements `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Install requirements +~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - !pip install -q "openvino==2023.1.0.dev20230811" - !pip install -q torch onnx gradio - !pip install -q transformers + %pip install -q "openvino>=2023.1.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch onnx gradio + %pip install -q transformers + + +.. 
parsed-literal:: -Imports `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~ .. code:: ipython3 @@ -79,8 +87,17 @@ Imports `⇑ <#top>`__ # Ignore tracing warnings warnings.filterwarnings("ignore", category=TracerWarning) -MusicGen in HF Transformers `⇑ <#top>`__ -############################################################################################################################### + +.. parsed-literal:: + + 2023-10-31 00:07:14.058999: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:07:14.092895: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-31 00:07:14.669694: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +MusicGen in HF Transformers +--------------------------- To work with `MusicGen `__ by Meta @@ -115,10 +132,10 @@ and the desired music sample length. .. parsed-literal:: Sampling rate is 32000 Hz - -Original Pipeline Inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Original Pipeline Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Text Preprocessing prepares the text prompt to be fed into the model, the ``processor`` object abstracts this step for us. Text tokenization @@ -146,15 +163,15 @@ vocabulary. It helps the model understand the context of a sentence. - -Convert models to OpenVINO Intermediate representation (IR) format `⇑ <#top>`__ -############################################################################################################################### + +Convert models to OpenVINO Intermediate representation (IR) format +------------------------------------------------------------------ Model conversion API enables direct conversion of PyTorch models. We will utilize the ``openvino.convert_model`` method to acquire OpenVINO @@ -177,8 +194,8 @@ The pipeline consists of three important parts: Let us convert each model step by step. -0. 
Set Up Variables `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +0. Set Up Variables +~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -189,8 +206,8 @@ Let us convert each model step by step. audio_decoder_onnx_path = models_dir / "encodec.onnx" audio_decoder_ir_path = models_dir / "encodec.xml" -1. Convert Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +1. Convert Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~ The text encoder is responsible for converting the input prompt, such as “90s rock song with loud guitars and heavy drums” into an embedding @@ -206,7 +223,7 @@ a time and this vector will just consist of ones. We use OpenVINO Converter (OVC) below to convert the PyTorch model to the OpenVINO Intermediate Representation format (IR), which you can infer later with `OpenVINO -runtime `__ +runtime `__ .. code:: ipython3 @@ -217,8 +234,19 @@ runtime `__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + + +2. Convert MusicGen Language Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This model is the central part of the whole pipeline, it takes the embedded text representation and generates audio codes that can be then @@ -292,8 +320,8 @@ type for them. del mg_ov gc.collect() -3. Convert Audio Decoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +3. 
Convert Audio Decoder +~~~~~~~~~~~~~~~~~~~~~~~~ The audio decoder which is a part of the EnCodec model is used to recover the audio waveform from the audio tokens predicted by the @@ -335,17 +363,15 @@ wrapper class with its ``forward()`` method calling .. parsed-literal:: - /home/idavidyu/.virtualenvs/test/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:4476: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4315: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. warnings.warn( - + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version) + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. 
(Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + _C._jit_pass_onnx_graph_shape_type_inference( + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.) + _C._jit_pass_onnx_graph_shape_type_inference( -.. parsed-literal:: - - ============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 ============= - verbose: False, log level: Level.ERROR - ======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ======================== - - Now we can convert the frozen ONNX computation graph to OpenVINO IR. @@ -359,12 +385,12 @@ Now we can convert the frozen ONNX computation graph to OpenVINO IR. del audio_decoder_ov gc.collect() -Embedding the converted models into the original pipeline `⇑ <#top>`__ -############################################################################################################################### +Embedding the converted models into the original pipeline +--------------------------------------------------------- OpenVINO™ Runtime Python API is used to compile the model in OpenVINO IR format. The -`Core `__ +`Core `__ class provides access to the OpenVINO Runtime API. The ``core`` object, which is an instance of the ``Core`` class represents the API and it is used to compile the model. @@ -373,8 +399,8 @@ used to compile the model. 
core = Core() -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +^^^^^^^^^^^^^^^^^^^^^^^ Select device that will be used to do models inference using OpenVINO from the dropdown list: @@ -397,27 +423,30 @@ from the dropdown list: .. parsed-literal:: - Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') -Adapt OpenVINO models to the original pipeline `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Adapt OpenVINO models to the original pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Here we create wrapper classes for all three OpenVINO models that we want to embed in the original inference pipeline. Here are some of the -things to consider when adapting an OV model: - Make sure that -parameters passed by the original pipeline are forwarded to the compiled -OV model properly; sometimes the OV model uses only a portion of the -input arguments and some are ignored, sometimes you need to convert the -argument to another data type or unwrap some data structures such as -tuples or dictionaries. - Guarantee that the wrapper class returns -results to the pipeline in an expected format. In the example below you -can see how we pack OV model outputs into special classes declared in -the HF repo. - Pay attention to the model method used in the original -pipeline for calling the model - it may be not the ``forward`` method! -Refer to the ``AudioDecoderWrapper`` to see how we wrap OV model -inference into the ``decode`` method. 
+things to consider when adapting an OV model: + +- Make sure that parameters passed by the original pipeline are forwarded to the compiled + OV model properly; sometimes the OV model uses only a portion of the + input arguments and some are ignored, sometimes you need to convert the + argument to another data type or unwrap some data structures such as + tuples or dictionaries. +- Guarantee that the wrapper class returns + results to the pipeline in an expected format. In the example below you + can see how we pack OV model outputs into special classes declared in + the HF repo. +- Pay attention to the model method used in the original + pipeline for calling the model - it may not be the ``forward`` method! + Refer to the ``AudioDecoderWrapper`` to see how we wrap OV model + inference into the ``decode`` method. .. code:: ipython3 @@ -522,15 +551,15 @@ We can now infer the pipeline backed by OpenVINO models. - -Try out the converted pipeline `⇑ <#top>`__ -############################################################################################################################### + +Try out the converted pipeline +------------------------------ The demo app below is created using `Gradio package `__ @@ -567,10 +596,24 @@ package `__ ], ) try: - demo.launch(debug=True) + demo.launch(debug=False) except Exception: - demo.launch(share=True, debug=True) + demo.launch(share=True, debug=False) # If you are launching remotely, specify server_name and server_port # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output.rst b/docs/notebooks/251-tiny-sd-image-generation-with-output.rst index 466cb5801d5a35..083da6fe58081c 100644 --- a/docs/notebooks/251-tiny-sd-image-generation-with-output.rst +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output.rst @@ -1,8 +1,6 @@ Image Generation with Tiny-SD and OpenVINO™ =========================================== - - In recent times, the AI community has witnessed a remarkable surge in the development of larger and more performant language models, such as Falcon 40B, LLaMa-2 70B, Falcon 40B, MPT 30B, and in the imaging domain @@ -41,12 +39,12 @@ The notebook contains the following steps: 3. Run Inference pipeline with OpenVINO. 4. Run Interactive demo for Tiny-SD model -.. _toc: +**Table of contents:** -**Table of contents**: - `Prerequisites <#prerequisites>`__ -- `Create PyTorch Models pipeline <#create-pytorch-models-pipeline>`__ +- `Create PyTorch Models + pipeline <#create-pytorch-models-pipeline>`__ - `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-format>`__ @@ -55,28 +53,26 @@ The notebook contains the following steps: - `VAE <#vae>`__ - `Prepare Inference Pipeline <#prepare-inference-pipeline>`__ -- `Configure Inference Pipeline <#configure-inference-pipeline>`__ +- `Configure Inference + Pipeline <#configure-inference-pipeline>`__ - `Text-to-Image generation <#text-to-image-generation>`__ - `Image-to-Image generation <#image-to-image-generation>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### +- `Interactive Demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- Install required dependencies .. code:: ipython3 - :force: - ! 
pip install -q --find-links https://download.pytorch.org/whl/torch_stable.html \ - "torch==1.13.1; sys_platform == 'darwin'" \ - "torch==1.13.1+cpu; sys_platform == 'linux' or platform_system == 'Windows'" \ - "torchvision==0.14.1; sys_platform == 'darwin'" \ - "torchvision==0.14.1+cpu; sys_platform == 'linux' or platform_system == 'Windows'" - !pip -q install "openvino==2023.1.0.dev20230811" "diffusers>=0.18.0" "transformers>=4.30.2" "gradio" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision + %pip -q install "openvino>=2023.1.0" "diffusers>=0.18.0" "transformers>=4.30.2" "gradio" -Create PyTorch Models pipeline `⇑ <#top>`__ -############################################################################################################################### +Create PyTorch Models pipeline +------------------------------------------------------------------------ ``StableDiffusionPipeline`` is an end-to-end inference pipeline that you can use to generate images from text with just a few lines of code. @@ -104,12 +100,12 @@ First, load the pre-trained weights of all components of the model. .. parsed-literal:: - 2023-08-17 16:52:46.141349: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-17 16:52:46.179746: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-09-18 15:58:40.831193: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-09-18 15:58:40.870576: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-17 16:52:46.808512: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - vae/diffusion_pytorch_model.safetensors not found - + 2023-09-18 15:58:41.537042: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + text_encoder/model.safetensors not found + .. parsed-literal:: @@ -121,12 +117,12 @@ First, load the pre-trained weights of all components of the model. .. parsed-literal:: - 36 + 27 -Convert models to OpenVINO Intermediate representation format `⇑ <#top>`__ -############################################################################################################################### +Convert models to OpenVINO Intermediate representation format +------------------------------------------------------------------------------------------------------- OpenVINO supports PyTorch through conversion to OpenVINO Intermediate Representation (IR) format. To take the advantage of OpenVINO @@ -154,8 +150,8 @@ The model consists of three important parts: Let us convert each part. -Text Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text-encoder is responsible for transforming the input prompt, for example, “a photo of an astronaut riding a horse” into an embedding @@ -174,7 +170,7 @@ hidden states. 
from pathlib import Path import torch - from openvino import convert_model, save_model + import openvino as ov TEXT_ENCODER_OV_PATH = Path("text_encoder.xml") @@ -196,8 +192,8 @@ hidden states. # disable gradients calculation for reducing memory consumption with torch.no_grad(): # Export model to IR format - ov_model = convert_model(text_encoder, example_input=input_ids, input=[(1,77),]) - save_model(ov_model, ir_path) + ov_model = ov.convert_model(text_encoder, example_input=input_ids, input=[(1,77),]) + ov.save_model(ov_model, ir_path) del ov_model print(f'Text Encoder successfully converted to IR and saved to {ir_path}') @@ -214,7 +210,7 @@ hidden states. .. parsed-literal:: Text encoder will be loaded from text_encoder.xml - + @@ -224,8 +220,8 @@ hidden states. -U-net `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +U-net +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ U-net model has three inputs: @@ -273,8 +269,8 @@ Model predicts the ``sample`` state for the next step. unet.eval() with torch.no_grad(): - ov_model = convert_model(unet, example_input=dummy_inputs, input=input_info) - save_model(ov_model, ir_path) + ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info) + ov.save_model(ov_model, ir_path) del ov_model print(f'Unet successfully converted to IR and saved to {ir_path}') @@ -291,7 +287,7 @@ Model predicts the ``sample`` state for the next step. .. parsed-literal:: Unet will be loaded from unet.xml - + @@ -301,8 +297,8 @@ Model predicts the ``sample`` state for the next step. -VAE `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The VAE model has two parts, an encoder and a decoder. 
The encoder is used to convert the image into a low dimensional latent representation, @@ -347,8 +343,8 @@ of the pipeline, it will be better to convert them to separate models. vae_encoder.eval() image = torch.zeros((1, 3, 512, 512)) with torch.no_grad(): - ov_model = convert_model(vae_encoder, example_input=image, input=[((1,3,512,512),)]) - save_model(ov_model, ir_path) + ov_model = ov.convert_model(vae_encoder, example_input=image, input=[((1,3,512,512),)]) + ov.save_model(ov_model, ir_path) del ov_model print(f'VAE encoder successfully converted to IR and saved to {ir_path}') @@ -384,8 +380,8 @@ of the pipeline, it will be better to convert them to separate models. vae_decoder.eval() with torch.no_grad(): - ov_model = convert_model(vae_decoder, example_input=latents, input=[((1,4,64,64),)]) - save_model(ov_model, ir_path) + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=[((1,4,64,64),)]) + ov.save_model(ov_model, ir_path) del ov_model print(f'VAE decoder successfully converted to IR and saved to {ir_path}') @@ -403,7 +399,7 @@ of the pipeline, it will be better to convert them to separate models. VAE encoder will be loaded from vae_encodr.xml VAE decoder will be loaded from vae_decoder.xml - + @@ -413,8 +409,8 @@ of the pipeline, it will be better to convert them to separate models. -Prepare Inference Pipeline `⇑ <#top>`__ -############################################################################################################################### +Prepare Inference Pipeline +-------------------------------------------------------------------- Putting it all together, let us now take a closer look at how the model works in inference by illustrating the logical flow. @@ -473,7 +469,6 @@ of the variational auto encoder. 
from transformers import CLIPTokenizer from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler - from openvino.runtime import Model def scale_fit_to_window(dst_width:int, dst_height:int, image_width:int, image_height:int): @@ -525,12 +520,12 @@ of the variational auto encoder. class OVStableDiffusionPipeline(DiffusionPipeline): def __init__( self, - vae_decoder: Model, - text_encoder: Model, + vae_decoder: ov.Model, + text_encoder: ov.Model, tokenizer: CLIPTokenizer, - unet: Model, + unet: ov.Model, scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - vae_encoder: Model = None, + vae_encoder: ov.Model = None, ): """ Pipeline for text-to-image generation using Stable Diffusion. @@ -815,15 +810,14 @@ of the variational auto encoder. return timesteps, num_inference_steps - t_start -Configure Inference Pipeline ----------------------------- +Configure Inference Pipeline +---------------------------------------------------------------------- First, you should create instances of OpenVINO Model. .. code:: ipython3 - from openvino.runtime import Core - core = Core() + core = ov.Core() Select device from dropdown list for running inference using OpenVINO. @@ -859,8 +853,10 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 - vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value) - vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value) + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) + vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value, ov_config) Model tokenizer and scheduler are also important parts of the pipeline. 
Let us define them and put all components together @@ -886,8 +882,8 @@ Let us define them and put all components together scheduler=lms ) -Text-to-Image generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, let’s see model in action @@ -911,7 +907,7 @@ Now, let’s see model in action Input text: RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors Seed: 431 Number of steps: 20 - + .. code:: ipython3 @@ -947,16 +943,16 @@ Now is show time! Input text: RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors - -.. image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_33_1.png + +.. image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.png Nice. As you can see, the picture has quite a high definition 🔥. -Image-to-Image generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Image-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ One of the most amazing features of Stable Diffusion model is the ability to condition image generation from an existing image or sketch. @@ -1021,10 +1017,10 @@ found in this Number of steps: 40 Strength: 0.68 Input image: - -.. image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_37_1.png + +.. 
image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.png @@ -1050,89 +1046,89 @@ found in this Input text: professional photo portrait of woman, highly detailed, hyper realistic, cinematic effects, soft lighting - -.. image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_39_1.png +.. image:: 251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.png -.. Interactive Demo `⇑ <#top>`__ -.. ############################################################################################################################### -.. .. code:: ipython3 +Interactive Demo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. import gradio as gr - -.. sample_img_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/tower.jpg" - -.. img = load_image(sample_img_url).save("tower.jpg") - -.. def generate_from_text(text, negative_text, seed, num_steps, _=gr.Progress(track_tqdm=True)): -.. result = ov_pipe(text, negative_prompt=negative_text, num_inference_steps=num_steps, seed=seed) -.. return result["sample"][0] - - -.. def generate_from_image(img, text, negative_text, seed, num_steps, strength, _=gr.Progress(track_tqdm=True)): -.. result = ov_pipe(text, img, negative_prompt=negative_text, num_inference_steps=num_steps, seed=seed, strength=strength) -.. return result["sample"][0] - - -.. with gr.Blocks() as demo: -.. with gr.Tab("Text-to-Image generation"): -.. with gr.Row(): -.. with gr.Column(): -.. text_input = gr.Textbox(lines=3, label="Positive prompt") -.. negative_text_input = gr.Textbox(lines=3, label="Negative prompt") -.. seed_input = gr.Slider(0, 10000000, value=751, label="Seed") -.. steps_input = gr.Slider(1, 50, value=20, step=1, label="Steps") -.. out = gr.Image(label="Result", type="pil") -.. 
sample_text = "futuristic synthwave city, retro sunset, crystals, spires, volumetric lighting, studio Ghibli style, rendered in unreal engine with clean details" -.. sample_text2 = "RAW studio photo of tiny cute happy cat in a yellow raincoat in the woods, rain, a character portrait, soft lighting, high resolution, photo realistic, extremely detailed" -.. negative_sample_text = "" -.. negative_sample_text2 = "bad anatomy, blurry, noisy, jpeg artifacts, low quality, geometry, mutation, disgusting. ugly" -.. btn = gr.Button() -.. btn.click(generate_from_text, [text_input, negative_text_input, seed_input, steps_input], out) -.. gr.Examples([[sample_text, negative_sample_text, 42, 20], [sample_text2, negative_sample_text2, 1561, 25]], [text_input, negative_text_input, seed_input, steps_input]) -.. with gr.Tab("Image-to-Image generation"): -.. with gr.Row(): -.. with gr.Column(): -.. i2i_input = gr.Image(label="Image", type="pil") -.. i2i_text_input = gr.Textbox(lines=3, label="Text") -.. i2i_negative_text_input = gr.Textbox(lines=3, label="Negative prompt") -.. i2i_seed_input = gr.Slider(0, 10000000, value=42, label="Seed") -.. i2i_steps_input = gr.Slider(1, 50, value=10, step=1, label="Steps") -.. strength_input = gr.Slider(0, 1, value=0.5, label="Strength") -.. i2i_out = gr.Image(label="Result", type="pil") -.. i2i_btn = gr.Button() -.. sample_i2i_text = "amazing watercolor painting" -.. i2i_btn.click( -.. generate_from_image, -.. [i2i_input, i2i_text_input, i2i_negative_text_input, i2i_seed_input, i2i_steps_input, strength_input], -.. i2i_out, -.. ) -.. gr.Examples( -.. [["tower.jpg", sample_i2i_text, "", 6400023, 40, 0.3]], -.. [i2i_input, i2i_text_input, i2i_negative_text_input, i2i_seed_input, i2i_steps_input, strength_input], -.. ) +.. 
code:: ipython3 + + import gradio as gr + + sample_img_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/tower.jpg" + + img = load_image(sample_img_url).save("tower.jpg") + + def generate_from_text(text, negative_text, seed, num_steps, _=gr.Progress(track_tqdm=True)): + result = ov_pipe(text, negative_prompt=negative_text, num_inference_steps=num_steps, seed=seed) + return result["sample"][0] + + + def generate_from_image(img, text, negative_text, seed, num_steps, strength, _=gr.Progress(track_tqdm=True)): + result = ov_pipe(text, img, negative_prompt=negative_text, num_inference_steps=num_steps, seed=seed, strength=strength) + return result["sample"][0] + + + with gr.Blocks() as demo: + with gr.Tab("Text-to-Image generation"): + with gr.Row(): + with gr.Column(): + text_input = gr.Textbox(lines=3, label="Positive prompt") + negative_text_input = gr.Textbox(lines=3, label="Negative prompt") + seed_input = gr.Slider(0, 10000000, value=751, label="Seed") + steps_input = gr.Slider(1, 50, value=20, step=1, label="Steps") + out = gr.Image(label="Result", type="pil") + sample_text = "futuristic synthwave city, retro sunset, crystals, spires, volumetric lighting, studio Ghibli style, rendered in unreal engine with clean details" + sample_text2 = "RAW studio photo of tiny cute happy cat in a yellow raincoat in the woods, rain, a character portrait, soft lighting, high resolution, photo realistic, extremely detailed" + negative_sample_text = "" + negative_sample_text2 = "bad anatomy, blurry, noisy, jpeg artifacts, low quality, geometry, mutation, disgusting. 
ugly" + btn = gr.Button() + btn.click(generate_from_text, [text_input, negative_text_input, seed_input, steps_input], out) + gr.Examples([[sample_text, negative_sample_text, 42, 20], [sample_text2, negative_sample_text2, 1561, 25]], [text_input, negative_text_input, seed_input, steps_input]) + with gr.Tab("Image-to-Image generation"): + with gr.Row(): + with gr.Column(): + i2i_input = gr.Image(label="Image", type="pil") + i2i_text_input = gr.Textbox(lines=3, label="Text") + i2i_negative_text_input = gr.Textbox(lines=3, label="Negative prompt") + i2i_seed_input = gr.Slider(0, 10000000, value=42, label="Seed") + i2i_steps_input = gr.Slider(1, 50, value=10, step=1, label="Steps") + strength_input = gr.Slider(0, 1, value=0.5, label="Strength") + i2i_out = gr.Image(label="Result", type="pil") + i2i_btn = gr.Button() + sample_i2i_text = "amazing watercolor painting" + i2i_btn.click( + generate_from_image, + [i2i_input, i2i_text_input, i2i_negative_text_input, i2i_seed_input, i2i_steps_input, strength_input], + i2i_out, + ) + gr.Examples( + [["tower.jpg", sample_i2i_text, "", 6400023, 40, 0.3]], + [i2i_input, i2i_text_input, i2i_negative_text_input, i2i_seed_input, i2i_steps_input, strength_input], + ) -.. try: -.. demo.queue().launch(debug=True) -.. except Exception: -.. demo.queue().launch(share=True, debug=True) -.. # if you are launching remotely, specify server_name and server_port -.. # demo.launch(server_name='your server name', server_port='server port in int') -.. # Read more in the docs: https://gradio.app/docs/ + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ -.. .. parsed-literal:: +.. parsed-literal:: -.. Running on local URL: http://127.0.0.1:7860 - -.. 
To create a public link, set `share=True` in `launch()`. + Running on local URL: http://127.0.0.1:7863 + To create a public link, set `share=True` in `launch()`. + .. .. raw:: html -..
+..
diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.jpg b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.jpg new file mode 100644 index 00000000000000..b5887c4200e06b --- /dev/null +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea967ccd91d70c37cff7181b6332d7059c0df3a55f8e7a10b4a2b282096d320 +size 40294 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.png b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.png new file mode 100644 index 00000000000000..42f8892e0ed407 --- /dev/null +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_33_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbf0960faa29476405a3956e1ac33a02f7f2b7f9ede92fdbfc59767af4721af +size 434441 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.jpg b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.jpg new file mode 100644 index 00000000000000..5134861797a727 --- /dev/null +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca9dee37fa5f3ee5ac7ab8ac7ddab31e98c4d1786b31cecedf6d2cc6afcb29c +size 84339 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.png b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.png new file mode 100644 
index 00000000000000..6f3383b8adee4c --- /dev/null +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_37_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e40daa15ad1a8cefff400ac7ce6762400b27c8f073f4b9fb6cd67aeb50520a +size 770190 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.jpg b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.jpg new file mode 100644 index 00000000000000..dc00510869abea --- /dev/null +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3871cef661296a7b4dc0b250b0d2faa7608d057e6279c65ac1f1fa1e1051a39 +size 50437 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_39_1.png b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.png similarity index 100% rename from docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_39_1.png rename to docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation-with-output_39_1.png diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_33_1.png b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_33_1.png deleted file mode 100644 index 2826eba480a9ea..00000000000000 --- a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_33_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d4313551a427c44d4bba572717be2134c99e0e8785a8f11d5fd2993ccfb10da -size 470354 diff --git 
a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_37_1.png b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_37_1.png deleted file mode 100644 index 7696d04c750a96..00000000000000 --- a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/251-tiny-sd-image-generation_37_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e97e1e206d64979d8fc9aaa6e46786172062c9a9eb1817a351af2a38557a012f -size 835162 diff --git a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/index.html b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/index.html index e69de29bb2d1d6..98e68a3c493cfe 100644 --- a/docs/notebooks/251-tiny-sd-image-generation-with-output_files/index.html +++ b/docs/notebooks/251-tiny-sd-image-generation-with-output_files/index.html @@ -0,0 +1,12 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/251-tiny-sd-image-generation-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/251-tiny-sd-image-generation-with-output_files/


../
+251-tiny-sd-image-generation-with-output_33_1.jpg  31-Oct-2023 00:35               40294
+251-tiny-sd-image-generation-with-output_33_1.png  31-Oct-2023 00:35              434441
+251-tiny-sd-image-generation-with-output_37_1.jpg  31-Oct-2023 00:35               84339
+251-tiny-sd-image-generation-with-output_37_1.png  31-Oct-2023 00:35              770190
+251-tiny-sd-image-generation-with-output_39_1.jpg  31-Oct-2023 00:35               50437
+251-tiny-sd-image-generation-with-output_39_1.png  31-Oct-2023 00:35              699062
+

+ diff --git a/docs/notebooks/252-fastcomposer-image-generation-with-output.rst b/docs/notebooks/252-fastcomposer-image-generation-with-output.rst index d0c9a479aa0f06..4b98381bd103a1 100644 --- a/docs/notebooks/252-fastcomposer-image-generation-with-output.rst +++ b/docs/notebooks/252-fastcomposer-image-generation-with-output.rst @@ -1,8 +1,6 @@ `FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention `__ ===================================================================================================================== - - FastComposer uses subject embeddings extracted by an image encoder to augment the generic text conditioning in diffusion models, enabling personalized image generation based on subject images and textual @@ -22,51 +20,50 @@ problems: FastComposer generates images of multiple unseen individuals with different styles, actions, and contexts. -.. image:: 252-fastcomposer-image-generation-with-output_files/multi-subject.png - -.. note:: - - ``model.py`` is slightly changed ``model.py`` from + **NOTE**: ``model.py`` is slightly changed ``model.py`` from fastcomposer repository. There are two main changes: - some unused lines of code are removed to avoid errors if there are no CUDA drivers in the system - changes to have compatibility with transformers >= 4.30.1 (due to security vulnerability) -.. 
_top: - -**Table of contents**: +**Table of contents:** +--- - `Install Prerequisites <#install-prerequisites>`__ -- `Convert models to OpenVINO Intermediate representation (IR) format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ -- `Convert text_encoder <#convert-text_encoder>`__ -- `The Object Transform <#the-object-transform>`__ -- `The Image Encoder <#the-image-encoder>`__ -- `Postfuse module <#postfuse-module>`__ -- `Convert Unet <#convert-unet>`__ -- `Rebuild pipeline <#rebuild-pipeline>`__ -- `Inference <#inference>`__ -- `Run Gradio <#run-gradio>`__ +- `Convert models to OpenVINO Intermediate representation (IR) + format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ + + - `Convert text_encoder <#convert-text_encoder>`__ + - `The Object Transform <#the-object-transform>`__ + - `The Image Encoder <#the-image-encoder>`__ + - `Postfuse module <#postfuse-module>`__ + - `Convert Unet <#convert-unet>`__ + +- `Rebuild pipeline <#rebuild-pipeline>`__ +- `Inference <#inference>`__ +- `Run Gradio <#run-gradio>`__ -.. important:: +.. container:: alert alert-block alert-warning - This tutorial requires about 25-28GB of free memory to generate one image. Each extra image requires ~11GB of free memory. + :: + This tutorial requires about 25-28GB of free memory to generate one image. Each extra image requires ~11GB of free memory. -Install Prerequisites `⇑ <#top>`__ -############################################################################################################################### +Install Prerequisites +--------------------------------------------------------------- Install required packages. -.. code:: ipython2 +.. 
code:: ipython3 !pip install -q --upgrade pip - !pip install -q torch torchvision huggingface-hub - !pip install -q transformers accelerate "diffusers==0.16.1" gradio - !pip install -q "openvino==2023.1.0.dev20230811" + !pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision + !pip install -q transformers huggingface-hub accelerate "diffusers==0.16.1" gradio + !pip install -q "openvino>=2023.1.0" Clone FastComposer project from GitHub -.. code:: ipython2 +.. code:: ipython3 from pathlib import Path @@ -79,19 +76,19 @@ Clone FastComposer project from GitHub Download pretrained model. -.. code:: ipython2 +.. code:: ipython3 from huggingface_hub import hf_hub_download model_path = hf_hub_download(repo_id='mit-han-lab/fastcomposer', filename='pytorch_model.bin') -Convert models to OpenVINO Intermediate representation (IR) format `⇑ <#top>`__ -############################################################################################################################### +Convert models to OpenVINO Intermediate representation (IR) format +------------------------------------------------------------------------------------------------------------ Define a configuration and make instance of ``FastComposerModel``. -.. code:: ipython2 +.. code:: ipython3 from dataclasses import dataclass @@ -129,15 +126,15 @@ Pipeline consist of next models: ``Unet``, ``TextEncoder``, So, convert the models into OpenVINO IR format. -Convert text_encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert text_encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model components are PyTorch modules, that can be converted with openvino.convert_model function directly. We also use openvino.save_model function to serialize the result of conversion. Let’s create a helper function. -.. code:: ipython2 +.. 
code:: ipython3 import gc import openvino @@ -165,7 +162,7 @@ The input for the text encoder consists of a tensor ``input_ids``, which contains token indices from the text processed by the tokenizer and padded to the maximum length accepted by the model. -.. code:: ipython2 +.. code:: ipython3 text_encoder_ir_xml_path = Path('models/text_encoder_ir.xml') example_input = torch.zeros((1, 77), dtype=torch.int64) @@ -176,13 +173,13 @@ padded to the maximum length accepted by the model. del model.text_encoder gc.collect(); -The Object Transform `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The Object Transform +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It pads an incoming user image to -square and resize it. An input is a tensor of size [3, height, width]. +It pads an incoming user image to square and resize it. An input is a +tensor of size [3, height, width]. -.. code:: ipython2 +.. code:: ipython3 from collections import OrderedDict from torchvision import transforms as T @@ -215,15 +212,14 @@ square and resize it. An input is a tensor of size [3, height, width]. del object_transforms gc.collect(); -The Image Encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The Image Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The image encoder is a CLIP -(Contrastive Language-Image Pretraining) Image Encoder. It takes a -transformed image from the previous step as input and transforms it into -a high-dimensional vector or embeddings. +The image encoder is a CLIP (Contrastive Language-Image Pretraining) +Image Encoder. It takes a transformed image from the previous step as +input and transforms it into a high-dimensional vector or embeddings. -.. code:: ipython2 +.. 
code:: ipython3 image_encoder_ir_xml_path = Path('models/image_encoder_ir.xml') example_input = torch.zeros((1, 2, 3, 256, 256), dtype=torch.float32) @@ -234,16 +230,16 @@ a high-dimensional vector or embeddings. del model.image_encoder gc.collect(); -Postfuse module `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Postfuse module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -On this step it is employed a multilayer -perceptron (MLP) to augment the text embeddings with visual features -extracted from the reference subjects. The Postfuse module concatenates -the word embeddings with the visual features and feeds the resulting -augmented embeddings into the MLP. +On this step it is employed a multilayer perceptron (MLP) to augment the +text embeddings with visual features extracted from the reference +subjects. The Postfuse module concatenates the word embeddings with the +visual features and feeds the resulting augmented embeddings into the +MLP. -.. code:: ipython2 +.. code:: ipython3 postfuse_module_ir_xml_path = Path('models/postfuse_module_ir.xml') @@ -260,13 +256,13 @@ augmented embeddings into the MLP. del model.postfuse_module gc.collect(); -Convert Unet `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert Unet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -U-Net model gradually denoises latent image -representation guided by text encoder hidden state. +U-Net model gradually denoises latent image representation guided by +text encoder hidden state. -.. code:: ipython2 +.. code:: ipython3 unet_ir_xml_path = Path('models/unet_ir.xml') @@ -284,14 +280,14 @@ representation guided by text encoder hidden state. 
gc.collect() -Rebuild pipeline `⇑ <#top>`__ -############################################################################################################################### +Rebuild pipeline +---------------------------------------------------------- -Also, it needs to modify some internal -FastComposer entities, to use OpenVINO models. First of all, how to get -results. For example, to convert outputs from numpy to torch types. +Also, it needs to modify some internal FastComposer entities, to use +OpenVINO models. First of all, how to get results. For example, to +convert outputs from numpy to torch types. -.. code:: ipython2 +.. code:: ipython3 import numpy as np from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -784,7 +780,7 @@ results. For example, to convert outputs from numpy to torch types. And replace all model in the pipeline by converted models. -.. code:: ipython2 +.. code:: ipython3 import PIL from transformers import CLIPTokenizer @@ -904,16 +900,16 @@ And replace all model in the pipeline by converted models. ) ) -Inference `⇑ <#top>`__ -############################################################################################################################### +Inference +--------------------------------------------------- -And now it is possible to make inference. You -can provide 1 or 2 images (``image1`` and ``image2``). If you want to -provide only one image pass in inference ``None`` instead image. -``prompt`` describes context in what objects from user images will be -generated. Word ``img`` is a token that correlates with input images. +And now it is possible to make inference. You can provide 1 or 2 images +(``image1`` and ``image2``). If you want to provide only one image pass +in inference ``None`` instead image. ``prompt`` describes context in +what objects from user images will be generated. Word ``img`` is a token +that correlates with input images. -.. code:: ipython2 +.. 
code:: ipython3 image1 = Image.open('fastcomposer/data/newton_einstein/einstein/0.png') image2 = Image.open('fastcomposer/data/newton_einstein/newton/0.png') @@ -941,16 +937,16 @@ generated. Word ``img`` is a token that correlates with input images. Result consists of several (``num_images``) images and now it possible to display them. -.. code:: ipython2 +.. code:: ipython3 display(result[0][0]) -Run Gradio `⇑ <#top>`__ -############################################################################################################################### +Run Gradio +---------------------------------------------------- Also, it is possible to run with Gradio -.. code:: ipython2 +.. code:: ipython3 import gradio as gr @@ -1061,9 +1057,9 @@ Also, it is possible to run with Gradio if __name__ == "__main__": try: - demo.launch(debug=True) + demo.launch(debug=False) except Exception: - demo.launch(share=True, debug=True) + demo.launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/252-fastcomposer-image-generation-with-output_files/multi-subject.png b/docs/notebooks/252-fastcomposer-image-generation-with-output_files/multi-subject.png deleted file mode 100644 index 306c414299d0f6..00000000000000 --- a/docs/notebooks/252-fastcomposer-image-generation-with-output_files/multi-subject.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8760283d06f1b29e26a3f684c22afe65d809208a2cd624b70acda3e6a9b87a1f -size 16854851 diff --git a/docs/notebooks/253-zeroscope-text2video-with-output.rst b/docs/notebooks/253-zeroscope-text2video-with-output.rst index 549a1ce04e5bfa..575ef2126af15a 100644 --- a/docs/notebooks/253-zeroscope-text2video-with-output.rst +++ b/docs/notebooks/253-zeroscope-text2video-with-output.rst @@ -1,8 +1,6 @@ Video generation with 
ZeroScope and OpenVINO ============================================ - - The ZeroScope model is a free and open-source text-to-video model that can generate realistic and engaging videos from text descriptions. It is based on the @@ -34,36 +32,33 @@ Both versions of the ZeroScope model are available on Hugging Face: We will use the first one. -.. _top: - -**Table of contents**: - -- `Install and import required packages <#install-and-import-required-packages>`__ -- `Load the model <#load-the-model>`__ -- `Convert the model <#convert-the-model>`__ +**Table of contents:** +--- - - `Define the conversion function <#define-the-conversion-function>`__ - - `UNet <#unet>`__ - - - `VAE <#vae>`__ - - `Text encoder <#text-encoder>`__ +- 1. `Install and import required packages <#install-and-import-required-packages>`__ +- 2. `Load the model <#load-the-model>`__ +- 3. `Convert the model <#convert-the-model>`__ -- `Build a pipeline <#build-a-pipeline>`__ -- `Inference with OpenVINO <#inference-with-openvino>`__ + - 3.1. `Define the conversion function <#define-the-conversion-function>`__ + - 3.2. `UNet <#unet>`__ + - 3.3. `VAE <#vae>`__ + - 3.4. `Text encoder <#text-encoder>`__ - - `Select inference device <#select-inference-device>`__ - - `Define a prompt <#define-a-prompt>`__ - - `Video generation <#video-generation>`__ +- 4. `Build a pipeline <#build-a-pipeline>`__ +- 5. `Inference with OpenVINO <#inference-with-openvino>`__ + - 5.1. `Select inference device <#select-inference-device>`__ + - 5.2. `Define a prompt <#define-a-prompt>`__ + - 5.3. `Video generation <#video-generation>`__ -.. important:: +.. container:: alert alert-block alert-warning - This tutorial requires at least 24GB of free memory to generate a video with - a frame size of 432x240 and 16 frames. Increasing either of these values will - require more memory and take more time. + :: + This tutorial requires at least 24GB of free memory to generate a video with a frame size of 432x240 and 16 frames. 
Increasing either of these values will require more memory and take more time. -Install and import required packages `⇑ <#top>`__ -############################################################################################################################### +Install and import required packages +------------------------------------ To work with text-to-video synthesis model, we will use Hugging Face’s `Diffusers `__ library. It @@ -71,12 +66,11 @@ provides already pretrained model from ``cerspense``. .. code:: ipython3 - !pip install -q "diffusers[torch]>=0.15.0" transformers "openvino==2023.1.0.dev20230811" numpy gradio + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "diffusers[torch]>=0.18.0" transformers "openvino>=2023.1.0" numpy gradio .. code:: ipython3 import gc - from pathlib import Path from typing import Optional, Union, List, Callable import base64 import tempfile @@ -96,13 +90,11 @@ provides already pretrained model from ``cerspense``. .. parsed-literal:: - 2023-08-16 21:15:40.145184: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-16 21:15:40.146998: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2023-08-16 21:15:40.179214: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2023-08-16 21:15:40.180050: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-09-27 09:46:10.119370: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-09-27 09:46:10.159667: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-16 21:15:40.750499: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - + 2023-09-27 09:46:10.735453: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + Original 576x320 inference requires a lot of RAM (>100GB), so let’s run our example on a smaller frame size, keeping the same aspect ratio. Try @@ -114,8 +106,8 @@ reducing values below to reduce the memory consumption. HEIGHT = 240 # must be divisible by 8 NUM_FRAMES = 16 -Load the model `⇑ <#top>`__ -############################################################################################################################### +Load the model +-------------- The model is loaded from HuggingFace using ``.from_pretrained`` method of ``diffusers.DiffusionPipeline``. @@ -127,8 +119,8 @@ of ``diffusers.DiffusionPipeline``. .. parsed-literal:: - vae/diffusion_pytorch_model.safetensors not found - + unet/diffusion_pytorch_model.safetensors not found + .. parsed-literal:: @@ -153,8 +145,8 @@ of ``diffusers.DiffusionPipeline``. 
del pipe gc.collect(); -Convert the model `⇑ <#top>`__ -############################################################################################################################### +Convert the model +----------------- The architecture for generating videos from text comprises three distinct sub-networks: one for extracting text features, another for @@ -166,11 +158,12 @@ diffusion model is built upon the Unet3D model and achieves video generation by iteratively denoising a starting point of pure Gaussian noise video. -.. image:: 253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_02.png +|image0| +.. |image0| image::  -Define the conversion function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Define the conversion function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Model components are PyTorch modules, that can be converted with ``ov.convert_model`` function directly. We also use ``ov.save_model`` @@ -182,6 +175,8 @@ function to serialize the result of conversion. .. code:: ipython3 + from pathlib import Path + def convert(model: torch.nn.Module, xml_path: str, **convert_kwargs) -> Path: xml_path = Path(xml_path) if not xml_path.exists(): @@ -189,15 +184,15 @@ function to serialize the result of conversion. 
with torch.no_grad(): converted_model = ov.convert_model(model, **convert_kwargs) ov.save_model(converted_model, xml_path) - del converted model + del converted_model gc.collect() torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() torch.jit._state._clear_class_state() return xml_path -UNet `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +UNet +~~~~ Text-to-video generation pipeline main component is a conditional 3D UNet model that takes a noisy sample, conditional state, and a timestep @@ -209,7 +204,7 @@ and returns a sample shaped output. unet, "models/unet.xml", example_input={ - "sample": torch.randn(2, 4, 2, 32, 32), + "sample": torch.randn(2, 4, 2, int(sample_height // 2), int(sample_width // 2)), "timestep": torch.tensor(1), "encoder_hidden_states": torch.randn(2, 77, 1024), }, @@ -222,19 +217,8 @@ and returns a sample shaped output. del unet gc.collect(); - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -VAE `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +VAE +~~~ Variational autoencoder (VAE) uses UNet output to decode latents to visual representations. Our VAE model has KL loss for encoding images @@ -262,8 +246,8 @@ inference, we need only decoder part. 
del vae gc.collect(); -Text encoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Text encoder +~~~~~~~~~~~~ Text encoder is used to encode the input prompt to tensor. Default tensor length is 77. @@ -274,13 +258,13 @@ tensor length is 77. text_encoder, "models/text_encoder.xml", example_input=torch.ones(1, 77, dtype=torch.int64), - input=((1, 77), (ov.Type.i64,)), + input=((1, 77), ov.Type.i64), ) del text_encoder gc.collect(); -Build a pipeline `⇑ <#top>`__ -############################################################################################################################### +Build a pipeline +---------------- .. code:: ipython3 @@ -303,6 +287,13 @@ Build a pipeline `⇑ <#top>`__ .. code:: ipython3 + try: + from diffusers.utils import randn_tensor + except ImportError: + from diffusers.utils.torch_utils import randn_tensor + + + class OVTextToVideoSDPipeline(diffusers.DiffusionPipeline): def __init__( self, @@ -658,7 +649,7 @@ Build a pipeline `⇑ <#top>`__ ) if latents is None: - latents = diffusers.utils.randn_tensor(shape, generator=generator, dtype=dtype) + latents = randn_tensor(shape, generator=generator, dtype=dtype) # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma @@ -738,15 +729,15 @@ Build a pipeline `⇑ <#top>`__ video = video.float() return video -Inference with OpenVINO `⇑ <#top>`__ -############################################################################################################################### +Inference with OpenVINO +----------------------- .. 
code:: ipython3 core = ov.Core() -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ select device from dropdown list for running inference using OpenVINO @@ -766,7 +757,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=2, options=('CPU', 'GNA', 'AUTO'), value='AUTO') @@ -778,9 +769,9 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - CPU times: user 14.1 s, sys: 5.62 s, total: 19.7 s - Wall time: 10.6 s - + CPU times: user 10.9 s, sys: 4.63 s, total: 15.5 s + Wall time: 8.67 s + .. code:: ipython3 @@ -790,9 +781,9 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - CPU times: user 456 ms, sys: 320 ms, total: 776 ms - Wall time: 328 ms - + CPU times: user 432 ms, sys: 251 ms, total: 683 ms + Wall time: 337 ms + .. code:: ipython3 @@ -802,9 +793,9 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - CPU times: user 1.78 s, sys: 1.44 s, total: 3.22 s - Wall time: 1.13 s - + CPU times: user 1.23 s, sys: 1.19 s, total: 2.43 s + Wall time: 1.11 s + Here we replace the pipeline parts with versions converted to OpenVINO IR and compiled to specific device. Note that we use original pipeline @@ -814,8 +805,8 @@ tokenizer and scheduler. ov_pipe = OVTextToVideoSDPipeline(ov_vae_decoder, ov_text_encoder, tokenizer, ov_unet, scheduler) -Define a prompt `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Define a prompt +~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -823,10 +814,10 @@ Define a prompt `⇑ <#top>`__ Let’s generate a video for our prompt. For full list of arguments, see ``__call__`` function definition of ``OVTextToVideoSDPipeline`` class in -`Build a pipeline <#Build-a-pipeline>`__ section. +`Build a pipeline <#build-a-pipeline>`__ section. -Video generation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Video generation +~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -848,11 +839,16 @@ Video generation `⇑ <#top>`__ IPython.display.HTML(f"") -.. image:: 253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_03.gif -Interactive demo `⇑ <#top>`__ -############################################################################################################################### +.. raw:: html + + + + + +Interactive demo +---------------- .. code:: ipython3 @@ -871,8 +867,8 @@ Interactive demo `⇑ <#top>`__ out_file, save_all=True, append_images=images[1:], duration=125, loop=0 ) return out_file.name - - + + demo = gr.Interface( generate, [ @@ -888,11 +884,25 @@ Interactive demo `⇑ <#top>`__ ], allow_flagging="never" ) - + try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(share=True, debug=True) + demo.queue().launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ diff --git a/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_02.png b/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_02.png deleted file mode 100644 index ee9c0e1f4275a2..00000000000000 --- a/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_02.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9b3abdf1818a885d159961285a1ef96a2c0c0c99d26eac96435b7813e28198d -size 41341 diff --git a/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_03.gif b/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_03.gif deleted file mode 100644 index 987b9debc4cf17..00000000000000 --- a/docs/notebooks/253-zeroscope-text2video-with-output_files/253-zeroscope-text2video-with-output_01_03.gif +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0786f897470a25d935d1f5e096132f086c7f96f42d441102f598828d6d39452 -size 1366066 diff --git a/docs/notebooks/254-llm-chatbot-with-output.rst b/docs/notebooks/254-llm-chatbot-with-output.rst new file mode 100644 index 00000000000000..18a150369530cd --- /dev/null +++ b/docs/notebooks/254-llm-chatbot-with-output.rst @@ -0,0 +1,1040 @@ +Create an LLM-powered Chatbot using OpenVINO +============================================ + +In the rapidly evolving world of artificial intelligence (AI), chatbots +have emerged as powerful tools for businesses to enhance customer +interactions and streamline operations. Large Language Models (LLMs) are +artificial intelligence systems that can understand and generate human +language. They use deep learning algorithms and massive amounts of data +to learn the nuances of language and produce coherent and relevant +responses. 
While a decent intent-based chatbot can answer basic, +one-touch inquiries like order management, FAQs, and policy questions, +LLM chatbots can tackle more complex, multi-touch questions. LLM enables +chatbots to provide support in a conversational manner, similar to how +humans do, through contextual memory. Leveraging the capabilities of +Language Models, chatbots are becoming increasingly intelligent, capable +of understanding and responding to human language with remarkable +accuracy. + +Previously, we already discussed how to build an instruction-following +pipeline using OpenVINO and Optimum Intel, please check out `Dolly +example <../240-dolly-2-instruction-following>`__ for reference. In this +tutorial, we consider how to use the power of OpenVINO for running Large +Language Models for chat. We will use a pre-trained model from the +`Hugging Face +Transformers `__ +library. To simplify the user experience, the `Hugging Face Optimum +Intel `__ library is +used to convert the models to OpenVINO™ IR format. + +The tutorial consists of the following steps: + +- Install prerequisites +- Download and convert the model from a public source using the + `OpenVINO integration with Hugging Face + Optimum `__. 
+- Compress model weights to INT8 precision using + `NNCF `__ +- Create a chat inference pipeline +- Run chat pipeline + +**Table of contents:** + +- `Prerequisites <#prerequisites>`__ +- `Select model for inference <#select-model-for-inference>`__ +- `Instantiate Model using Optimum Intel <#instantiate-model-using-optimum-intel>`__ +- `Compress model weights <#compress-model-weights>`__ +- `Weights Compression using Optimum Intel <#weights-compression-using-optimum-intel>`__ +- `Weights Compression using NNCF <#weights-compression-using-nncf>`__ +- `Select device for inference and model variant <#select-device-for-inference-and-model-variant>`__ +- `Run Chatbot <#run-chatbot>`__ + +Prerequisites +------------------------------------------------------- + +Install required dependencies + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu\ + "git+https://github.com/huggingface/optimum-intel.git"\ + "nncf>=2.6.0"\ + "gradio"\ + "onnx" "onnxruntime" "einops" "transformers>=4.31.0"\ + "openvino==2023.2.0.dev20230922" + +Select model for inference +-------------------------------------------------------------------- + +The tutorial supports different models; you can select one from the +provided options to compare the quality of open source LLM solutions. +>\ **Note**: conversion of some models can require additional actions +from the user side and at least 64GB RAM for conversion. + +The available options are: + +- **red-pajama-3b-chat** - A 2.8B parameter pre-trained language model + based on GPT-NEOX architecture. It was developed by Together Computer + and leaders from the open-source AI community. The model is + fine-tuned on OASST1 and Dolly2 datasets to enhance chatting ability. + More details about the model can be found in `HuggingFace model + card `__. +- **llama-2-7b-chat** - LLama 2 is the second generation of LLama + models developed by Meta.
Llama 2 is a collection of pre-trained and + fine-tuned generative text models ranging in scale from 7 billion to + 70 billion parameters. llama-2-7b-chat is 7 billions parameters + version of LLama 2 finetuned and optimized for dialogue use case. + More details about model can be found in the + `paper `__, + `repository `__ and + `HuggingFace model + card `__ + >\ **Note**: run model with demo, you will need to accept license + agreement. >You must be a registered user in 🤗 Hugging Face Hub. + Please visit `HuggingFace model + card `__, + carefully read terms of usage and click accept button. You will need + to use an access token for the code below to run. For more + information on access tokens, refer to `this section of the + documentation `__. + >You can login on Hugging Face Hub in notebook environment, using + following code: + +.. code:: python + + ## login to huggingfacehub to get access to pretrained model + from huggingface_hub import notebook_login, whoami + + try: + whoami() + print('Authorization token already provided') + except OSError: + notebook_login() + +- **mpt-7b-chat** - MPT-7B is part of the family of + MosaicPretrainedTransformer (MPT) models, which use a modified + transformer architecture optimized for efficient training and + inference. These architectural changes include performance-optimized + layer implementations and the elimination of context length limits by + replacing positional embeddings with Attention with Linear Biases + (`ALiBi `__). Thanks to these + modifications, MPT models can be trained with high throughput + efficiency and stable convergence. MPT-7B-chat is a chatbot-like + model for dialogue generation. It was built by finetuning MPT-7B on + the + `ShareGPT-Vicuna `__, + `HC3 `__, + `Alpaca `__, + `HH-RLHF `__, and + `Evol-Instruct `__ + datasets. More details about the model can be found in `blog + post `__, + `repository `__ and + `HuggingFace model + card `__. + +.. 
code:: ipython3 + + from config import SUPPORTED_MODELS + import ipywidgets as widgets + +.. code:: ipython3 + + model_ids = list(SUPPORTED_MODELS) + + model_id = widgets.Dropdown( + options=model_ids, + value=model_ids[0], + description='Model:', + disabled=False, + ) + + model_id + + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('red-pajama-3b-chat', 'llama-2-chat-7b', 'mpt-7b-chat'), value='red-pa… + + + +.. code:: ipython3 + + model_configuration = SUPPORTED_MODELS[model_id.value] + print(f"Selected model {model_id.value}") + + +.. parsed-literal:: + + Selected model red-pajama-3b-chat + + +Instantiate Model using Optimum Intel +------------------------------------------------------------------------------- + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. This means we just need to replace +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Below is an example of the RedPajama model + +.. code:: diff + + -from transformers import AutoModelForCausalLM + +from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + model_id = "togethercomputer/RedPajama-INCITE-Chat-3B-v1" + -model = AutoModelForCausalLM.from_pretrained(model_id) + +model = OVModelForCausalLM.from_pretrained(model_id, export=True) + +Model class initialization starts with calling ``from_pretrained`` +method. When downloading and converting Transformers model, the +parameter ``export=True`` should be added. We can save the converted +model for the next usage with the ``save_pretrained`` method. Tokenizer +class and pipelines API are compatible with Optimum models. + +To optimize the generation process and use memory more efficiently, the +``use_cache=True`` option is enabled. 
Since the output side is +auto-regressive, an output token hidden state remains the same once +computed for every further generation step. Therefore, recomputing it +every time you want to generate a new token seems wasteful. With the +cache, the model saves the hidden state once it has been computed. The +model only computes the one for the most recently generated output token +at each time step, re-using the saved ones for hidden tokens. This +reduces the generation complexity from O(n^3) to O(n^2) for a +transformer model. More details about how it works can be found in this +`article `__. +With this option, the model gets the previous step’s hidden states +(cached attention keys and values) as input and additionally provides +hidden states for the current step as output. It means that for all subsequent +iterations, it is enough to provide only a new token obtained from the +previous step and cached key values to get the next token prediction. + +In our case, the MPT model is currently not covered by Optimum Intel, so we +will convert it manually and create a wrapper compatible with Optimum +Intel. + +Compress model weights +---------------------------------------------------------------- + +The Weights Compression algorithm is aimed at compressing the weights of +the models and can be used to optimize the model footprint and +performance of large models where the size of weights is relatively +larger than the size of activations, for example, Large Language Models +(LLM). + +Weights Compression using Optimum Intel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To enable weights compression via NNCF for models supported by Optimum +Intel, the ``OVQuantizer`` class should be used, instantiated from a PyTorch model +using the ``from_pretrained`` method. +``OVQuantizer.quantize(save_directory=save_dir, weights_only=True)`` +enables weights compression and model conversion to OpenVINO +Intermediate Representation format.
We will consider how to do it on the +RedPajama and LLAMA examples. + + **Note**: This tutorial involves model conversion for both the FP16 and + INT8 weights compression scenarios. It may be memory- and + time-consuming on the first run. You can manually disable FP16 conversion + using the CONVERT_FP16 variable below; the CONVERT_INT8 variable can be used + to disable conversion of the model with weights compression. + +.. code:: ipython3 + + CONVERT_FP16 = True + CONVERT_INT8 = True + +.. code:: ipython3 + + from pathlib import Path + from optimum.intel import OVQuantizer + from transformers import AutoModelForCausalLM + from optimum.intel.openvino import OVModelForCausalLM + import logging + import nncf + import gc + + nncf.set_log_level(logging.ERROR) + + compressed_model_dir = Path(model_id.value) / "INT8_compressed_weights" + model_dir = Path(model_id.value) / "FP16" + pt_model_id = model_configuration["model_id"] + + if "mpt" not in model_id.value: + if CONVERT_INT8 and not compressed_model_dir.exists(): + pt_model = AutoModelForCausalLM.from_pretrained(pt_model_id) + quantizer = OVQuantizer.from_pretrained(pt_model) + quantizer.quantize(save_directory=compressed_model_dir, weights_only=True) + del quantizer + del pt_model + gc.collect() + + if CONVERT_FP16 and not model_dir.exists(): + ov_model = OVModelForCausalLM.from_pretrained(pt_model_id, export=True, compile=False) + ov_model.half() + ov_model.save_pretrained(model_dir) + del ov_model + gc.collect(); + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + 2023-09-19 19:06:00.934297: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
+ 2023-09-19 19:06:00.971948: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-09-19 19:06:01.591238: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( + + +Weights Compression using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You also can perform weights compression for PyTorch models using NNCF +directly. ``nncf.compress_weights`` function accept PyTorch model +instance and compress its weights for Linear and Embedding layers. We +will consider this variant based on MPT model. + +To begin compression, we should define model conversion first. + +.. 
code:: ipython3 + + from functools import wraps + import torch + from transformers import AutoModelForCausalLM + from nncf import compress_weights + import openvino as ov + from typing import Optional, Union, Dict, Tuple, List + + def flattenize_inputs(inputs): + """ + Helper function for making nested inputs flattens + """ + flatten_inputs = [] + for input_data in inputs: + if input_data is None: + continue + if isinstance(input_data, (list, tuple)): + flatten_inputs.extend(flattenize_inputs(input_data)) + else: + flatten_inputs.append(input_data) + return flatten_inputs + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + def convert_mpt(pt_model:torch.nn.Module, model_path:Path): + """ + MPT model conversion function + + Params: + pt_model: PyTorch model + model_path: path for saving model + Returns: + None + """ + ov_out_path = Path(model_path) / "openvino_model.xml" + pt_model.config.save_pretrained(ov_out_path.parent) + pt_model.config.use_cache = True + outs = pt_model(input_ids=torch.ones((1, 10), dtype=torch.long), attention_mask=torch.ones((1, 10), dtype=torch.long)) + inputs = ["input_ids"] + outputs = ["logits"] + + dynamic_shapes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} + for idx in range(len(outs.past_key_values)): + inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) + dynamic_shapes[inputs[-1]] = {2: "past_sequence + sequence"} + dynamic_shapes[inputs[-2]] = {3: "past_sequence + sequence"} + outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) + + inputs.append("attention_mask") + dummy_inputs = {"input_ids": torch.ones((1,2), dtype=torch.long), "past_key_values": outs.past_key_values, "attention_mask": torch.ones((1,12), dtype=torch.long)} + pt_model.config.torchscript = True + 
orig_forward = pt_model.forward + @wraps(orig_forward) + def ts_patched_forward(input_ids: torch.Tensor, past_key_values: Tuple[Tuple[torch.Tensor]], attention_mask: torch.Tensor): + pkv_list = list(past_key_values) + outs = orig_forward(input_ids=input_ids, past_key_values=pkv_list, attention_mask=attention_mask) + return (outs.logits, tuple(outs.past_key_values)) + pt_model.forward = ts_patched_forward + ov_model = ov.convert_model(pt_model, example_input=dummy_inputs) + pt_model.forward = orig_forward + for inp_name, m_input, input_data in zip(inputs, ov_model.inputs, flattenize_inputs(dummy_inputs.values())): + input_node = m_input.get_node() + if input_node.element_type == ov.Type.dynamic: + m_input.get_node().set_element_type(ov.Type.f32) + shape = list(input_data.shape) + if inp_name in dynamic_shapes: + for k in dynamic_shapes[inp_name]: + shape[k] = -1 + input_node.set_partial_shape(ov.PartialShape(shape)) + m_input.get_tensor().set_names({inp_name}) + + for out, out_name in zip(ov_model.outputs, outputs): + out.get_tensor().set_names({out_name}) + + ov_model.validate_nodes_and_infer_types() + ov.save_model(ov_model, ov_out_path) + del ov_model + cleanup_torchscript_cache() + del pt_model + +Now, we know how to convert model to OpenVINO format, we can save +floating point and compressed model variants + +.. code:: ipython3 + + compressed_model_dir = Path(model_id.value) / "INT8_compressed_weights" + model_dir = Path(model_id.value) / "FP16" + + if "mpt" in model_id.value and (not compressed_model_dir.exists() or not model_dir.exists()): + model = AutoModelForCausalLM.from_pretrained(model_configuration["model_id"], torch_dtype=torch.float32, trust_remote_code=True) + if CONVERT_FP16 and not model_dir.exists(): + convert_mpt(model, model_dir) + if CONVERT_INT8 and not compressed_model_dir.exists(): + compressed_model = compress_weights(model) + convert_mpt(compressed_model, compressed_model_dir) + + gc.collect(); + +.. 
code:: ipython3 + + fp16_weights = model_dir / "openvino_model.bin" + int8_weights = compressed_model_dir / "openvino_model.bin" + + if fp16_weights.exists(): + print(f'Size of FP16 model in MB is {fp16_weights.stat().st_size / 1024 / 1024}') + if int8_weights.exists(): + print(f'Size of model with INT8 compressed weights in MB is {int8_weights.stat().st_size / 1024 / 1024}') + if int8_weights.exists() and fp16_weights.exists(): + print(f"Model compression rate: {fp16_weights.stat().st_size / int8_weights.stat().st_size:.3f}") + + +.. parsed-literal:: + + Size of FP16 model in MB is 5299.166286468506 + Size of model with INT8 compressed weights in MB is 2659.578887939453 + Model compression rate: 1.992 + + +Select device for inference and model variant +--------------------------------------------------------------------------------------- + +.. code:: ipython3 + + core = ov.Core() + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='CPU', + description='Device:', + disabled=False, + ) + + + + +.. parsed-literal:: + + VBox(children=(Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU'), Checkbox(value=Tr… + + + +.. code:: ipython3 + + int8_compressed_weights = widgets.Checkbox( + value=True, + description='Use compressed weights', + disabled=False + ) + + widgets.VBox([device, int8_compressed_weights]) + +The cell below create ``OVMPTModel`` model wrapper based on +``OVModelForCausalLM`` model. + +.. 
code:: ipython3 + + from transformers import AutoConfig + import torch + + from optimum.intel.openvino import OVModelForCausalLM + from optimum.utils import NormalizedTextConfig, NormalizedConfigManager + from transformers.modeling_outputs import CausalLMOutputWithPast + import numpy as np + from pathlib import Path + + + class OVMPTModel(OVModelForCausalLM): + """ + Optimum intel compatible model wrapper for MPT + """ + def __init__( + self, + model: "Model", + config: "PretrainedConfig" = None, + device: str = "CPU", + dynamic_shapes: bool = True, + ov_config: Optional[Dict[str, str]] = None, + model_save_dir: Optional[Union[str, Path]] = None, + **kwargs, + ): + NormalizedConfigManager._conf["mpt"] = NormalizedTextConfig.with_args(num_layers="n_layers", num_attention_heads="n_heads") + super().__init__(model, config, device, dynamic_shapes, ov_config, model_save_dir, **kwargs) + + def _reshape( + self, + model: "Model", + *args, + **kwargs + ): + shapes = {} + for inputs in model.inputs: + shapes[inputs] = inputs.get_partial_shape() + if shapes[inputs].rank.get_length() in [2, 3]: + shapes[inputs][1] = -1 + else: + if ".key" in inputs.get_any_name(): + shapes[inputs][3] = -1 + else: + shapes[inputs][2] = -1 + + model.reshape(shapes) + return model + + def forward( + self, + input_ids: torch.LongTensor, + attention_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + **kwargs, + ) -> CausalLMOutputWithPast: + self.compile() + + if self.use_cache and past_key_values is not None: + input_ids = input_ids[:, -1:] + + inputs = {} + if past_key_values is not None: + # Flatten the past_key_values + past_key_values = tuple( + past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer + ) + # Add the past_key_values to the decoder inputs + inputs = dict(zip(self.key_value_input_names, past_key_values)) + + # Create empty past_key_values for decoder_with_past first generation step + 
elif self.use_cache: + shape_input_ids = input_ids.shape + num_attention_heads = ( + self.normalized_config.num_attention_heads if self.config.model_type == "bloom" else 1 + ) + for input_name in self.key_value_input_names: + model_inputs = self.model.input(input_name) + shape = model_inputs.get_partial_shape() + shape[0] = shape_input_ids[0] * num_attention_heads + if shape[2].is_dynamic: + shape[2] = 0 + if shape[1].is_dynamic: + shape[1] = 0 + if shape.rank.get_length() == 4 and shape[3].is_dynamic: + shape[3] = 0 + inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape()) + + inputs["input_ids"] = np.array(input_ids) + + # Add the attention_mask inputs when needed + if "attention_mask" in self.input_names and attention_mask is not None: + inputs["attention_mask"] = np.array(attention_mask) + + # Run inference + self.request.start_async(inputs, shared_memory=True) + self.request.wait() + + logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device) + + if self.use_cache: + # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) + past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) + # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) + past_key_values = tuple( + past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv) + ) + else: + past_key_values = None + + return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) + +The cell below demonstrates how to instantiate model based on selected +variant of model weights and inference device + +.. 
code:: ipython3 + + from pathlib import Path + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer + + model_dir = Path(model_id.value) / ("FP16" if not int8_compressed_weights.value else "INT8_compressed_weights") + model_name = model_configuration["model_id"] + + ov_config = {'PERFORMANCE_HINT': 'LATENCY', 'NUM_STREAMS': '1', "CACHE_DIR": ""} + + tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + + model_class = OVModelForCausalLM if "mpt" not in model_id.value else OVMPTModel + ov_model = model_class.from_pretrained(model_dir, device=device.value, ov_config=ov_config, config=AutoConfig.from_pretrained(model_dir, trust_remote_code=True), trust_remote_code=True) + + +.. parsed-literal:: + + The argument `trust_remote_code` is to be used along with export=True. It will be ignored. + Compiling the model... + + +.. code:: ipython3 + + tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) + test_string = "2 + 2 =" + input_tokens = tok(test_string, return_tensors="pt", **tokenizer_kwargs) + answer = ov_model.generate(**input_tokens, max_new_tokens=2) + print(tok.batch_decode(answer)[0]) + + +.. parsed-literal:: + + Setting `pad_token_id` to `eos_token_id`:0 for open-end generation. + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_decoder.py:364: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + self.request.start_async(inputs, shared_memory=True) + + +.. parsed-literal:: + + 2 + 2 = 4. + + +Run Chatbot +----------------------------------------------------- + +Now, when model created, we can setup Chatbot interface using +`Gradio `__. The diagram below illustrates how +the chatbot pipeline works + +.. 
figure:: https://user-images.githubusercontent.com/29454499/255523209-d9336491-c7ba-4dc1-98f0-07f23743ce89.png + :alt: generation pipeline + + generation pipeline + +As can be seen, the pipeline is very similar to instruction-following; the +only change is that the previous conversation history is additionally passed as +input together with the next user question to provide wider input context. On the +first iteration, the user-provided instructions are joined to the conversation +history (if it exists) and converted to token ids using a tokenizer, then +the prepared input is provided to the model. The model generates probabilities +for all tokens in logits format. The way the next token will be selected +over predicted probabilities is driven by the selected decoding +methodology. You can find more information about the most popular +decoding methods in this +`blog `__. The generated +result updates the conversation history for the next conversation step. It +makes a stronger connection between the next question and the previous ones and +allows the user to make clarifications regarding previously provided +answers. + +| There are several parameters that can control text generation quality: + \* ``Temperature`` is a parameter used to control the level of + creativity in AI-generated text. By adjusting the ``temperature``, you + can influence the AI model’s probability distribution, making the text + more focused or diverse. +| Consider the following example: The AI model has to complete the + sentence “The cat is \____.” with the following token probabilities: + +:: + + playing: 0.5 + sleeping: 0.25 + eating: 0.15 + driving: 0.05 + flying: 0.05 + + - **Low temperature** (e.g., 0.2): The AI model becomes more focused and deterministic, choosing tokens with the highest probability, such as "playing." + - **Medium temperature** (e.g., 1.0): The AI model maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias, such as "playing," "sleeping," or "eating."
+ - **High temperature** (e.g., 2.0): The AI model becomes more adventurous, increasing the chances of selecting less likely tokens, such as "driving" and "flying." + +- ``Top-p``, also known as nucleus sampling, is a parameter used to + control the range of tokens considered by the AI model based on their + cumulative probability. By adjusting the ``top-p`` value, you can + influence the AI model’s token selection, making it more focused or + diverse. Using the same example with the cat, consider the following + top_p settings: + + - **Low top_p** (e.g., 0.5): The AI model considers only tokens with + the highest cumulative probability, such as “playing.” + - **Medium top_p** (e.g., 0.8): The AI model considers tokens with a + higher cumulative probability, such as “playing,” “sleeping,” and + “eating.” + - **High top_p** (e.g., 1.0): The AI model considers all tokens, + including those with lower probabilities, such as “driving” and + “flying.” + +- ``Top-k`` is another popular sampling strategy. In comparison with + Top-P, which chooses from the smallest possible set of words whose + cumulative probability exceeds the probability P, in Top-K sampling the K + most likely next words are filtered and the probability mass is + redistributed among only those K next words. In our example with the cat, + if k=3, then only “playing”, “sleeping” and “eating” will be taken + into account as possible next words. +- ``Repetition Penalty`` is a parameter that can help penalize tokens based + on how frequently they occur in the text, including the input prompt. + A token that has already appeared five times is penalized more + heavily than a token that has appeared only one time. A value of 1 + means that there is no penalty and values larger than 1 discourage + repeated tokens. + +..
code:: ipython3 + + from threading import Event, Thread + from uuid import uuid4 + + import gradio as gr + import torch + from transformers import ( + AutoTokenizer, + StoppingCriteria, + StoppingCriteriaList, + TextIteratorStreamer, + ) + + + model_name = model_configuration["model_id"] + history_template = model_configuration["history_template"] + current_message_template = model_configuration["current_message_template"] + start_message = model_configuration["start_message"] + stop_tokens = model_configuration.get("stop_tokens") + tokenizer_kwargs = model_configuration.get("tokenizer_kwargs", {}) + + + + max_new_tokens = 256 + + class StopOnTokens(StoppingCriteria): + def __init__(self, token_ids): + self.token_ids = token_ids + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: + for stop_id in self.token_ids: + if input_ids[0][-1] == stop_id: + return True + return False + + if stop_tokens is not None: + if isinstance(stop_tokens[0], str): + stop_tokens = tok.convert_tokens_to_ids(stop_tokens) + + stop_tokens = [StopOnTokens(stop_tokens)] + + def default_partial_text_processor(partial_text:str, new_text:str): + """ + helper for updating partially generated answer, used by de + + Params: + partial_text: text buffer for storing previosly generated text + new_text: text update for the current step + Returns: + updated text string + + """ + partial_text += new_text + return partial_text + + text_processor = model_configuration.get("partial_text_processor", default_partial_text_processor) + + def convert_history_to_text(history:List[Tuple[str, str]]): + """ + function for conversion history stored as list pairs of user and assistant messages to string according to model expected conversation template + Params: + history: dialogue history + Returns: + history in text format + """ + text = start_message + "".join( + [ + "".join( + [ + history_template.format(user=item[0], assistant=item[1]) + ] + ) + for item in history[:-1] 
+ ] + ) + text += "".join( + [ + "".join( + [ + current_message_template.format(user=history[-1][0], assistant=history[-1][1]) + ] + ) + ] + ) + return text + + + + def user(message, history): + """ + callback function for updating user messages in interface on submit button click + + Params: + message: current message + history: conversation history + Returns: + None + """ + # Append the user's message to the conversation history + return "", history + [[message, ""]] + + + def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id): + """ + callback function for running chatbot on submit button click + + Params: + history: conversation history + temperature: parameter for control the level of creativity in AI-generated text. + By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. + top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. + top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. + repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. + conversation_id: unique conversation identifier. 
+ + """ + + # Construct the input message string for the model by concatenating the current system message and conversation history + messages = convert_history_to_text(history) + + # Tokenize the messages string + input_ids = tok(messages, return_tensors="pt", **tokenizer_kwargs).input_ids + if input_ids.shape[1] > 2000: + history = [history[-1]] + messages = convert_history_to_text(history) + input_ids = tok(messages, return_tensors="pt", **tokenizer_kwargs).input_ids + streamer = TextIteratorStreamer(tok, timeout=30.0, skip_prompt=True, skip_special_tokens=True) + generate_kwargs = dict( + input_ids=input_ids, + max_new_tokens=max_new_tokens, + temperature=temperature, + do_sample=temperature > 0.0, + top_p=top_p, + top_k=top_k, + repetition_penalty=repetition_penalty, + streamer=streamer, + ) + if stop_tokens is not None: + generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) + + stream_complete = Event() + + def generate_and_signal_complete(): + """ + genration function for single thread + """ + global start_time + ov_model.generate(**generate_kwargs) + stream_complete.set() + + t1 = Thread(target=generate_and_signal_complete) + t1.start() + + # Initialize an empty string to store the generated text + partial_text = "" + for new_text in streamer: + partial_text = text_processor(partial_text, new_text) + history[-1][1] = partial_text + yield history + + + def get_uuid(): + """ + universal unique identifier for thread + """ + return str(uuid4()) + + + with gr.Blocks( + theme=gr.themes.Soft(), + css=".disclaimer {font-variant-caps: all-small-caps;}", + ) as demo: + conversation_id = gr.State(get_uuid) + gr.Markdown( + f"""

OpenVINO {model_id.value} Chatbot

""" + ) + chatbot = gr.Chatbot(height=500) + with gr.Row(): + with gr.Column(): + msg = gr.Textbox( + label="Chat Message Box", + placeholder="Chat Message Box", + show_label=False, + container=False + ) + with gr.Column(): + with gr.Row(): + submit = gr.Button("Submit") + stop = gr.Button("Stop") + clear = gr.Button("Clear") + with gr.Row(): + with gr.Accordion("Advanced Options:", open=False): + with gr.Row(): + with gr.Column(): + with gr.Row(): + temperature = gr.Slider( + label="Temperature", + value=0.1, + minimum=0.0, + maximum=1.0, + step=0.1, + interactive=True, + info="Higher values produce more diverse outputs", + ) + with gr.Column(): + with gr.Row(): + top_p = gr.Slider( + label="Top-p (nucleus sampling)", + value=1.0, + minimum=0.0, + maximum=1, + step=0.01, + interactive=True, + info=( + "Sample from the smallest possible set of tokens whose cumulative probability " + "exceeds top_p. Set to 1 to disable and sample from all tokens." + ), + ) + with gr.Column(): + with gr.Row(): + top_k = gr.Slider( + label="Top-k", + value=50, + minimum=0.0, + maximum=200, + step=1, + interactive=True, + info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.", + ) + with gr.Column(): + with gr.Row(): + repetition_penalty = gr.Slider( + label="Repetition Penalty", + value=1.1, + minimum=1.0, + maximum=2.0, + step=0.1, + interactive=True, + info="Penalize repetition — 1.0 to disable.", + ) + gr.Examples([ + ["Hello there! 
How are you doing?"], + ["What is OpenVINO?"], + ["Who are you?"], + ["Can you explain to me briefly what is Python programming language?"], + ["Explain the plot of Cinderella in a sentence."], + ["What are some common mistakes to avoid when writing code?"], + ["Write a 100-word blog post on “Benefits of Artificial Intelligence and OpenVINO“"] + ], + inputs=msg, + label="Click on any example and press the 'Submit' button" + ) + + submit_event = msg.submit( + fn=user, + inputs=[msg, chatbot], + outputs=[msg, chatbot], + queue=False, + ).then( + fn=bot, + inputs=[ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + conversation_id, + ], + outputs=chatbot, + queue=True, + ) + submit_click_event = submit.click( + fn=user, + inputs=[msg, chatbot], + outputs=[msg, chatbot], + queue=False, + ).then( + fn=bot, + inputs=[ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + conversation_id, + ], + outputs=chatbot, + queue=True, + ) + stop.click( + fn=None, + inputs=None, + outputs=None, + cancels=[submit_event, submit_click_event], + queue=False, + ) + clear.click(lambda: None, None, chatbot, queue=False) + + demo.queue(max_size=2) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # if you have any issue to launch on your platform, you can pass share=True to launch method: + # demo.launch(share=True) + # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ + demo.launch() + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ + +.. code:: ipython3 + + # please run this cell for stopping gradio interface + demo.close() + + +.. parsed-literal:: + + Closing server running on port: 7860 + diff --git a/docs/notebooks/255-mms-massively-multilingual-speech-with-output.rst b/docs/notebooks/255-mms-massively-multilingual-speech-with-output.rst new file mode 100644 index 00000000000000..e62e942be3a79a --- /dev/null +++ b/docs/notebooks/255-mms-massively-multilingual-speech-with-output.rst @@ -0,0 +1,966 @@ +MMS: Scaling Speech Technology to 1000+ languages with OpenVINO™ +================================================================ + +The Massively Multilingual Speech (MMS) project expands speech +technology from about 100 languages to over 1,000 by building a single +multilingual speech recognition model supporting over 1,100 languages +(more than 10 times as many as before), language identification models +able to identify over 4,000 languages (40 times more than before), +pretrained models supporting over 1,400 languages, and text-to-speech +models for over 1,100 languages. + +The MMS model was proposed in `Scaling Speech Technology to 1,000+ +Languages `__. The models and code were +originally released +`here `__. + +There are different open-sourced models in the MMS project: Automatic +Speech Recognition (ASR), Language Identification (LID) and Speech +Synthesis (TTS). A simple diagram of this is below. + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/76171391/0e7fadd6-29a8-4fac-bd9c-41d66adcb045 + :alt: LID and ASR flow + + LID and ASR flow + +In this notebook we are considering ASR and LID. We will use the LID model +to identify the language, and then a language-specific ASR model to transcribe +the speech. An additional model quantization step is employed to improve model +inference speed. At the end of the notebook there’s a Gradio-based +interactive demo. 
+ +**Table of contents:** +-- + +- `Prerequisites <#>`__ +- `Prepare an example audio <#>`__ +- `Language Identification (LID) <#>`__ +- `Download pretrained model and processor <#>`__ +- `Use the original model to run an inference <#>`__ +- `Convert to OpenVINO IR model and run an inference <#>`__ +- `Automatic Speech Recognition (ASR) <#>`__ +- `Download pretrained model and processor <#>`__ +- `Use the original model to run an inference <#>`__ +- `Convert to OpenVINO IR model and run an inference <#>`__ +- `Quantization <#>`__ +- `Preparing calibration dataset <#>`__ +- `Language identification model quantization <#>`__ +- `Speech recognition model quantization <#>`__ +- `Compare model size, performance and accuracy <#>`__ +- `Interactive demo with Gradio <#>`__ + +Prerequisites +-------------------- + +.. code:: ipython3 + + %pip install -q --upgrade pip + %pip install -q "transformers>=4.33.1" "openvino>=2023.1.0" "numpy>=1.21.0,<=1.24" "nncf>=2.6.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch datasets accelerate soundfile librosa gradio jiwer + +.. code:: ipython3 + + from pathlib import Path + + import torch + + import openvino as ov + +Prepare an example audio +------------------------ + + +Read an audio file and process the audio +data. Make sure that the audio data is sampled at 16000 Hz (16 kHz). For this +example we will use `a streamable version of the Multilingual +LibriSpeech (MLS) +dataset `__. +It contains examples in 7 languages: +``'german', 'dutch', 'french', 'spanish', 'italian', 'portuguese', 'polish'``. +Choose one of them. + +.. code:: ipython3 + + import ipywidgets as widgets + + + SAMPLE_LANG = widgets.Dropdown( + options=['german', 'dutch', 'french', 'spanish', 'italian', 'portuguese', 'polish'], + value='german', + description='Dataset language:', + disabled=False, + ) + + SAMPLE_LANG + + + + +.. 
parsed-literal:: + + Dropdown(description='Dataset language:', options=('german', 'dutch', 'french', 'spanish', 'italian', 'portugu… + + + +Specify ``streaming=True`` to not download the entire dataset. + +.. code:: ipython3 + + from datasets import load_dataset + + + mls_dataset = load_dataset("facebook/multilingual_librispeech", SAMPLE_LANG.value, split="test", streaming=True) + mls_dataset = iter(mls_dataset) # make it iterable + + example = next(mls_dataset) # get one example + +Example has a dictionary structure. It contains an audio data and a text +transcription. + +.. code:: ipython3 + + print(example) # look at structure + + +.. parsed-literal:: + + {'file': None, 'audio': {'path': '1054_1599_000000.flac', 'array': array([-0.00131226, -0.00152588, -0.00134277, ..., 0.00411987, + 0.00308228, -0.00015259]), 'sampling_rate': 16000}, 'text': 'mein sechster sohn scheint wenigstens auf den ersten blick der tiefsinnigste von allen ein kopfhänger und doch ein schwätzer deshalb kommt man ihm nicht leicht bei ist er am unterliegen so verfällt er in unbesiegbare traurigkeit', 'speaker_id': 1054, 'chapter_id': 1599, 'id': '1054_1599_000000'} + + +.. code:: ipython3 + + import IPython.display as ipd + + print(example['text']) + ipd.Audio(example['audio']['array'], rate=16_000) + + +.. parsed-literal:: + + mein sechster sohn scheint wenigstens auf den ersten blick der tiefsinnigste von allen ein kopfhänger und doch ein schwätzer deshalb kommt man ihm nicht leicht bei ist er am unterliegen so verfällt er in unbesiegbare traurigkeit + + + + +.. raw:: html + + + + + + + +Language Identification (LID) +------------------------------ + +Download pretrained model and processor +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Different LID models are +available based on the number of languages they can recognize - 126, +256, 512, 1024, 2048, 4017. We will use 126. + +.. 
code:: ipython3 + + from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor + + model_id = "facebook/mms-lid-126" + + lid_processor = AutoFeatureExtractor.from_pretrained(model_id) + lid_model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id) + +Use the original model to run an inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + inputs = lid_processor(example['audio']['array'], sampling_rate=16_000, return_tensors="pt") + + with torch.no_grad(): + outputs = lid_model(**inputs).logits + + lang_id = torch.argmax(outputs, dim=-1)[0].item() + detected_lang = lid_model.config.id2label[lang_id] + print(detected_lang) + + +.. parsed-literal:: + + deu + + +Convert to OpenVINO IR model and run an inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Convert model to OpenVINO format and compile it + +.. code:: ipython3 + + MAX_SEQ_LENGTH = 30480 + + lid_model_xml_path = Path('models/ov_lid_model.xml') + + def get_lid_model(model_path, compiled=True): + input_values = torch.zeros([1, MAX_SEQ_LENGTH], dtype=torch.float) + + if not model_path.exists() and model_path == lid_model_xml_path: + lid_model_xml_path.parent.mkdir(parents=True, exist_ok=True) + converted_model = ov.convert_model(lid_model, example_input={'input_values': input_values}) + ov.save_model(converted_model, lid_model_xml_path) + if not compiled: + return converted_model + if compiled: + return core.compile_model(model_path, device_name=device.value) + return core.read_model(model_path) + + compiled_lid_model = get_lid_model(lid_model_xml_path) + + +.. 
parsed-literal:: + + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:634: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + + +Now it is possible to run an inference. + +.. code:: ipython3 + + def detect_language(compiled_model, audio_data): + inputs = lid_processor(audio_data, sampling_rate=16_000, return_tensors="pt") + + outputs = compiled_model(inputs['input_values'])[0] + + lang_id = torch.argmax(torch.from_numpy(outputs), dim=-1)[0].item() + detected_lang = lid_model.config.id2label[lang_id] + + return detected_lang + +.. code:: ipython3 + + detect_language(compiled_lid_model, example['audio']['array']) + + + + +.. parsed-literal:: + + 'deu' + + + +Let’s check another language. + +.. code:: ipython3 + + SAMPLE_LANG = widgets.Dropdown( + options=['german', 'dutch', 'french', 'spanish', 'italian', 'portuguese', 'polish'], + value='french', + description='Dataset language:', + disabled=False, + ) + + SAMPLE_LANG + + + + +.. parsed-literal:: + + Dropdown(description='Dataset language:', index=2, options=('german', 'dutch', 'french', 'spanish', 'italian',… + + + +.. 
code:: ipython3 + + mls_dataset = load_dataset("facebook/multilingual_librispeech", SAMPLE_LANG.value, split="test", streaming=True) + mls_dataset = iter(mls_dataset) + + example = next(mls_dataset) + print(example['text']) + ipd.Audio(example['audio']['array'], rate=16_000) + + +.. parsed-literal:: + + grisé par ce parfum il fit des vers en l'honneur de l'humble fleur des bois et il les récita tout haut à ses pieds une violette l'entendit elle crut qu'il ne parlait que pour elle + + + + +.. raw:: html + + + + + + + +.. code:: ipython3 + + language_id = detect_language(compiled_lid_model, example['audio']['array']) + print(language_id) + + +.. parsed-literal:: + + fra + + +Automatic Speech Recognition (ASR) +---------------------------------- + +Download pretrained model and processor +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Download pretrained model +and processor. By default, MMS loads adapter weights for English. If you +want to load adapter weights of another language make sure to specify +``target_lang=<your-chosen-target-lang>`` as well as +``ignore_mismatched_sizes=True``. The ``ignore_mismatched_sizes=True`` +keyword has to be passed to allow the language model head to be resized +according to the vocabulary of the specified language. Similarly, the +processor should be loaded with the same target language. It is also +possible to change the supported language later. + +.. code:: ipython3 + + from transformers import Wav2Vec2ForCTC, AutoProcessor + model_id = "facebook/mms-1b-all" + + asr_processor = AutoProcessor.from_pretrained(model_id) + asr_model = Wav2Vec2ForCTC.from_pretrained(model_id) + +You can look at all supported languages: + +.. code:: ipython3 + + asr_processor.tokenizer.vocab.keys() + + + + +.. 
parsed-literal:: + + dict_keys(['abi', 'abk', 'abp', 'aca', 'acd', 'ace', 'acf', 'ach', 'acn', 'acr', 'acu', 'ade', 'adh', 'adj', 'adx', 'aeu', 'afr', 'agd', 'agg', 'agn', 'agr', 'agu', 'agx', 'aha', 'ahk', 'aia', 'aka', 'akb', 'ake', 'akp', 'alj', 'alp', 'alt', 'alz', 'ame', 'amf', 'amh', 'ami', 'amk', 'ann', 'any', 'aoz', 'apb', 'apr', 'ara', 'arl', 'asa', 'asg', 'asm', 'ast', 'ata', 'atb', 'atg', 'ati', 'atq', 'ava', 'avn', 'avu', 'awa', 'awb', 'ayo', 'ayr', 'ayz', 'azb', 'azg', 'azj-script_cyrillic', 'azj-script_latin', 'azz', 'bak', 'bam', 'ban', 'bao', 'bas', 'bav', 'bba', 'bbb', 'bbc', 'bbo', 'bcc-script_arabic', 'bcc-script_latin', 'bcl', 'bcw', 'bdg', 'bdh', 'bdq', 'bdu', 'bdv', 'beh', 'bel', 'bem', 'ben', 'bep', 'bex', 'bfa', 'bfo', 'bfy', 'bfz', 'bgc', 'bgq', 'bgr', 'bgt', 'bgw', 'bha', 'bht', 'bhz', 'bib', 'bim', 'bis', 'biv', 'bjr', 'bjv', 'bjw', 'bjz', 'bkd', 'bkv', 'blh', 'blt', 'blx', 'blz', 'bmq', 'bmr', 'bmu', 'bmv', 'bng', 'bno', 'bnp', 'boa', 'bod', 'boj', 'bom', 'bor', 'bos', 'bov', 'box', 'bpr', 'bps', 'bqc', 'bqi', 'bqj', 'bqp', 'bre', 'bru', 'bsc', 'bsq', 'bss', 'btd', 'bts', 'btt', 'btx', 'bud', 'bul', 'bus', 'bvc', 'bvz', 'bwq', 'bwu', 'byr', 'bzh', 'bzi', 'bzj', 'caa', 'cab', 'cac-dialect_sanmateoixtatan', 'cac-dialect_sansebastiancoatan', 'cak-dialect_central', 'cak-dialect_santamariadejesus', 'cak-dialect_santodomingoxenacoj', 'cak-dialect_southcentral', 'cak-dialect_western', 'cak-dialect_yepocapa', 'cap', 'car', 'cas', 'cat', 'cax', 'cbc', 'cbi', 'cbr', 'cbs', 'cbt', 'cbu', 'cbv', 'cce', 'cco', 'cdj', 'ceb', 'ceg', 'cek', 'ces', 'cfm', 'cgc', 'che', 'chf', 'chv', 'chz', 'cjo', 'cjp', 'cjs', 'ckb', 'cko', 'ckt', 'cla', 'cle', 'cly', 'cme', 'cmn-script_simplified', 'cmo-script_khmer', 'cmo-script_latin', 'cmr', 'cnh', 'cni', 'cnl', 'cnt', 'coe', 'cof', 'cok', 'con', 'cot', 'cou', 'cpa', 'cpb', 'cpu', 'crh', 'crk-script_latin', 'crk-script_syllabics', 'crn', 'crq', 'crs', 'crt', 'csk', 'cso', 'ctd', 'ctg', 'cto', 'ctu', 'cuc', 'cui', 
'cuk', 'cul', 'cwa', 'cwe', 'cwt', 'cya', 'cym', 'daa', 'dah', 'dan', 'dar', 'dbj', 'dbq', 'ddn', 'ded', 'des', 'deu', 'dga', 'dgi', 'dgk', 'dgo', 'dgr', 'dhi', 'did', 'dig', 'dik', 'dip', 'div', 'djk', 'dnj-dialect_blowowest', 'dnj-dialect_gweetaawueast', 'dnt', 'dnw', 'dop', 'dos', 'dsh', 'dso', 'dtp', 'dts', 'dug', 'dwr', 'dyi', 'dyo', 'dyu', 'dzo', 'eip', 'eka', 'ell', 'emp', 'enb', 'eng', 'enx', 'epo', 'ese', 'ess', 'est', 'eus', 'evn', 'ewe', 'eza', 'fal', 'fao', 'far', 'fas', 'fij', 'fin', 'flr', 'fmu', 'fon', 'fra', 'frd', 'fry', 'ful', 'gag-script_cyrillic', 'gag-script_latin', 'gai', 'gam', 'gau', 'gbi', 'gbk', 'gbm', 'gbo', 'gde', 'geb', 'gej', 'gil', 'gjn', 'gkn', 'gld', 'gle', 'glg', 'glk', 'gmv', 'gna', 'gnd', 'gng', 'gof-script_latin', 'gog', 'gor', 'gqr', 'grc', 'gri', 'grn', 'grt', 'gso', 'gub', 'guc', 'gud', 'guh', 'guj', 'guk', 'gum', 'guo', 'guq', 'guu', 'gux', 'gvc', 'gvl', 'gwi', 'gwr', 'gym', 'gyr', 'had', 'hag', 'hak', 'hap', 'hat', 'hau', 'hay', 'heb', 'heh', 'hif', 'hig', 'hil', 'hin', 'hlb', 'hlt', 'hne', 'hnn', 'hns', 'hoc', 'hoy', 'hrv', 'hsb', 'hto', 'hub', 'hui', 'hun', 'hus-dialect_centralveracruz', 'hus-dialect_westernpotosino', 'huu', 'huv', 'hvn', 'hwc', 'hye', 'hyw', 'iba', 'ibo', 'icr', 'idd', 'ifa', 'ifb', 'ife', 'ifk', 'ifu', 'ify', 'ign', 'ikk', 'ilb', 'ilo', 'imo', 'ina', 'inb', 'ind', 'iou', 'ipi', 'iqw', 'iri', 'irk', 'isl', 'ita', 'itl', 'itv', 'ixl-dialect_sangasparchajul', 'ixl-dialect_sanjuancotzal', 'ixl-dialect_santamarianebaj', 'izr', 'izz', 'jac', 'jam', 'jav', 'jbu', 'jen', 'jic', 'jiv', 'jmc', 'jmd', 'jpn', 'jun', 'juy', 'jvn', 'kaa', 'kab', 'kac', 'kak', 'kam', 'kan', 'kao', 'kaq', 'kat', 'kay', 'kaz', 'kbo', 'kbp', 'kbq', 'kbr', 'kby', 'kca', 'kcg', 'kdc', 'kde', 'kdh', 'kdi', 'kdj', 'kdl', 'kdn', 'kdt', 'kea', 'kek', 'ken', 'keo', 'ker', 'key', 'kez', 'kfb', 'kff-script_telugu', 'kfw', 'kfx', 'khg', 'khm', 'khq', 'kia', 'kij', 'kik', 'kin', 'kir', 'kjb', 'kje', 'kjg', 'kjh', 'kki', 'kkj', 'kle', 'klu', 'klv', 
'klw', 'kma', 'kmd', 'kml', 'kmr-script_arabic', 'kmr-script_cyrillic', 'kmr-script_latin', 'kmu', 'knb', 'kne', 'knf', 'knj', 'knk', 'kno', 'kog', 'kor', 'kpq', 'kps', 'kpv', 'kpy', 'kpz', 'kqe', 'kqp', 'kqr', 'kqy', 'krc', 'kri', 'krj', 'krl', 'krr', 'krs', 'kru', 'ksb', 'ksr', 'kss', 'ktb', 'ktj', 'kub', 'kue', 'kum', 'kus', 'kvn', 'kvw', 'kwd', 'kwf', 'kwi', 'kxc', 'kxf', 'kxm', 'kxv', 'kyb', 'kyc', 'kyf', 'kyg', 'kyo', 'kyq', 'kyu', 'kyz', 'kzf', 'lac', 'laj', 'lam', 'lao', 'las', 'lat', 'lav', 'law', 'lbj', 'lbw', 'lcp', 'lee', 'lef', 'lem', 'lew', 'lex', 'lgg', 'lgl', 'lhu', 'lia', 'lid', 'lif', 'lin', 'lip', 'lis', 'lit', 'lje', 'ljp', 'llg', 'lln', 'lme', 'lnd', 'lns', 'lob', 'lok', 'lom', 'lon', 'loq', 'lsi', 'lsm', 'ltz', 'luc', 'lug', 'luo', 'lwo', 'lww', 'lzz', 'maa-dialect_sanantonio', 'maa-dialect_sanjeronimo', 'mad', 'mag', 'mah', 'mai', 'maj', 'mak', 'mal', 'mam-dialect_central', 'mam-dialect_northern', 'mam-dialect_southern', 'mam-dialect_western', 'maq', 'mar', 'maw', 'maz', 'mbb', 'mbc', 'mbh', 'mbj', 'mbt', 'mbu', 'mbz', 'mca', 'mcb', 'mcd', 'mco', 'mcp', 'mcq', 'mcu', 'mda', 'mdf', 'mdv', 'mdy', 'med', 'mee', 'mej', 'men', 'meq', 'met', 'mev', 'mfe', 'mfh', 'mfi', 'mfk', 'mfq', 'mfy', 'mfz', 'mgd', 'mge', 'mgh', 'mgo', 'mhi', 'mhr', 'mhu', 'mhx', 'mhy', 'mib', 'mie', 'mif', 'mih', 'mil', 'mim', 'min', 'mio', 'mip', 'miq', 'mit', 'miy', 'miz', 'mjl', 'mjv', 'mkd', 'mkl', 'mkn', 'mlg', 'mlt', 'mmg', 'mnb', 'mnf', 'mnk', 'mnw', 'mnx', 'moa', 'mog', 'mon', 'mop', 'mor', 'mos', 'mox', 'moz', 'mpg', 'mpm', 'mpp', 'mpx', 'mqb', 'mqf', 'mqj', 'mqn', 'mri', 'mrw', 'msy', 'mtd', 'mtj', 'mto', 'muh', 'mup', 'mur', 'muv', 'muy', 'mvp', 'mwq', 'mwv', 'mxb', 'mxq', 'mxt', 'mxv', 'mya', 'myb', 'myk', 'myl', 'myv', 'myx', 'myy', 'mza', 'mzi', 'mzj', 'mzk', 'mzm', 'mzw', 'nab', 'nag', 'nan', 'nas', 'naw', 'nca', 'nch', 'ncj', 'ncl', 'ncu', 'ndj', 'ndp', 'ndv', 'ndy', 'ndz', 'neb', 'new', 'nfa', 'nfr', 'nga', 'ngl', 'ngp', 'ngu', 'nhe', 'nhi', 'nhu', 'nhw', 
'nhx', 'nhy', 'nia', 'nij', 'nim', 'nin', 'nko', 'nlc', 'nld', 'nlg', 'nlk', 'nmz', 'nnb', 'nno', 'nnq', 'nnw', 'noa', 'nob', 'nod', 'nog', 'not', 'npi', 'npl', 'npy', 'nso', 'nst', 'nsu', 'ntm', 'ntr', 'nuj', 'nus', 'nuz', 'nwb', 'nxq', 'nya', 'nyf', 'nyn', 'nyo', 'nyy', 'nzi', 'obo', 'oci', 'ojb-script_latin', 'ojb-script_syllabics', 'oku', 'old', 'omw', 'onb', 'ood', 'orm', 'ory', 'oss', 'ote', 'otq', 'ozm', 'pab', 'pad', 'pag', 'pam', 'pan', 'pao', 'pap', 'pau', 'pbb', 'pbc', 'pbi', 'pce', 'pcm', 'peg', 'pez', 'pib', 'pil', 'pir', 'pis', 'pjt', 'pkb', 'pls', 'plw', 'pmf', 'pny', 'poh-dialect_eastern', 'poh-dialect_western', 'poi', 'pol', 'por', 'poy', 'ppk', 'pps', 'prf', 'prk', 'prt', 'pse', 'pss', 'ptu', 'pui', 'pus', 'pwg', 'pww', 'pxm', 'qub', 'quc-dialect_central', 'quc-dialect_east', 'quc-dialect_north', 'quf', 'quh', 'qul', 'quw', 'quy', 'quz', 'qvc', 'qve', 'qvh', 'qvm', 'qvn', 'qvo', 'qvs', 'qvw', 'qvz', 'qwh', 'qxh', 'qxl', 'qxn', 'qxo', 'qxr', 'rah', 'rai', 'rap', 'rav', 'raw', 'rej', 'rel', 'rgu', 'rhg', 'rif-script_arabic', 'rif-script_latin', 'ril', 'rim', 'rjs', 'rkt', 'rmc-script_cyrillic', 'rmc-script_latin', 'rmo', 'rmy-script_cyrillic', 'rmy-script_latin', 'rng', 'rnl', 'roh-dialect_sursilv', 'roh-dialect_vallader', 'rol', 'ron', 'rop', 'rro', 'rub', 'ruf', 'rug', 'run', 'rus', 'sab', 'sag', 'sah', 'saj', 'saq', 'sas', 'sat', 'sba', 'sbd', 'sbl', 'sbp', 'sch', 'sck', 'sda', 'sea', 'seh', 'ses', 'sey', 'sgb', 'sgj', 'sgw', 'shi', 'shk', 'shn', 'sho', 'shp', 'sid', 'sig', 'sil', 'sja', 'sjm', 'sld', 'slk', 'slu', 'slv', 'sml', 'smo', 'sna', 'snd', 'sne', 'snn', 'snp', 'snw', 'som', 'soy', 'spa', 'spp', 'spy', 'sqi', 'sri', 'srm', 'srn', 'srp-script_cyrillic', 'srp-script_latin', 'srx', 'stn', 'stp', 'suc', 'suk', 'sun', 'sur', 'sus', 'suv', 'suz', 'swe', 'swh', 'sxb', 'sxn', 'sya', 'syl', 'sza', 'tac', 'taj', 'tam', 'tao', 'tap', 'taq', 'tat', 'tav', 'tbc', 'tbg', 'tbk', 'tbl', 'tby', 'tbz', 'tca', 'tcc', 'tcs', 'tcz', 'tdj', 'ted', 'tee', 
'tel', 'tem', 'teo', 'ter', 'tes', 'tew', 'tex', 'tfr', 'tgj', 'tgk', 'tgl', 'tgo', 'tgp', 'tha', 'thk', 'thl', 'tih', 'tik', 'tir', 'tkr', 'tlb', 'tlj', 'tly', 'tmc', 'tmf', 'tna', 'tng', 'tnk', 'tnn', 'tnp', 'tnr', 'tnt', 'tob', 'toc', 'toh', 'tom', 'tos', 'tpi', 'tpm', 'tpp', 'tpt', 'trc', 'tri', 'trn', 'trs', 'tso', 'tsz', 'ttc', 'tte', 'ttq-script_tifinagh', 'tue', 'tuf', 'tuk-script_arabic', 'tuk-script_latin', 'tuo', 'tur', 'tvw', 'twb', 'twe', 'twu', 'txa', 'txq', 'txu', 'tye', 'tzh-dialect_bachajon', 'tzh-dialect_tenejapa', 'tzj-dialect_eastern', 'tzj-dialect_western', 'tzo-dialect_chamula', 'tzo-dialect_chenalho', 'ubl', 'ubu', 'udm', 'udu', 'uig-script_arabic', 'uig-script_cyrillic', 'ukr', 'umb', 'unr', 'upv', 'ura', 'urb', 'urd-script_arabic', 'urd-script_devanagari', 'urd-script_latin', 'urk', 'urt', 'ury', 'usp', 'uzb-script_cyrillic', 'uzb-script_latin', 'vag', 'vid', 'vie', 'vif', 'vmw', 'vmy', 'vot', 'vun', 'vut', 'wal-script_ethiopic', 'wal-script_latin', 'wap', 'war', 'waw', 'way', 'wba', 'wlo', 'wlx', 'wmw', 'wob', 'wol', 'wsg', 'wwa', 'xal', 'xdy', 'xed', 'xer', 'xho', 'xmm', 'xnj', 'xnr', 'xog', 'xon', 'xrb', 'xsb', 'xsm', 'xsr', 'xsu', 'xta', 'xtd', 'xte', 'xtm', 'xtn', 'xua', 'xuo', 'yaa', 'yad', 'yal', 'yam', 'yao', 'yas', 'yat', 'yaz', 'yba', 'ybb', 'ycl', 'ycn', 'yea', 'yka', 'yli', 'yor', 'yre', 'yua', 'yue-script_traditional', 'yuz', 'yva', 'zaa', 'zab', 'zac', 'zad', 'zae', 'zai', 'zam', 'zao', 'zaq', 'zar', 'zas', 'zav', 'zaw', 'zca', 'zga', 'zim', 'ziw', 'zlm', 'zmz', 'zne', 'zos', 'zpc', 'zpg', 'zpi', 'zpl', 'zpm', 'zpo', 'zpt', 'zpu', 'zpz', 'ztq', 'zty', 'zul', 'zyb', 'zyp', 'zza']) + + + +Switch out the language adapters by calling the ``load_adapter()`` +function for the model and ``set_target_lang()`` for the tokenizer. Pass +the target language as an input - ``"detect_language_id"`` which was +detected in the previous step. + +.. 
code:: ipython3 + + asr_processor.tokenizer.set_target_lang(language_id) + asr_model.load_adapter(language_id) + + +.. parsed-literal:: + + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + + +Use the original model for inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + inputs = asr_processor(example['audio']['array'], sampling_rate=16_000, return_tensors="pt") + + with torch.no_grad(): + outputs = asr_model(**inputs).logits + + ids = torch.argmax(outputs, dim=-1)[0] + transcription = asr_processor.decode(ids) + print(transcription) + + +.. parsed-literal:: + + grisé par ce parfum il fit des vers en l'honneur de l'humble fleur des bois et il les récita tout haut à ses pieds une violette l'entendit elle crut qu'il ne parlait que pour elle + + +Convert to OpenVINO IR model and run inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Convert to OpenVINO IR model format with ``ov.convert_model`` function directly. Use +``ov.save_model`` function to serialize the result of conversion. For +convenience of further use, we will create a function for these +purposes. + +.. 
code:: ipython3 + + asr_model_xml_path_template = 'models/ov_asr_{}_model.xml' + + + def get_asr_model(model_path_template, language_id, compiled=True): + input_values = torch.zeros([1, MAX_SEQ_LENGTH], dtype=torch.float) + model_path = Path(model_path_template.format(language_id)) + + asr_processor.tokenizer.set_target_lang(language_id) + if not model_path.exists() and model_path_template == asr_model_xml_path_template: + asr_model.load_adapter(language_id) + + model_path.parent.mkdir(parents=True, exist_ok=True) + converted_model = ov.convert_model(asr_model, example_input={'input_values': input_values}) + ov.save_model(converted_model, model_path) + if not compiled: + return converted_model + + if compiled: + return core.compile_model(model_path, device_name=device.value) + return core.read_model(model_path) + + + compiled_asr_model = get_asr_model(asr_model_xml_path_template, language_id) + + +.. parsed-literal:: + + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + + +Run inference. + +.. code:: ipython3 + + def recognize_audio(compiled_model, src_audio): + inputs = asr_processor(src_audio, sampling_rate=16_000, return_tensors="pt") + outputs = compiled_model(inputs['input_values'])[0] + + ids = torch.argmax(torch.from_numpy(outputs), dim=-1)[0] + transcription = asr_processor.decode(ids) + + return transcription + + + transcription = recognize_audio(compiled_asr_model, example['audio']['array']) + print("Original text:", example['text']) + print("Transcription:", transcription) + + +.. 
parsed-literal:: + + Original text: grisé par ce parfum il fit des vers en l'honneur de l'humble fleur des bois et il les récita tout haut à ses pieds une violette l'entendit elle crut qu'il ne parlait que pour elle + Transcription: grisé par ce parfum il fit des vers en l'honneur de l'humble fleur des bois et il les récita tout haut à ses pieds une violette l'entendit elle crut qu'il ne parlait que pour elle + + +Quantization +-------------------- + +`NNCF `__ enables +post-training quantization by adding quantization layers into the model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16`` making model +inference faster. + +The optimization process contains the following steps: + +1. Create a calibration dataset for quantization. +2. Run ``nncf.quantize()`` to obtain quantized models. +3. Serialize quantized ``INT8`` model using ``openvino.save_model()`` + function. + +.. + + Note: Quantization is a time- and memory-consuming operation. Running + the quantization code below may take some time. + +.. code:: ipython3 + + compiled_quantized_lid_model = None + quantized_asr_model_xml_path_template = None + + to_quantize = widgets.Checkbox( + value=False, + description='Quantization', + disabled=False, + ) + + to_quantize + + + + +.. parsed-literal:: + + Checkbox(value=True, description='Quantization') + + + +Let’s load the skip magic extension to skip quantization if ``to_quantize`` is +not selected. + +.. code:: ipython3 + + import sys + sys.path.append("../utils") + + %load_ext skip_kernel_extension + +Preparing calibration dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Select the language to quantize the model for: + +.. code:: ipython3 + + %%skip not $to_quantize.value + + from IPython.display import display + + display(SAMPLE_LANG) + +Load the validation split of the same +`MLS `__ +dataset for the selected language. 
+ +.. code:: ipython3 + + %%skip not $to_quantize.value + + mls_dataset = iter(load_dataset("facebook/multilingual_librispeech", SAMPLE_LANG.value, split="validation", streaming=True)) + example = next(mls_dataset) + +Create calibration dataset for quantization. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + CALIBRATION_DATASET_SIZE = 5 + + calibration_data = [] + for i in range(CALIBRATION_DATASET_SIZE): + data = asr_processor(next(mls_dataset)['audio']['array'], sampling_rate=16_000, return_tensors="np") + calibration_data.append(data["input_values"]) + +Language identification model quantization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run LID model quantization. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import nncf + + quantized_lid_model_xml_path = Path(str(lid_model_xml_path).replace(".xml", "_quantized.xml")) + + if not quantized_lid_model_xml_path.exists(): + quantized_lid_model = nncf.quantize( + get_lid_model(lid_model_xml_path, compiled=False), + calibration_dataset=nncf.Dataset(calibration_data), + preset=nncf.QuantizationPreset.MIXED, + subset_size=len(calibration_data), + model_type=nncf.ModelType.TRANSFORMER + ) + ov.save_model(quantized_lid_model, quantized_lid_model_xml_path) + compiled_quantized_lid_model = core.compile_model(quantized_lid_model, device_name=device.value) + else: + compiled_quantized_lid_model = get_lid_model(quantized_lid_model_xml_path) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino + + +.. parsed-literal:: + + Statistics collection: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00, 1.24s/it] + Applying Smooth Quant: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 291/291 [00:18<00:00, 15.34it/s] + + +.. 
parsed-literal:: + + INFO:nncf:144 ignored nodes was found by name in the NNCFGraph + + +.. parsed-literal:: + + Statistics collection: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:18<00:00, 3.65s/it] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 298/298 [05:09<00:00, 1.04s/it] + + +Detect language with the quantized model. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + language_id = detect_language(compiled_quantized_lid_model, example['audio']['array']) + print("Detected language:", language_id) + + +.. parsed-literal:: + + Detected language: fra + + +Speech recognition model quantization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Run ASR model quantization. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + quantized_asr_model_xml_path_template = asr_model_xml_path_template.replace(".xml", "_quantized.xml") + quantized_asr_model_xml_path = Path(quantized_asr_model_xml_path_template.format(language_id)) + + if not quantized_asr_model_xml_path.exists(): + quantized_asr_model = nncf.quantize( + get_asr_model(asr_model_xml_path_template, language_id, compiled=False), + calibration_dataset=nncf.Dataset(calibration_data), + preset=nncf.QuantizationPreset.MIXED, + subset_size=len(calibration_data), + model_type=nncf.ModelType.TRANSFORMER + ) + ov.save_model(quantized_asr_model, quantized_asr_model_xml_path) + compiled_quantized_asr_model = core.compile_model(quantized_asr_model, device_name=device.value) + else: + compiled_quantized_asr_model = get_asr_model(quantized_asr_model_xml_path_template, language_id) + + +.. parsed-literal:: + + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + + +.. 
parsed-literal:: + + Statistics collection: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00, 1.17s/it] + Applying Smooth Quant: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 290/290 [00:17<00:00, 16.39it/s] + + +.. parsed-literal:: + + INFO:nncf:144 ignored nodes was found by name in the NNCFGraph + + +.. parsed-literal:: + + Statistics collection: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:19<00:00, 3.93s/it] + Applying Fast Bias correction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 393/393 [05:22<00:00, 1.22it/s] + + +Run transcription with quantized model and compare the result to the one +produced by original model. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + compiled_asr_model = get_asr_model(asr_model_xml_path_template, language_id) + transcription_original = recognize_audio(compiled_asr_model, example['audio']['array']) + transcription_quantized = recognize_audio(compiled_quantized_asr_model, example['audio']['array']) + print("Transcription by original model: ", transcription_original) + print("Transcription by quantized model:", transcription_quantized) + + +.. 
parsed-literal:: + + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Transcription by original model: le salon était de la plus haute magnificence dorée comme la galerie de diane aux tuileries avec des tableaux à l'huile au lombri il y avait des tâches claires dans ces tableaux julien apprit plus tard que les sujets avaient semblé peu décent à la maîtresse du logis qui avait fait corriger les tableaux + Transcription by quantized model: le salon était de la plus haute magnificence doré comme la galerie de diane aux tuileries avec des tableaux à l'huile au lombri il y avait des tâches claires dans ces tableaux julien apprit plus tard que les sujets avaient semblé peu decent à la maîtresse du logis qui avait fait corriger les tableaux + + +Compare model size, performance and accuracy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First we compare model size. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + def calculate_compression_rate(model_path_ov, model_path_ov_int8, model_type): + model_size_fp32 = model_path_ov.with_suffix(".bin").stat().st_size / 10 ** 6 + model_size_int8 = model_path_ov_int8.with_suffix(".bin").stat().st_size / 10 ** 6 + print(f"{model_type} model footprint comparison:") + print(f" * FP32 IR model size: {model_size_fp32:.2f} MB") + print(f" * INT8 IR model size: {model_size_int8:.2f} MB") + return model_size_fp32, model_size_int8 + + lid_model_size_fp32, lid_model_size_int8 = \ + calculate_compression_rate(lid_model_xml_path, quantized_lid_model_xml_path, 'LID') + asr_model_size_fp32, asr_model_size_int8 = \ + calculate_compression_rate(Path(asr_model_xml_path_template.format(language_id)), quantized_asr_model_xml_path, 'ASR') + + +.. 
parsed-literal:: + + LID model footprint comparison: + * FP32 IR model size: 1931.81 MB + * INT8 IR model size: 968.96 MB + ASR model footprint comparison: + * FP32 IR model size: 1930.10 MB + * INT8 IR model size: 968.29 MB + + +Secondly we compare accuracy values of the original and quantized models +on a test split of MLS dataset. We rely on the Word Error Rate (WER) +metric and compute accuracy as ``(1 - WER)``. + +We also measure inference time for both language identification and +speech recognition models. + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import time + from tqdm.notebook import tqdm + import numpy as np + from jiwer import wer + + TEST_DATASET_SIZE = 20 + test_dataset = load_dataset("facebook/multilingual_librispeech", SAMPLE_LANG.value, split="test", streaming=True) + test_dataset = test_dataset.take(TEST_DATASET_SIZE) + + def calculate_transcription_time_and_accuracy(lid_model, asr_model): + ground_truths = [] + predictions = [] + identification_time = [] + transcription_time = [] + for data_item in tqdm(test_dataset, desc="Measuring performance and accuracy", total=TEST_DATASET_SIZE): + audio = data_item["audio"]["array"] + + start_time = time.perf_counter() + detect_language(lid_model, audio) + end_time = time.perf_counter() + identification_time.append(end_time - start_time) + + start_time = time.perf_counter() + transcription = recognize_audio(asr_model, audio) + end_time = time.perf_counter() + transcription_time.append(end_time - start_time) + + ground_truths.append(data_item["text"]) + predictions.append(transcription) + + word_accuracy = (1 - wer(ground_truths, predictions)) * 100 + mean_identification_time = np.mean(identification_time) + mean_transcription_time = np.mean(transcription_time) + return mean_identification_time, mean_transcription_time, word_accuracy + + identification_time_fp32, transcription_time_fp32, accuracy_fp32 = \ + calculate_transcription_time_and_accuracy(compiled_lid_model, compiled_asr_model) + 
identification_time_int8, transcription_time_int8, accuracy_int8 = \ + calculate_transcription_time_and_accuracy(compiled_quantized_lid_model, compiled_quantized_asr_model) + print(f"LID model footprint reduction: {lid_model_size_fp32 / lid_model_size_int8:.3f}") + print(f"ASR model footprint reduction: {asr_model_size_fp32 / asr_model_size_int8:.3f}") + print(f"Language identification performance speedup: {identification_time_fp32 / identification_time_int8:.3f}") + print(f"Language transcription performance speedup: {transcription_time_fp32 / transcription_time_int8:.3f}") + print(f"Transcription word accuracy. FP32: {accuracy_fp32:.2f}%. INT8: {accuracy_int8:.2f}%. Accuracy drop :{accuracy_fp32 - accuracy_int8:.2f}%.") + + + +.. parsed-literal:: + + Measuring performance and accuracy: 0%| | 0/20 [00:00 Note: In order to run quantized model to transcribe some language, first the quantized model for that specific language must be prepared.' + + + current_state = { + "fp32": {"model": None, "language": None}, + "int8": {"model": None, "language": None} + } + + + def infer(src_audio_path, quantized): + src_audio, _ = librosa.load(src_audio_path) + lid_model = compiled_quantized_lid_model if quantized else compiled_lid_model + + start_time = time.perf_counter() + detected_language_id = detect_language(lid_model, src_audio) + end_time = time.perf_counter() + identification_delta_time = f"{end_time - start_time:.2f}" + + state = current_state["int8" if quantized else "fp32"] + if detected_language_id != state["language"]: + template_path = quantized_asr_model_xml_path_template if quantized else asr_model_xml_path_template + try: + gr.Info(f"Loading {'quantized' if quantized else ''} ASR model for '{detected_language_id}' language. 
" + "This will take some time.") + state["model"] = get_asr_model(template_path, detected_language_id) + state["language"] = detected_language_id + except RuntimeError as e: + if "Unable to read the model:" in str(e) and quantized: + raise gr.Error(f"There is no quantized ASR model for '{detected_language_id}' language. " + "Please run quantization for this language first.") + + start_time = time.perf_counter() + transcription = recognize_audio(state["model"], src_audio) + end_time = time.perf_counter() + transcription_delta_time = f"{end_time - start_time:.2f}" + + return detected_language_id, transcription, identification_delta_time, transcription_delta_time + + + with gr.Blocks() as demo: + with gr.Row(): + gr.Markdown(f"# {title}") + with gr.Row(): + gr.Markdown(description) + + run_button = {True: None, False: None} + detected_language = {True: None, False: None} + transcription = {True: None, False: None} + identification_time = {True: None, False: None} + transcription_time = {True: None, False: None} + for quantized in [False, True]: + if quantized and not to_quantize.value: + break + with gr.Row(): + with gr.Column(): + if not quantized: + audio = gr.Audio(label="Source Audio", type='filepath') + run_button_name = "Run INT8" if quantized else "Run FP32" if to_quantize.value else "Run" + run_button[quantized] = gr.Button(value=run_button_name) + with gr.Column(): + detected_language[quantized] = gr.Textbox(label=f"Detected language ID{' (Quantized)' if quantized else ''}") + transcription[quantized] = gr.Textbox(label=f"Transcription{' (Quantized)' if quantized else ''}") + identification_time[quantized] = gr.Textbox(label=f"Identification time{' (Quantized)' if quantized else ''}") + transcription_time[quantized] = gr.Textbox(label=f"Transcription time{' (Quantized)' if quantized else ''}") + + run_button[False].click(infer, + inputs=[audio, gr.Number(0, visible=False)], + outputs=[detected_language[False], transcription[False], identification_time[False], 
transcription_time[False]]) + if to_quantize.value: + run_button[True].click(infer, + inputs=[audio, gr.Number(1, visible=False)], + outputs=[detected_language[True], transcription[True], identification_time[True], transcription_time[True]]) + + + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ + +.. parsed-literal:: + + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + Ignored unknown kwarg option normalize + WARNING:nncf:NNCF provides best results with torch==2.0.1, while current torch version is 1.13.1+cu117. If you encounter issues, consider switching to torch==2.0.1 + + +.. parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda-11.7' + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + /home/nsavel/venvs/ov_notebooks_tmp/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:634: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + diff --git a/docs/notebooks/256-bark-text-to-audio-with-output.rst b/docs/notebooks/256-bark-text-to-audio-with-output.rst new file mode 100644 index 00000000000000..4ffede0bda03fd --- /dev/null +++ b/docs/notebooks/256-bark-text-to-audio-with-output.rst @@ -0,0 +1,1111 @@ +Text-to-speech generation using Bark and OpenVINO +================================================= + +🐶 Bark is a transformer-based text-to-audio model created by +`Suno <https://suno.ai>`__. Bark can generate highly realistic, +multilingual speech as well as other audio - including music, background +noise and simple sound effects. The model can also produce nonverbal +communications like laughing, sighing and crying. + +With Bark, users can also produce nonverbal communications like +laughing, sighing, and crying, making it a versatile tool for a variety +of applications. + +.. figure:: https://user-images.githubusercontent.com/5068315/235310676-a4b3b511-90ec-4edf-8153-7ccf14905d73.png + :alt: image.png + + image.png + +Bark is a cutting-edge text-to-speech (TTS) technology that has taken +the AI world by storm. Unlike the typical TTS engines that sound robotic +and mechanical, Bark offers human-like voices that are highly realistic +and natural sounding. Bark uses GPT-style models to generate speech with +minimal tweaking, producing highly expressive and emotive voices that +can capture nuances such as tone, pitch, and rhythm. It offers a +fantastic experience that can leave you wondering if you’re listening to +human beings. + +Notably, Bark supports multiple languages and can generate speech in +Mandarin, French, Italian, Spanish, and other languages with impressive +clarity and accuracy. With Bark, you can easily switch between languages +and still enjoy high-quality sound effects.
+ +Bark is not only intelligent but also intuitive, making it an ideal tool +for individuals and businesses looking to create high-quality voice +content for their platforms. Whether you’re looking to create podcasts, +audiobooks, video game sounds, or any other form of voice content, Bark +has you covered. + +So, if you’re looking for a revolutionary text-to-speech technology that +can elevate your voice content, Bark is the way to go! In this tutorial +we consider how to convert and run bark with OpenVINO. + +About model +----------- + +Bark uses GPT-style models to generate audio from scratch, but the +initial text prompt is embedded into high-level semantic tokens without +the use of phonemes. This allows Bark to generalize to arbitrary +instructions beyond speech that occur in the training data, such as +music lyrics, sound effects, or other non-speech sounds. + +A subsequent second model is used to convert the generated semantic +tokens into audio codec tokens to generate the full waveform. To enable +the community to use Bark via public code, EnCodec codec from Facebook +is used to act as an audio representation. + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Download and Convert + models <#download-and-convert-models>`__ + + - `Text Encoder <#text-encoder>`__ + - `Coarse encoder <#coarse-encoder>`__ + - `Fine encoder <#fine-encoder>`__ + - `Prepare Inference + pipeline <#prepare-inference-pipeline>`__ + +- `Run model inference <#run-model-inference>`__ + + - `Select Inference device <#select-inference-device>`__ + +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- + +.. 
code:: ipython3 + + import sys + + if sys.platform == "linux": + %pip install -q "torch==1.13.1" "torchvision" "torchaudio==0.13.1" --index-url https://download.pytorch.org/whl/cpu + else: + %pip install -q "torch==1.13.1" "torchvision" "torchaudio==0.13.1" + %pip install -q "openvino>=2023.1.0" gradio + %pip install -q "git+https://github.com/suno-ai/bark.git" + +Download and Convert models +--------------------------------------------------------------------- + +.. code:: ipython3 + + from pathlib import Path + from bark.generation import load_model, codec_decode, _flatten_codebooks + + models_dir = Path("models") + models_dir.mkdir(exist_ok=True) + + +.. parsed-literal:: + + torch version does not support flash attention. You will get faster inference speed by upgrade torch to newest nightly version. + + +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Text encoder is responsible for embedding the initial text prompt into +high-level semantic tokens. It uses a tokenizer to convert input text +to token ids and predicts semantic text tokens that capture the meaning +of the text. The text encoder behaves differently on the first step than on +subsequent steps, which is why we need to use separate +models for them. + +.. code:: ipython3 + + text_use_small = True + + text_encoder = load_model( + model_type="text", use_gpu=False, use_small=text_use_small, force_reload=False + ) + + text_encoder_model = text_encoder["model"] + tokenizer = text_encoder["tokenizer"] + +.. code:: ipython3 + + import torch + import openvino as ov + + text_model_suffix = "_small" if text_use_small else "" + text_model_dir = models_dir / f"text_encoder{text_model_suffix}" + text_model_dir.mkdir(exist_ok=True) + text_encoder_path1 = text_model_dir / "bark_text_encoder_1.xml" + text_encoder_path0 = text_model_dir / "bark_text_encoder_0.xml" + +.. 
code:: ipython3 + + class TextEncoderModel(torch.nn.Module): + def __init__(self, encoder): + super().__init__() + self.encoder = encoder + + def forward(self, idx, past_kv=None): + return self.encoder(idx, merge_context=True, past_kv=past_kv, use_cache=True) + + + if not text_encoder_path0.exists() or not text_encoder_path1.exists(): + text_encoder_exportable = TextEncoderModel(text_encoder_model) + ov_model = ov.convert_model( + text_encoder_exportable, example_input=torch.ones((1, 513), dtype=torch.int64) + ) + ov.save_model(ov_model, text_encoder_path0) + logits, kv_cache = text_encoder_exportable(torch.ones((1, 513), dtype=torch.int64)) + ov_model = ov.convert_model( + text_encoder_exportable, + example_input=(torch.ones((1, 1), dtype=torch.int64), kv_cache), + ) + ov.save_model(ov_model, text_encoder_path1) + del ov_model + del text_encoder_exportable + del text_encoder_model, text_encoder + +Coarse encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Coarse encoder is a causal autoregressive transformer that takes as +input the results of the text encoder model. It aims at predicting the +first two audio codebooks necessary for EnCodec. Because the coarse encoder is +an autoregressive model, to make a prediction on the next step +it uses its own output from the previous step. To reduce model complexity and +optimize inference, caching of keys and values for attention blocks can be used. +``past_key_values`` contains a set of precomputed attention keys and values +for each attention module in the model from the previous step. They do +not change from step to step, which allows us to calculate only the update for +the current step and join it to the previous ones. To avoid having a separate +model for the first inference, where the model does not have a “past”, we will +provide an empty tensor on the first step. + +.. 
code:: ipython3 + + coarse_use_small = True + + coarse_model = load_model( + model_type="coarse", use_gpu=False, use_small=coarse_use_small, force_reload=False, + ) + + coarse_model_suffix = "_small" if coarse_use_small else "" + coarse_model_dir = models_dir / f"coarse{coarse_model_suffix}" + coarse_model_dir.mkdir(exist_ok=True) + coarse_encoder_path = coarse_model_dir / "bark_coarse_encoder.xml" + +.. code:: ipython3 + + class CoarseEncoderModel(torch.nn.Module): + def __init__(self, encoder): + super().__init__() + self.encoder = encoder + + def forward(self, idx, past_kv=None): + return self.encoder(idx, past_kv=past_kv, use_cache=True) + + + if not coarse_encoder_path.exists(): + coarse_encoder_exportable = CoarseEncoderModel(coarse_model) + logits, kv_cache = coarse_encoder_exportable( + torch.ones((1, 886), dtype=torch.int64) + ) + ov_model = ov.convert_model( + coarse_encoder_exportable, + example_input=(torch.ones((1, 1), dtype=torch.int64), kv_cache), + ) + ov.save_model(ov_model, coarse_encoder_path) + del ov_model + del coarse_encoder_exportable + del coarse_model + +.. code:: ipython3 + + fine_use_small = False + + fine_model = load_model(model_type="fine", use_gpu=False, use_small=fine_use_small, force_reload=False) + + fine_model_suffix = "_small" if fine_use_small else "" + fine_model_dir = models_dir / f"fine_model{fine_model_suffix}" + fine_model_dir.mkdir(exist_ok=True) + +.. 
code:: ipython3 + + class FineModel(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, pred_idx, idx): + b, t, codes = idx.size() + pos = torch.arange(0, t, dtype=torch.long).unsqueeze(0) # shape (1, t) + + # forward the GPT model itself + tok_embs = [ + wte(idx[:, :, i]).unsqueeze(-1) + for i, wte in enumerate(self.model.transformer.wtes) + ] # token embeddings of shape (b, t, n_embd) + tok_emb = torch.cat(tok_embs, dim=-1) + pos_emb = self.model.transformer.wpe( + pos + ) # position embeddings of shape (1, t, n_embd) + x = tok_emb[:, :, :, : pred_idx + 1].sum(dim=-1) + x = self.model.transformer.drop(x + pos_emb) + for block in self.model.transformer.h: + x = block(x) + x = self.model.transformer.ln_f(x) + return x + + + fine_feature_extractor_path = fine_model_dir / "bark_fine_feature_extractor.xml" + +Fine encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Fine encoder is a non-causal autoencoder transformer, which +iteratively predicts the last codebooks based on the sum of the previous +codebook embeddings obtained using the Coarse encoder. + +.. code:: ipython3 + + if not fine_feature_extractor_path.exists(): + lm_heads = fine_model.lm_heads + fine_feature_extractor = FineModel(fine_model) + feature_extractor_out = fine_feature_extractor( + 3, torch.zeros((1, 1024, 8), dtype=torch.int32) + ) + ov_model = ov.convert_model( + fine_feature_extractor, + example_input=( + torch.ones(1, dtype=torch.long), + torch.zeros((1, 1024, 8), dtype=torch.long), + ), + ) + ov.save_model(ov_model, fine_feature_extractor_path) + for i, lm_head in enumerate(lm_heads): + ov.save_model( + ov.convert_model(lm_head, example_input=feature_extractor_out), + fine_model_dir / f"bark_fine_lm_{i}.xml", + ) + +Prepare Inference pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For better usability, classes for working with the models are provided below. + +.. 
code:: ipython3 + + class OVBarkTextEncoder: + def __init__(self, core, device, model_path1, model_path2): + self.compiled_model1 = core.compile_model(model_path1, device) + self.compiled_model2 = core.compile_model(model_path2, device) + + def __call__(self, input_ids, past_kv=None): + if past_kv is None: + outputs = self.compiled_model1(input_ids, share_outputs=True) + else: + outputs = self.compiled_model2([input_ids, *past_kv], share_outputs=True) + logits, kv_cache = self.postprocess_outputs(outputs, past_kv is None) + return logits, kv_cache + + def postprocess_outputs(self, outs, is_first_stage): + net_outs = ( + self.compiled_model1.outputs + if is_first_stage + else self.compiled_model2.outputs + ) + logits = outs[net_outs[0]] + kv_cache = [] + for out_tensor in net_outs[1:]: + kv_cache.append(outs[out_tensor]) + return logits, kv_cache + + + class OVBarkEncoder: + def __init__(self, core, device, model_path): + self.compiled_model = core.compile_model(model_path, device) + + def __call__(self, idx, past_kv=None): + if past_kv is None: + past_kv = self._init_past_kv() + outs = self.compiled_model([idx, *past_kv], share_outputs=True) + return self.postprocess_outputs(outs) + + def postprocess_outputs(self, outs): + net_outs = self.compiled_model.outputs + logits = outs[net_outs[0]] + kv_cache = [] + for out_tensor in net_outs[1:]: + kv_cache.append(outs[out_tensor]) + return logits, kv_cache + + def _init_past_kv(self): + inputs = [] + for input_t in self.compiled_model.inputs[1:]: + input_shape = input_t.partial_shape + input_shape[0] = 1 + input_shape[2] = 0 + inputs.append(ov.Tensor(ov.Type.f32, input_shape.get_shape())) + return inputs + + + class OVBarkFineEncoder: + def __init__(self, core, device, model_dir, num_lm_heads=7): + self.feats_compiled_model = core.compile_model( + model_dir / "bark_fine_feature_extractor.xml", device + ) + self.feats_out = self.feats_compiled_model.output(0) + lm_heads = [] + for i in range(num_lm_heads): + 
lm_heads.append( + core.compile_model(model_dir / f"bark_fine_lm_{i}.xml", device) + ) + self.lm_heads = lm_heads + + def __call__(self, pred_idx, idx): + feats = self.feats_compiled_model([ov.Tensor(pred_idx), ov.Tensor(idx)])[ + self.feats_out + ] + lm_id = pred_idx - 1 + logits = self.lm_heads[int(lm_id)](feats)[0] + return logits + +The ``generate_audio`` function is the main function for starting the audio +generation process. It accepts input text and, optionally, a history prompt +provided by the user, and runs the inference pipeline. The inference pipeline +consists of several steps, illustrated in the diagram below: + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3a272a34-50bc-4d4a-bb1f-8a649cbf1d6d + :alt: bark_pipeline + + bark_pipeline + +1. Generation of semantic tokens from input text using Text Encoder +2. Generation of coarse acoustic codebooks from semantic tokens using + Coarse Encoder +3. Generation of fine acoustic codebooks from coarse codebooks using Fine + Encoder +4. Decoding of codebooks to audio waveform + +.. code:: ipython3 + + from typing import Optional, Union, Dict + import tqdm + import numpy as np + + + def generate_audio( + text: str, + history_prompt: Optional[Union[Dict, str]] = None, + text_temp: float = 0.7, + waveform_temp: float = 0.7, + silent: bool = False, + ): + """Generate audio array from input text. + + Args: + text: text to be turned into audio + history_prompt: history choice for audio cloning + text_temp: generation temperature (1.0 more diverse, 0.0 more conservative) + waveform_temp: generation temperature (1.0 more diverse, 0.0 more conservative) + silent: disable progress bar + + Returns: + numpy audio array at sample frequency 24khz + """ + semantic_tokens = text_to_semantic( + text, + history_prompt=history_prompt, + temp=text_temp, + silent=silent, + ) + out = semantic_to_waveform( + semantic_tokens, + history_prompt=history_prompt, + temp=waveform_temp, + silent=silent, + ) + return out + +.. 
code:: ipython3 + + def text_to_semantic( + text: str, + history_prompt: Optional[Union[Dict, str]] = None, + temp: float = 0.7, + silent: bool = False, + ): + """Generate semantic array from text. + + Args: + text: text to be turned into audio + history_prompt: history choice for audio cloning + temp: generation temperature (1.0 more diverse, 0.0 more conservative) + silent: disable progress bar + + Returns: + numpy semantic array to be fed into `semantic_to_waveform` + """ + x_semantic = generate_text_semantic( + text, + history_prompt=history_prompt, + temp=temp, + silent=silent, + ) + return x_semantic + +.. code:: ipython3 + + from bark.generation import ( + _load_history_prompt, + _tokenize, + _normalize_whitespace, + TEXT_PAD_TOKEN, + TEXT_ENCODING_OFFSET, + SEMANTIC_VOCAB_SIZE, + SEMANTIC_PAD_TOKEN, + SEMANTIC_INFER_TOKEN, + COARSE_RATE_HZ, + SEMANTIC_RATE_HZ, + N_COARSE_CODEBOOKS, + COARSE_INFER_TOKEN, + CODEBOOK_SIZE, + N_FINE_CODEBOOKS, + COARSE_SEMANTIC_PAD_TOKEN, + ) + import torch.nn.functional as F + from typing import List, Optional, Union, Dict + + + def generate_text_semantic( + text: str, + history_prompt: List[str] = None, + temp: float = 0.7, + top_k: int = None, + top_p: float = None, + silent: bool = False, + min_eos_p: float = 0.2, + max_gen_duration_s: int = None, + allow_early_stop: bool = True, + ): + """ + Generate semantic tokens from text. 
+ Args: + text: text to be turned into audio + history_prompt: history choice for audio cloning + temp: generation temperature (1.0 more diverse, 0.0 more conservative) + top_k: top k number of probabilities for considering during generation + top_p: top probabilities higher than p for considering during generation + silent: disable progress bar + min_eos_p: minimum probability to select end of string token + max_gen_duration_s: maximum duration for generation in seconds + allow_early_stop: allow to stop generation if maximum duration is not reached + Returns: + numpy semantic array to be fed into `semantic_to_waveform` + + """ + text = _normalize_whitespace(text) + if history_prompt is not None: + history_prompt = _load_history_prompt(history_prompt) + semantic_history = history_prompt["semantic_prompt"] + else: + semantic_history = None + encoded_text = ( + np.ascontiguousarray(_tokenize(tokenizer, text)) + TEXT_ENCODING_OFFSET + ) + if len(encoded_text) > 256: + p = round((len(encoded_text) - 256) / len(encoded_text) * 100, 1) + logger.warning(f"warning, text too long, lopping of last {p}%") + encoded_text = encoded_text[:256] + encoded_text = np.pad( + encoded_text, + (0, 256 - len(encoded_text)), + constant_values=TEXT_PAD_TOKEN, + mode="constant", + ) + if semantic_history is not None: + semantic_history = semantic_history.astype(np.int64) + # lop off if history is too long, pad if needed + semantic_history = semantic_history[-256:] + semantic_history = np.pad( + semantic_history, + (0, 256 - len(semantic_history)), + constant_values=SEMANTIC_PAD_TOKEN, + mode="constant", + ) + else: + semantic_history = np.array([SEMANTIC_PAD_TOKEN] * 256) + x = np.hstack( + [encoded_text, semantic_history, np.array([SEMANTIC_INFER_TOKEN])] + ).astype(np.int64)[None] + assert x.shape[1] == 256 + 256 + 1 + n_tot_steps = 768 + # custom tqdm updates since we don't know when eos will occur + pbar = tqdm.tqdm(disable=silent, total=100) + pbar_state = 0 + tot_generated_duration_s 
= 0 + kv_cache = None + for n in range(n_tot_steps): + if kv_cache is not None: + x_input = x[:, [-1]] + else: + x_input = x + logits, kv_cache = ov_text_model(ov.Tensor(x_input), kv_cache) + relevant_logits = logits[0, 0, :SEMANTIC_VOCAB_SIZE] + if allow_early_stop: + relevant_logits = np.hstack( + (relevant_logits, logits[0, 0, [SEMANTIC_PAD_TOKEN]]) + ) # eos + if top_p is not None: + sorted_indices = np.argsort(relevant_logits)[::-1] + sorted_logits = relevant_logits[sorted_indices] + cumulative_probs = np.cumsum(F.softmax(sorted_logits)) + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].copy() + sorted_indices_to_remove[0] = False + relevant_logits[sorted_indices[sorted_indices_to_remove]] = -np.inf + relevant_logits = torch.from_numpy(relevant_logits) + if top_k is not None: + relevant_logits = torch.from_numpy(relevant_logits) + v, _ = torch.topk(relevant_logits, min(top_k, relevant_logits.size(-1))) + relevant_logits[relevant_logits < v[-1]] = -float("Inf") + probs = F.softmax(torch.from_numpy(relevant_logits) / temp, dim=-1) + item_next = torch.multinomial(probs, num_samples=1) + if allow_early_stop and ( + item_next == SEMANTIC_VOCAB_SIZE + or (min_eos_p is not None and probs[-1] >= min_eos_p) + ): + # eos found, so break + pbar.update(100 - pbar_state) + break + x = torch.cat((torch.from_numpy(x), item_next[None]), dim=1).numpy() + tot_generated_duration_s += 1 / SEMANTIC_RATE_HZ + if ( + max_gen_duration_s is not None + and tot_generated_duration_s > max_gen_duration_s + ): + pbar.update(100 - pbar_state) + break + if n == n_tot_steps - 1: + pbar.update(100 - pbar_state) + break + del logits, relevant_logits, probs, item_next + req_pbar_state = np.min([100, int(round(100 * n / n_tot_steps))]) + if req_pbar_state > pbar_state: + pbar.update(req_pbar_state - pbar_state) + pbar_state = req_pbar_state + pbar.close() + out = x.squeeze()[256 + 256 + 1 :] + return out + +.. 
code:: ipython3 + + def semantic_to_waveform( + semantic_tokens: np.ndarray, + history_prompt: Optional[Union[Dict, str]] = None, + temp: float = 0.7, + silent: bool = False, + ): + """Generate audio array from semantic input. + + Args: + semantic_tokens: semantic token output from `text_to_semantic` + history_prompt: history choice for audio cloning + temp: generation temperature (1.0 more diverse, 0.0 more conservative) + silent: disable progress bar + + Returns: + numpy audio array at sample frequency 24khz + """ + coarse_tokens = generate_coarse( + semantic_tokens, + history_prompt=history_prompt, + temp=temp, + silent=silent, + ) + fine_tokens = generate_fine( + coarse_tokens, + history_prompt=history_prompt, + temp=0.5, + ) + audio_arr = codec_decode(fine_tokens) + return audio_arr + +.. code:: ipython3 + + def generate_coarse( + x_semantic: np.ndarray, + history_prompt: Optional[Union[Dict, str]] = None, + temp: float = 0.7, + top_k: int = None, + top_p: float = None, + silent: bool = False, + max_coarse_history: int = 630, # min 60 (faster), max 630 (more context) + sliding_window_len: int = 60, + ): + """ + Generate coarse audio codes from semantic tokens. 
+ Args: + x_semantic: semantic token output from `text_to_semantic` + history_prompt: history prompt, will be prepened to generated if provided + temp: generation temperature (1.0 more diverse, 0.0 more conservative) + top_k: top k number of probabilities for considering during generation + top_p: top probabilities higher than p for considering during generation + silent: disable progress bar + max_coarse_history: threshold for cutting coarse history (minimum 60 for faster generation, maximum 630 for more context) + sliding_window_len: size of sliding window for generation cycle + Returns: + numpy audio array with coarse audio codes + + """ + semantic_to_coarse_ratio = COARSE_RATE_HZ / SEMANTIC_RATE_HZ * N_COARSE_CODEBOOKS + max_semantic_history = int(np.floor(max_coarse_history / semantic_to_coarse_ratio)) + if history_prompt is not None: + history_prompt = _load_history_prompt(history_prompt) + x_semantic_history = history_prompt["semantic_prompt"] + x_coarse_history = history_prompt["coarse_prompt"] + x_coarse_history = _flatten_codebooks(x_coarse_history) + SEMANTIC_VOCAB_SIZE + # trim histories correctly + n_semantic_hist_provided = np.min( + [ + max_semantic_history, + len(x_semantic_history) - len(x_semantic_history) % 2, + int(np.floor(len(x_coarse_history) / semantic_to_coarse_ratio)), + ] + ) + n_coarse_hist_provided = int( + round(n_semantic_hist_provided * semantic_to_coarse_ratio) + ) + x_semantic_history = x_semantic_history[-n_semantic_hist_provided:].astype( + np.int32 + ) + x_coarse_history = x_coarse_history[-n_coarse_hist_provided:].astype(np.int32) + x_coarse_history = x_coarse_history[:-2] + else: + x_semantic_history = np.array([], dtype=np.int32) + x_coarse_history = np.array([], dtype=np.int32) + # start loop + n_steps = int( + round( + np.floor(len(x_semantic) * semantic_to_coarse_ratio / N_COARSE_CODEBOOKS) + * N_COARSE_CODEBOOKS + ) + ) + x_semantic = np.hstack([x_semantic_history, x_semantic]).astype(np.int32) + x_coarse = 
x_coarse_history.astype(np.int32) + base_semantic_idx = len(x_semantic_history) + x_semantic_in = x_semantic[None] + x_coarse_in = x_coarse[None] + n_window_steps = int(np.ceil(n_steps / sliding_window_len)) + n_step = 0 + for _ in tqdm.tqdm(range(n_window_steps), total=n_window_steps, disable=silent): + semantic_idx = base_semantic_idx + int(round(n_step / semantic_to_coarse_ratio)) + # pad from right side + x_in = x_semantic_in[:, np.max([0, semantic_idx - max_semantic_history]) :] + x_in = x_in[:, :256] + x_in = F.pad( + torch.from_numpy(x_in), + (0, 256 - x_in.shape[-1]), + "constant", + COARSE_SEMANTIC_PAD_TOKEN, + ) + x_in = torch.hstack( + [ + x_in, + torch.tensor([COARSE_INFER_TOKEN])[None], + torch.from_numpy(x_coarse_in[:, -max_coarse_history:]), + ] + ).numpy() + kv_cache = None + for _ in range(sliding_window_len): + if n_step >= n_steps: + continue + is_major_step = n_step % N_COARSE_CODEBOOKS == 0 + + if kv_cache is not None: + x_input = x_in[:, [-1]] + else: + x_input = x_in + + logits, kv_cache = ov_coarse_model(x_input, past_kv=kv_cache) + logit_start_idx = ( + SEMANTIC_VOCAB_SIZE + (1 - int(is_major_step)) * CODEBOOK_SIZE + ) + logit_end_idx = ( + SEMANTIC_VOCAB_SIZE + (2 - int(is_major_step)) * CODEBOOK_SIZE + ) + relevant_logits = logits[0, 0, logit_start_idx:logit_end_idx] + if top_p is not None: + sorted_indices = np.argsort(relevant_logits)[::-1] + sorted_logits = relevant_logits[sorted_indices] + cumulative_probs = np.cumsum(F.softmax(sorted_logits)) + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].copy() + sorted_indices_to_remove[0] = False + relevant_logits[sorted_indices[sorted_indices_to_remove]] = -np.inf + relevant_logits = torch.from_numpy(relevant_logits) + if top_k is not None: + relevant_logits = torch.from_numpy(relevant_logits) + v, _ = torch.topk(relevant_logits, min(top_k, relevant_logits.size(-1))) + relevant_logits[relevant_logits < v[-1]] = -float("Inf") + 
probs = F.softmax(torch.from_numpy(relevant_logits) / temp, dim=-1) + item_next = torch.multinomial(probs, num_samples=1) + item_next = item_next + item_next += logit_start_idx + x_coarse_in = torch.cat( + (torch.from_numpy(x_coarse_in), item_next[None]), dim=1 + ).numpy() + x_in = torch.cat((torch.from_numpy(x_in), item_next[None]), dim=1).numpy() + del logits, relevant_logits, probs, item_next + n_step += 1 + del x_in + del x_semantic_in + gen_coarse_arr = x_coarse_in.squeeze()[len(x_coarse_history) :] + del x_coarse_in + gen_coarse_audio_arr = ( + gen_coarse_arr.reshape(-1, N_COARSE_CODEBOOKS).T - SEMANTIC_VOCAB_SIZE + ) + for n in range(1, N_COARSE_CODEBOOKS): + gen_coarse_audio_arr[n, :] -= n * CODEBOOK_SIZE + return gen_coarse_audio_arr + + + def generate_fine( + x_coarse_gen: np.ndarray, + history_prompt: Optional[Union[Dict, str]] = None, + temp: float = 0.5, + silent: bool = True, + ): + """ + Generate full audio codes from coarse audio codes. + Args: + x_coarse_gen: generated coarse codebooks from `generate_coarse` + history_prompt: history prompt, will be prepended to generated + temp: generation temperature (1.0 more diverse, 0.0 more conservative) + silent: disable progress bar + Returns: + numpy audio array with coarse audio codes + + """ + if history_prompt is not None: + history_prompt = _load_history_prompt(history_prompt) + x_fine_history = history_prompt["fine_prompt"] + else: + x_fine_history = None + n_coarse = x_coarse_gen.shape[0] + # make input arr + in_arr = np.vstack( + [ + x_coarse_gen, + np.zeros((N_FINE_CODEBOOKS - n_coarse, x_coarse_gen.shape[1])) + + CODEBOOK_SIZE, + ] + ).astype( + np.int32 + ) # padding + # prepend history if available (max 512) + if x_fine_history is not None: + x_fine_history = x_fine_history.astype(np.int32) + in_arr = np.hstack([x_fine_history[:, -512:].astype(np.int32), in_arr]) + n_history = x_fine_history[:, -512:].shape[1] + else: + n_history = 0 + n_remove_from_end = 0 + # need to pad if too short (since 
non-causal model) + if in_arr.shape[1] < 1024: + n_remove_from_end = 1024 - in_arr.shape[1] + in_arr = np.hstack( + [ + in_arr, + np.zeros((N_FINE_CODEBOOKS, n_remove_from_end), dtype=np.int32) + + CODEBOOK_SIZE, + ] + ) + n_loops = ( + np.max([0, int(np.ceil((x_coarse_gen.shape[1] - (1024 - n_history)) / 512))]) + + 1 + ) + in_arr = in_arr.T + for n in tqdm.tqdm(range(n_loops), disable=silent): + start_idx = np.min([n * 512, in_arr.shape[0] - 1024]) + start_fill_idx = np.min([n_history + n * 512, in_arr.shape[0] - 512]) + rel_start_fill_idx = start_fill_idx - start_idx + in_buffer = in_arr[start_idx : start_idx + 1024, :][None] + for nn in range(n_coarse, N_FINE_CODEBOOKS): + logits = ov_fine_model( + np.array([nn]).astype(np.int64), in_buffer.astype(np.int64) + ) + if temp is None: + relevant_logits = logits[0, rel_start_fill_idx:, :CODEBOOK_SIZE] + codebook_preds = torch.argmax(relevant_logits, -1) + else: + relevant_logits = logits[0, :, :CODEBOOK_SIZE] / temp + probs = F.softmax(torch.from_numpy(relevant_logits), dim=-1) + codebook_preds = torch.hstack( + [ + torch.multinomial(probs[nnn], num_samples=1) + for nnn in range(rel_start_fill_idx, 1024) + ] + ) + in_buffer[0, rel_start_fill_idx:, nn] = codebook_preds.numpy() + del logits, codebook_preds + for nn in range(n_coarse, N_FINE_CODEBOOKS): + in_arr[ + start_fill_idx : start_fill_idx + (1024 - rel_start_fill_idx), nn + ] = in_buffer[0, rel_start_fill_idx:, nn] + del in_buffer + gen_fine_arr = in_arr.squeeze().T + del in_arr + gen_fine_arr = gen_fine_arr[:, n_history:] + if n_remove_from_end > 0: + gen_fine_arr = gen_fine_arr[:, :-n_remove_from_end] + return gen_fine_arr + +Run model inference +------------------------------------------------------------- + +Now is time to see model in action. We need only wrap our models to +classes and run ``generate_audio`` function. 
+ +Select Inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + import openvino as ov + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value="AUTO", + description="Device:", + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + core = ov.Core() + + ov_text_model = OVBarkTextEncoder( + core, device.value, text_encoder_path0, text_encoder_path1 + ) + ov_coarse_model = OVBarkEncoder(core, device.value, coarse_encoder_path) + ov_fine_model = OVBarkFineEncoder(core, device.value, fine_model_dir) + +.. code:: ipython3 + + import time + from bark import SAMPLE_RATE + + torch.manual_seed(42) + t0 = time.time() + text = "Hello, my name is Suno. And, uh — and I like banana and apples. [laughs] But I also have other interests such as playing tic tac toe." + audio_array = generate_audio(text) + generation_duration_s = time.time() - t0 + audio_duration_s = audio_array.shape[0] / SAMPLE_RATE + + print(f"took {generation_duration_s:.0f}s to generate {audio_duration_s:.0f}s of audio") + + +.. parsed-literal:: + + 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:13<00:00, 7.61it/s] + 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:48<00:00, 1.87s/it] + + +.. parsed-literal:: + + took 67s to generate 10s of audio + + +.. code:: ipython3 + + from IPython.display import Audio + from bark import SAMPLE_RATE + + Audio(audio_array, rate=SAMPLE_RATE) + + + + +.. 
raw:: html + + + + + + + +Interactive demo +---------------------------------------------------------- + +.. code:: ipython3 + + import numpy as np + import gradio as gr + from bark import SAMPLE_RATE + from bark.generation import SUPPORTED_LANGS + + AVAILABLE_PROMPTS = ["Unconditional", "Announcer"] + PROMPT_LOOKUP = {} + for _, lang in SUPPORTED_LANGS: + for n in range(10): + label = f"Speaker {n} ({lang})" + AVAILABLE_PROMPTS.append(label) + PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}" + PROMPT_LOOKUP["Unconditional"] = None + PROMPT_LOOKUP["Announcer"] = "announcer" + + default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe." + + title = "# 🐶 Bark: Text-to-Speech using OpenVINO" + + description = """ + Bark is a universal text-to-audio model created by [Suno](http://suno.ai). \ + Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \ + The model output is not censored and the authors do not endorse the opinions in the generated content. \ + Use at your own risk. + """ + + article = """ + + ## 🌎 Foreign Language + + Bark supports various languages out-of-the-box and automatically determines language from input text. \ + When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice. + + Try the prompt: + + ``` + Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible. + ``` + + ## 🤭 Non-Speech Sounds + + Below is a list of some known non-speech sounds, but we are finding more every day. \ + Please let us know if you find patterns that work particularly well on Discord! + + * [laughter] + * [laughs] + * [sighs] + * [music] + * [gasps] + * [clears throat] + * — or ... 
for hesitations + * ♪ for song lyrics + * capitalization for emphasis of a word + * MAN/WOMAN: for bias towards speaker + + Try the prompt: + + ``` + " [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪." + ``` + + ## 🎶 Music + Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \ + Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics. + + Try the prompt: + + ``` + ♪ In the jungle, the mighty jungle, the lion barks tonight ♪ + ``` + + ## 🧬 Voice Cloning + + Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \ + The model also attempts to preserve music, ambient noise, etc. from input audio. \ + However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from. + + ## 👥 Speaker Prompts + + You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \ + Please note that these are not always respected, especially if a conflicting audio history prompt is given. + + Try the prompt: + + ``` + WOMAN: I would like an oatmilk latte please. + MAN: Wow, that's expensive! + ``` + + """ + + examples = [ + [ + "Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!", + "Unconditional", + ], + [ + "Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.", + "Speaker 1 (en)", + ], + [ + "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. 
But I suppose your english isn't terrible.", + "Speaker 0 (es)", + ], + ] + + + def gen_tts(text, history_prompt): + history_prompt = PROMPT_LOOKUP[history_prompt] + audio_arr = generate_audio(text, history_prompt=history_prompt) + audio_arr = (audio_arr * 32767).astype(np.int16) + return (SAMPLE_RATE, audio_arr) + + + with gr.Blocks() as block: + gr.Markdown(title) + gr.Markdown(description) + with gr.Row(): + with gr.Column(): + input_text = gr.Textbox(label="Input Text", lines=2, value=default_text) + options = gr.Dropdown( + AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt" + ) + run_button = gr.Button() + with gr.Column(): + audio_out = gr.Audio(label="Generated Audio", type="numpy") + inputs = [input_text, options] + outputs = [audio_out] + gr.Examples(examples=examples, fn=gen_tts, inputs=inputs, outputs=outputs) + gr.Markdown(article) + run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True) + try: + block.queue().launch(debug=False) + except Exception: + block.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ diff --git a/docs/notebooks/257-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/257-llava-multimodal-chatbot-with-output.rst new file mode 100644 index 00000000000000..1234442a3b8780 --- /dev/null +++ b/docs/notebooks/257-llava-multimodal-chatbot-with-output.rst @@ -0,0 +1,1206 @@ +Visual-language assistant with LLaVA and OpenVINO +================================================= + +`LLaVA `__ (Large Language and Vision +Assistant) is a large multimodal model that aims to develop a +general-purpose visual assistant that can follow both language and image +instructions to complete various real-world tasks. The idea is to +combine the power of large language models (LLMs) with vision encoders +like CLIP to create an end-to-end trained neural assistant that +understands and acts upon multimodal instructions. + +In the field of artificial intelligence, the goal is to create a +versatile assistant capable of understanding and executing tasks based +on both visual and language inputs. Current approaches often rely on +large vision models that solve tasks independently, with language only +used to describe image content. While effective, these models have fixed +interfaces with limited interactivity and adaptability to user +instructions. On the other hand, large language models (LLMs) have shown +promise as a universal interface for general-purpose assistants. By +explicitly representing various task instructions in language, these +models can be guided to switch and solve different tasks. To extend this +capability to the multimodal domain, the `LLaVA +paper `__ introduces visual +instruction-tuning, a novel approach to building a general-purpose +visual assistant. + +In this tutorial we consider how to use the LLaVA model to build a multimodal +chatbot. For demonstration purposes we will use the +`LLaVA-Lightning-MPT-7B-preview `__ +model for conversion; similar steps are required to run other models from the +`LLaVA Model +Zoo `__. 
+ +The tutorial consists of the following steps: + +- Install prerequisites +- Prepare input processor and tokenizer +- Download original model +- Compress model weights to INT8 using NNCF +- Convert model to OpenVINO Intermediate Representation (IR) format +- Prepare OpenVINO-based inference pipeline +- Run OpenVINO model + +**Table of contents:** + + +- `About model <#about-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Build model tokenizer and image + processor <#build-model-tokenizer-and-image-processor>`__ +- `Build model and convert it to OpenVINO IR + format <#build-model-and-convert-it-to-openvino-ir-format>`__ + + - `Prepare helpers for model + conversion <#prepare-helpers-for-model-conversion>`__ + - `Convert and Optimize + Model <#convert-and-optimize-model>`__ + + - `instantiate PyTorch + model <#instantiate-pytorch-model>`__ + - `Compress Model weights to INT8 using + NNCF <#compress-model-weights-to-int-using-nncf>`__ + - `Convert model to OpenVINO IR + format <#convert-model-to-openvino-ir-format>`__ + +- `Prepare OpenVINO based inference + pipeline <#prepare-openvino-based-inference-pipeline>`__ +- `Run model inference <#run-model-inference>`__ + + - `Select inference device <#select-inference-device>`__ + - `Load OpenVINO model <#load-openvino-model>`__ + - `Prepare input data <#prepare-input-data>`__ + - `Test model inference <#test-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +About model +----------------------------------------------------- + +LLaVA connects the pre-trained `CLIP +ViT-L/14 `__ visual encoder and a large +language model like Vicuna, LLaMa v2 or MPT, using a simple projection +matrix. + +.. figure:: https://llava-vl.github.io/images/llava_arch.png + :alt: vlp_matrix.png + + vlp_matrix.png + +Model training procedure consists of 2 stages: + +- Stage 1: Pre-training for Feature Alignment. Only the projection + matrix is updated, based on a subset of CC3M. +- Stage 2: Fine-tuning End-to-End. 
Both the projection matrix and LLM + are updated for two different use scenarios: + + - Visual Chat: LLaVA is fine-tuned on our generated multimodal + instruction-following data for daily user-oriented applications. + - Science QA: LLaVA is fine-tuned on this multimodal reasoning + dataset for the science domain. + +More details about model can be found in original `project +web-page `__, +`paper `__ and +`repo `__. + +Prerequisites +------------------------------------------------------- + +Install required dependencies + +.. code:: ipython3 + + import sys + + if sys.platform == "linux": + %pip install -q "torch==2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + else: + %pip install -q "torch==2.1.0" "torchvision" "torchaudio" + + %pip install -q "openvino==2023.2.0.dev20230922" "nncf>=2.6.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.31.0" "gradio" + +.. code:: ipython3 + + from pathlib import Path + + repo_dir = Path("LLaVA") + + if not repo_dir.exists(): + !git clone https://github.com/haotian-liu/LLaVA.git + + sys.path.insert(0, str(repo_dir.resolve())) + + +.. parsed-literal:: + + Cloning into 'LLaVA'... + remote: Enumerating objects: 1262, done. + remote: Counting objects: 100% (408/408), done. + remote: Compressing objects: 100% (127/127), done. + remote: Total 1262 (delta 343), reused 282 (delta 281), pack-reused 854 + Receiving objects: 100% (1262/1262), 11.94 MiB | 8.90 MiB/s, done. + Resolving deltas: 100% (789/789), done. + + +Build model tokenizer and image processor +----------------------------------------------------------------------------------- + +For starting work with model, we need understand how to prepare input +data first. As it is already discussed before, LLaVA is multimodal model +that accepts input user instructions in text format and image for +analysis. 
At the same time, LLaVA is a combination of 2 fundamental +pretrained models for text and image processing, CLIP and MPT; each of +them has its own approach for preparing data - tokenization for input text +and preprocessing for input image. LLaVA reuses these steps with a small +adaptation: it introduces special tokens that specify the +image location in the text and that should be injected into the provided user +instruction. + +.. code:: ipython3 + + from transformers import AutoTokenizer, AutoConfig, CLIPImageProcessor + from llava.model.language_model.llava_mpt import LlavaMPTForCausalLM + + model_id = "liuhaotian/LLaVA-Lightning-MPT-7B-preview" + + config = AutoConfig.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) + image_processor = CLIPImageProcessor.from_pretrained(config.mm_vision_tower) + + +.. parsed-literal:: + + 2023-10-04 09:48:12.750646: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-04 09:48:12.789652: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-04 09:48:13.494345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. 
code:: ipython3 + + from llava.constants import ( + DEFAULT_IMAGE_PATCH_TOKEN, + DEFAULT_IM_START_TOKEN, + DEFAULT_IM_END_TOKEN, + DEFAULT_IMAGE_TOKEN + ) + + mm_use_im_start_end = getattr(config, "mm_use_im_start_end", False) + mm_use_im_patch_token = getattr(config, "mm_use_im_patch_token", True) + if mm_use_im_patch_token: + tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True) + if mm_use_im_start_end: + tokenizer.add_tokens( + [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True + ) + + if hasattr(config, "max_sequence_length"): + context_len = config.max_sequence_length + else: + context_len = 2048 + +Build model and convert it to OpenVINO IR format +------------------------------------------------------------------------------------------ + +LLaVA is autoregressive transformer generative model, it means that each +next model step depends from model output from previous step. The +generation approach is based on the assumption that the probability +distribution of a word sequence can be decomposed into the product of +conditional next word distributions. In other words, model predicts the +next token in the loop guided by previously generated tokens until the +stop-condition will be not reached (generated sequence of maximum length +or end of string token obtained). The way the next token will be +selected over predicted probabilities is driven by the selected decoding +methodology. You can find more information about the most popular +decoding methods in this +`blog `__. The entry point +for the generation process for models from the Hugging Face Transformers +library is the ``generate`` method. You can find more information about +its parameters and configuration in the +`documentation `__. +To preserve flexibility in the selection decoding methodology, we will +convert only model inference for one step. + +The inference flow has difference on first step and for the next. 
On the +first step, the model accepts the preprocessed input instruction and image, which are +transformed to the unified embedding space using ``token_embedding`` and +``image_encoder`` models; after that, the LLM-based part of the model runs on +input embeddings to predict the probability of next generated tokens. On the +next step, the model accepts only the next token id selected based on sampling +strategy and cached attention key and values. Since the output side is +auto-regressive, an output token hidden state remains the same once +computed for every further generation step. Therefore, recomputing it +every time you want to generate a new token seems wasteful. With the +cache, the model saves the hidden state once it has been computed. The +model only computes the one for the most recently generated output token +at each time step, re-using the saved ones for hidden tokens. This +reduces the generation complexity from O(n^3) to O(n^2) for a +transformer model. More details about how it works can be found in this +`article `__. + +Prepare helpers for model conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The code below prepares a function for converting the LLaVA model to OpenVINO +Intermediate Representation format. It splits the model into the parts described +above, prepares example inputs for each part and converts each part using +`OpenVINO Model Conversion +API `__. +The ``ov.convert_model`` function accepts a PyTorch model instance and returns +an ``ov.Model`` object that represents the model in OpenVINO format. It is ready +to use for loading on a device using ``ov.compile_model`` or can be saved +on disk using ``ov.save_model``. + +.. 
code:: ipython3 + + from functools import wraps + import gc + import warnings + import torch + import openvino as ov + from typing import Optional, Tuple, List + import torch.nn.functional as F + + warnings.filterwarnings('ignore') + + + class ModelWrapper(torch.nn.Module): + """ + Model wrapper class for export for spliting original forward logic on preparing multimodal data and inference using it. + That allows us to sperate image encoder and token embeddings model from general flow. + """ + def __init__(self, model): + super().__init__() + self.model = model + + def forward( + self, + input_ids: torch.LongTensor = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + ): + outputs = self.model.transformer( + input_ids=input_ids, + inputs_embeds=inputs_embeds, + past_key_values=past_key_values, + attention_mask=attention_mask, + prefix_mask=None, + sequence_id=None, + return_dict=True, + output_attentions=False, + output_hidden_states=False, + use_cache=True, + ) + logits = F.linear( + outputs.last_hidden_state.to(self.model.transformer.wte.weight.device), + self.model.transformer.wte.weight.to(outputs.last_hidden_state.dtype), + ) + if self.model.logit_scale is not None: + if self.model.logit_scale == 0: + warnings.warn( + f"Multiplying logits by self.logit_scale={self.model.logit_scale!r}." + "This will produce uniform (uninformative) outputs." + ) + logits *= self.model.logit_scale + + return (logits, tuple(outputs.past_key_values)) + + + def patch_model_forward(model): + """ + Helper function for patching model forward for model with past. 
+ It makes model more convinient for export to TorchScript format avoiding limitation + that list of tensors can not be correctly traced as model input + """ + + orig_forward = model.forward + + @wraps(orig_forward) + def ts_patched_forward( + input_ids: torch.Tensor, + past_key_values: Tuple[Tuple[torch.Tensor]], + attention_mask: torch.LongTensor, + ): + pkv_list = list(past_key_values) + outs = orig_forward(input_ids=input_ids, past_key_values=pkv_list, attention_mask=attention_mask,) + return outs + + model.forward = ts_patched_forward + return model + + + def flattenize_inputs(inputs): + """ + Helper function for making nested inputs flattens + """ + flatten_inputs = [] + for input_data in inputs: + if input_data is None: + continue + if isinstance(input_data, (list, tuple)): + flatten_inputs.extend(flattenize_inputs(input_data)) + else: + flatten_inputs.append(input_data) + return flatten_inputs + + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + def postprocess_converted_model(ov_model, example_input=None, input_names=None, output_names=None, dynamic_shapes=None): + """ + Helper function for appling postprocessing on converted model with updating input names, shapes and output names + acording to requested specification + """ + flatten_example_inputs = flattenize_inputs(example_input) if example_input else [] + + if input_names: + for inp_name, m_input, input_data in zip(input_names, ov_model.inputs, flatten_example_inputs): + input_node = m_input.get_node() + if input_node.element_type == ov.Type.dynamic: + m_input.get_node().set_element_type(ov.Type.f32) + shape = list(input_data.shape) + if dynamic_shapes is not None and inp_name in dynamic_shapes: + for k in dynamic_shapes[inp_name]: + shape[k] = -1 + 
input_node.set_partial_shape(ov.PartialShape(shape)) + m_input.get_tensor().set_names({inp_name}) + + if output_names: + for out, out_name in zip(ov_model.outputs, output_names): + out.get_tensor().set_names({out_name}) + ov_model.validate_nodes_and_infer_types() + return ov_model + + + def convert_llava_mpt(pt_model: torch.nn.Module, model_path: Path): + """ + LLaVA MPT model conversion function + + Params: + pt_model: PyTorch model + model_path: path for saving model + Returns: + None + """ + ov_out_path = Path(model_path) + pt_model.config.save_pretrained(ov_out_path) + pt_model.config.use_cache = True + pt_model.config.torchscript = True + first_stage_model_path = ov_out_path / "llava_input_embed.xml" + image_encoder_path = ov_out_path / "image_encoder.xml" + token_embedding_model_path = ov_out_path / "token_embed.xml" + second_stage_model_path = ov_out_path / "llava_with_past.xml" + if not image_encoder_path.exists(): + model.forward = model.encode_images + ov_model = ov.convert_model( + model, example_input=torch.zeros((1, 3, 224, 224)), input=[(-1, 3, 224, 224)] + ) + ov.save_model(ov_model, image_encoder_path) + cleanup_torchscript_cache() + del ov_model + gc.collect() + print("Image Encoder model successfuly converted") + + if not token_embedding_model_path.exists(): + model.forward = model.get_model().embed_tokens + ov_model = ov.convert_model( + model, example_input=torch.ones((1, 10), dtype=torch.long) + ) + ov.save_model(ov_model, token_embedding_model_path) + cleanup_torchscript_cache() + del ov_model + gc.collect() + print("Token Embedding model successfuly converted") + + if first_stage_model_path.exists() and second_stage_model_path.exists(): + print("LLaVA model successfuly converted") + del pt_model + return + model_wrap = ModelWrapper(model) + example_input_first_stage = { + "inputs_embeds": torch.zeros((1, 307, 4096)), + "attention_mask": torch.ones((1, 307), dtype=torch.long), + } + outs = model_wrap(**example_input_first_stage) + inputs = 
["input_ids"] + outputs = ["logits"] + dynamic_shapes = {"input_ids": {1: "seq_len"}, "attention_mask": {1: "seq_len"}} + for idx in range(len(outs[1])): + inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) + dynamic_shapes[inputs[-1]] = {2: "past_sequence + sequence"} + dynamic_shapes[inputs[-2]] = {3: "past_sequence + sequence"} + outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) + + inputs.extend(["attention_mask"]) + if not first_stage_model_path.exists(): + ov_model = ov.convert_model( + model_wrap, example_input=example_input_first_stage + ) + ov_model = postprocess_converted_model(ov_model, output_names=outputs) + ov.save_model(ov_model, first_stage_model_path) + cleanup_torchscript_cache() + del ov_model + gc.collect() + + + if not second_stage_model_path.exists(): + model_wrap = patch_model_forward(model_wrap) + example_input_second_stage = { + "input_ids": torch.ones((1, 1), dtype=torch.long), + "past_key_values": outs[1], + "attention_mask": torch.ones((1, outs[1][-1][-1].shape[-2] + 1), dtype=torch.long) + } + ov_model = ov.convert_model(model_wrap, example_input=example_input_second_stage) + ov_model = postprocess_converted_model( + ov_model, + example_input=example_input_second_stage.values(), + input_names=inputs, + output_names=outputs, + dynamic_shapes=dynamic_shapes + ) + + ov.save_model(ov_model, ov_out_path / "llava_with_past.xml") + del ov_model + cleanup_torchscript_cache() + print("LLaVA model successfuly converted") + del model_wrap + del pt_model + +Convert and Optimize Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Compress model weights to INT8 using +NNCF 3. Convert model to OpenVINO format and save it on disk. + +Let’s consider each step more deeply. 
+ +Instantiate PyTorch model +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For creating PyTorch model we should use ``from_pretrained`` method of +``LlavaMPTForCausalLM`` model class. Model weights will be downloaded +from `HuggingFace hub <https://huggingface.co/models>`__ during first +run. It may take some time and requires at least 13 GB free space on +disk. + +Compress Model weights to INT8 using NNCF +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For reducing memory consumption, weights compression optimization can be +applied using `NNCF <https://github.com/openvinotoolkit/nncf>`__. Weight +compression aims to reduce the memory footprint of a model. It can also +lead to significant performance improvement for large memory-bound +models, such as Large Language Models (LLMs). LLMs and other models, +which require extensive memory to store the weights during inference, +can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +Currently, `Neural Network Compression Framework +(NNCF) <https://github.com/openvinotoolkit/nncf>`__ provides 8-bit +weight quantization as a compression method primarily designed to +optimize LLMs. The main difference between weights compression and full +model quantization (post-training quantization) is that activations +remain floating-point in the case of weights compression which leads to +a better accuracy. Weight compression for LLMs provides a solid +inference performance improvement which is on par with the performance +of the full model quantization. In addition, weight compression is +data-free and does not require a calibration dataset, making it easy to +use. + +``nncf.compress_weights`` function can be used for performing weights +compression. 
It accepts PyTorch model that next can be converted to +OpenVINO model using Model Conversion API or OpenVINO Model after +conversion. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +Convert model to OpenVINO IR format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Convert model to OpenVINO format using conversion helper function +defined above. + +.. code:: ipython3 + + from nncf import compress_weights + + compressed_model_dir = Path("llava-mpt/INT8_compressed_weights") + if not compressed_model_dir.exists(): + compressed_model_dir.mkdir(exist_ok=True, parents=True) + config.save_pretrained(compressed_model_dir) + model = LlavaMPTForCausalLM.from_pretrained(model_id) + vision_tower = model.get_vision_tower() + if not vision_tower.is_loaded: + vision_tower.load_model() + + if mm_use_im_start_end: + model.resize_token_embeddings(len(tokenizer)) + + model.eval() + with torch.no_grad(): + model = compress_weights(model) + convert_llava_mpt(model, compressed_model_dir) + del model + gc.collect(); + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization. + + + +.. parsed-literal:: + + Loading checkpoint shards: 0%| | 0/2 [00:00`__. + +.. 
code:: ipython3 + + from transformers.generation import GenerationConfig, GenerationMixin + from transformers.modeling_outputs import CausalLMOutputWithPast + from transformers import AutoConfig + import numpy as np + import torch + + + class OVLlavaMPTForCausalLM(GenerationMixin): + def __init__(self, core, model_dir, device): + self.image_encoder = core.compile_model(model_dir / "image_encoder.xml", device) + self.token_embed = core.compile_model(model_dir / "token_embed.xml", device) + self.model = core.read_model(model_dir / "llava_with_past.xml") + self.model_input_embed = core.compile_model( + model_dir / "llava_input_embed.xml", device + ) + self.input_names = { + key.get_any_name(): idx for idx, key in enumerate(self.model.inputs) + } + self.output_names = { + key.get_any_name(): idx for idx, key in enumerate(self.model.outputs) + } + self.key_value_input_names = [ + key for key in self.input_names if "key_values" in key + ] + self.key_value_output_names = [ + key for key in self.output_names if "present" in key + ] + compiled_model = core.compile_model(self.model, device) + self.request = compiled_model.create_infer_request() + self.config = AutoConfig.from_pretrained(model_dir) + self.generation_config = GenerationConfig.from_model_config(config) + self.main_input_name = "input_ids" + self.device = torch.device("cpu") + self.num_pkv = 2 + + def can_generate(self): + """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" + return True + + def __call__( + self, + input_ids: torch.LongTensor, + images: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + prefix_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + **kwargs, + ) -> CausalLMOutputWithPast: + return self.forward( + input_ids, images, attention_mask, prefix_mask, past_key_values + ) + + def forward( + self, + input_ids: torch.LongTensor, + images: torch.Tensor, + 
attention_mask: Optional[torch.LongTensor] = None, + prefix_mask: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + **kwargs, + ) -> CausalLMOutputWithPast: + """General inference method""" + inputs = {} + if past_key_values is not None: + # Flatten the past_key_values + attention_mask = torch.ones( + (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), + dtype=input_ids.dtype, + ) + past_key_values = tuple( + past_key_value + for pkv_per_layer in past_key_values + for past_key_value in pkv_per_layer + ) + # Add the past_key_values to the decoder inputs + inputs = dict(zip(self.key_value_input_names, past_key_values)) + + else: + return self.forward_with_image(input_ids, images, attention_mask) + inputs["input_ids"] = np.array(input_ids) + + if "attention_mask" in self.input_names: + inputs["attention_mask"] = np.array(attention_mask) + + # Run inference + self.request.start_async(inputs, share_inputs=True) + self.request.wait() + + logits = torch.from_numpy(self.request.get_tensor("logits").data) + + # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) + past_key_values = tuple( + self.request.get_tensor(key).data for key in self.key_value_output_names + ) + # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) + + past_key_values = tuple( + past_key_values[i : i + self.num_pkv] + for i in range(0, len(past_key_values), self.num_pkv) + ) + return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) + + def forward_with_image(self, input_ids, images, attention_mask): + """First step inference method, that resolves multimodal data""" + input_embed, attention_mask = self.prepare_multimodal_input( + input_ids, images, attention_mask + ) + outs = self.model_input_embed([input_embed, attention_mask]) + logits = outs[0] + pkv = list(outs.values())[1:] + pkv = 
tuple(pkv[i : i + self.num_pkv] for i in range(0, len(pkv), self.num_pkv)) + return CausalLMOutputWithPast( + logits=torch.from_numpy(logits), past_key_values=pkv + ) + + def prepare_multimodal_input(self, input_ids, images, attention_mask): + """Preprocessing function for embedding multimodal data""" + image_features = [] + if images is not None: + image_features = self.image_encoder(images)[0] + + new_input_embeds = [] + cur_image_idx = 0 + for batch_idx, cur_input_ids in enumerate(input_ids): + if (cur_input_ids == IMAGE_TOKEN_INDEX).sum() == 0: + # multimodal LLM, but the current sample is not multimodal + cur_input_embeds = torch.from_numpy(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) + new_input_embeds.append(cur_input_embeds) + cur_image_idx += 1 + continue + image_token_indices = torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] + cur_new_input_embeds = [] + while image_token_indices.numel() > 0: + cur_image_features = image_features[cur_image_idx] + image_token_start = image_token_indices[0] + if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr( + self.config, "mm_use_im_start_end", False + ): + embd = self.token_embed(cur_input_ids[: image_token_start - 1].unsqueeze(0))[0][0] + cur_new_input_embeds.append(embd) + embd = self.token_embed(cur_input_ids[image_token_start - 1 : image_token_start].unsqueeze(0))[0][0] + cur_new_input_embeds.append(embd) + cur_new_input_embeds.append(cur_image_features) + embd = self.token_embed(cur_input_ids[image_token_start + 1 : image_token_start + 2].unsqueeze(0))[0][0] + cur_new_input_embeds.append(embd) + else: + cur_new_input_embeds.append(self.token_embed(cur_input_ids[:image_token_start].unsqueeze(0))[0][0]) + cur_new_input_embeds.append(cur_image_features) + cur_image_idx += 1 + if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr( + self.config, "mm_use_im_start_end", False + ): + cur_input_ids = cur_input_ids[image_token_start + 2 :] + else: + cur_input_ids = 
cur_input_ids[image_token_start + 1 :] + image_token_indices = torch.where(cur_input_ids == IMAGE_TOKEN_INDEX)[0] + if cur_input_ids.numel() > 0: + if getattr(self.config, "tune_mm_mlp_adapter", False) and getattr( + self.config, "mm_use_im_start_end", False + ): + cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) + else: + cur_new_input_embeds.append(self.token_embed(cur_input_ids.unsqueeze(0))[0][0]) + cur_new_input_embeds = [torch.from_numpy(x) for x in cur_new_input_embeds] + cur_new_input_embeds = torch.cat(cur_new_input_embeds, dim=0) + new_input_embeds.append(cur_new_input_embeds) + + if any(x.shape != new_input_embeds[0].shape for x in new_input_embeds): + max_len = max(x.shape[0] for x in new_input_embeds) + + new_input_embeds_align = [] + for cur_new_embed in new_input_embeds: + cur_new_embed = torch.cat( + ( + cur_new_embed, + torch.zeros( + (max_len - cur_new_embed.shape[0], cur_new_embed.shape[1]), + dtype=cur_new_embed.dtype, + ), + ), + dim=0, + ) + new_input_embeds_align.append(cur_new_embed) + new_input_embeds = torch.stack(new_input_embeds_align, dim=0) + + if attention_mask is not None: + new_attention_mask = [] + for cur_attention_mask, cur_new_labels, cur_new_labels_align in zip( + attention_mask, _new_labels, new_labels + ): + new_attn_mask_pad_left = torch.full( + (cur_new_labels.shape[0] - labels.shape[1],), True, + dtype=attention_mask.dtype, + ) + new_attn_mask_pad_right = torch.full( + (cur_new_labels_align.shape[0] - cur_new_labels.shape[0], ), False, + dtype=attention_mask.dtype, + ) + cur_new_attention_mask = torch.cat( + (new_attn_mask_pad_left, cur_attention_mask, new_attn_mask_pad_right), + dim=0, + ) + new_attention_mask.append(cur_new_attention_mask) + attention_mask = torch.stack(new_attention_mask, dim=0) + assert attention_mask.shape == new_labels.shape + else: + new_input_embeds = torch.stack(new_input_embeds, dim=0) + + if attention_mask is not None: + new_attn_mask_pad_left = torch.full( + 
(attention_mask.shape[0], new_input_embeds.shape[1] - input_ids.shape[1],), True, + dtype=attention_mask.dtype, + ) + attention_mask = torch.cat((new_attn_mask_pad_left, attention_mask), dim=1) + assert attention_mask.shape == new_input_embeds.shape[:2] + + return new_input_embeds, attention_mask + + def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwargs): + """ + This function is used during running GenerationMixin.generate for preparing model specific inputs for + each generation step + """ + past_len = 0 + if past_key_values is not None: + input_ids = input_ids[:, -1].unsqueeze(-1) + past_len = past_key_values[-1][-1].shape[-2] + attention_mask = kwargs.get( + "attention_mask", + torch.ones(input_ids.shape[0], input_ids.shape[1] + past_len), + ) + if not kwargs.get("use_cache", True): + raise NotImplementedError("MPT with prefix_lm=True does not support use_cache=False.") + else: + prefix_mask = None + return { + "input_ids": input_ids, + "attention_mask": attention_mask, + "prefix_mask": prefix_mask, + "past_key_values": past_key_values, + "images": kwargs.get("images", None), + } + + def _reorder_cache( + self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor + ) -> Tuple[Tuple[torch.Tensor]]: + """ + This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or + [`~PreTrainedModel.beam_sample`] is called. + This is required to match `past_key_values` with the correct beam_idx at every generation step. + """ + + # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache + return tuple( + tuple(np.take(past_state, beam_idx, 0) for past_state in layer_past) + for layer_past in past_key_values + ) + +Run model inference +------------------------------------------------------------- + +Now, when we have model and defined generation pipeline, we can run +model inference. 
+ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value="AUTO", + description="Device:", + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO') + + + +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + compressed_model_dir = Path("llava-mpt/INT8_compressed_weights") + ov_model = OVLlavaMPTForCausalLM(core, compressed_model_dir, device.value) + +Prepare input data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For preparing input data, we will use tokenizer and image processor +defined in the begging of our tutorial. For alignment with original +PyTorch implementation we will use PyTorch tensors as input. + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + + + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + return image + + + image_file = "https://llava-vl.github.io/static/images/view.jpg" + + image = load_image(image_file) + image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] + + text_message = "What are the things I should be cautious about when I visit here?" + print(f"Question: {text_message}") + image + + +.. parsed-literal:: + + Question: What are the things I should be cautious about when I visit here? + + + + +.. 
image:: 257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.png + + + +Test model inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The generation process for a long response may be time consuming. To access +partial results as soon as they are generated, without waiting for the whole +process to finish, the Streaming API can be used. Token streaming is the mode +in which the generative system returns the tokens one by one as the +model generates them. This enables showing progressive generations to +the user rather than waiting for the whole generation. Streaming is an +essential aspect of the end-user experience as it reduces latency, one +of the most critical aspects of a smooth experience. You can find more +details about how streaming works in `HuggingFace +documentation <https://huggingface.co/docs/text-generation-inference/conceptual/streaming>`__. + +Also for simplification of preparing input in conversational mode, we +will use Conversation Template helper provided by model authors for +accumulating history of provided messages and images. + +.. 
code:: ipython3 + + from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria + from llava.constants import IMAGE_TOKEN_INDEX + from transformers import TextStreamer + from llava.conversation import conv_templates, SeparatorStyle + + # Prepare + streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) + conv_mode = "mpt" + + conv = conv_templates[conv_mode].copy() + roles = ("user", "assistant") + + if mm_use_im_start_end: + inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + "\n" + text_message + else: + inp = DEFAULT_IMAGE_TOKEN + "\n" + text_message + conv.append_message(conv.roles[0], inp) + conv.append_message(conv.roles[1], None) + + prompt = conv.get_prompt() + input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) + stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 + keywords = [stop_str] + stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) + streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) + print("Answer:") + + output_ids = ov_model.generate( + input_ids, + images=image_tensor, + do_sample=True, + temperature=0.2, + max_new_tokens=1024, + streamer=streamer, + use_cache=True, + stopping_criteria=[stopping_criteria], + ) + + +.. parsed-literal:: + + Answer: + When visiting this location, I should be cautious about the water level, as the lake appears to be low. This could indicate that the water level might be low, which could pose a risk to people or boats. Additionally, I should be mindful of the wooden pier, as it might be slippery or unstable due to the water level, which could lead to accidents or injuries. It is essential to exercise caution while walking on the pier or near the water to ensure safety. + + +Interactive demo +---------------------------------------------------------- + +.. 
code:: ipython3 + + import gradio as gr + from threading import Event, Thread + from transformers import TextIteratorStreamer + + title_markdown = (""" + # 🌋 LLaVA: Large Language and Vision Assistant + """) + + tos_markdown = (""" + ### Terms of use + By using this service, users are required to agree to the following terms: + The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research. + """) + + conv = conv_templates[conv_mode].copy() + conv.messages = [] + + + def clear_history(textbox, imagebox, chatbot): + """ + callback function for clearing chat windows in interface on clear button click + + Params: + textbox: current textbox for user messages state + imagebox: current imagebox state + chatbot: current chatbot state + Returns: + empty textbox, imagebox and chatbot states + """ + conv.messages = [] + + return None, None, None + + def user(message, history): + """ + callback function for updating user messages in interface on submit button click + + Params: + message: current message + history: conversation history + Returns: + updated message and conversation history + """ + # Append the user's message to the conversation history + return "", history + [[message, ""]] + + def bot(image, history, temperature=0.2, top_p=0.7, max_new_tokens=1024): + """ + callback function for running chatbot on submit button click + + Params: + history: conversation history + temperature: parameter for control the level of creativity in AI-generated text. + By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. + top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. 
+ + """ + + text = history[-1][0] + if len(text) <= 0 and image is None: + conv.skip_next = True + yield history + text = text[:1536] # Hard cut-off + if image is not None: + text = text[:1200] # Hard cut-off for images + if '' not in text: + text = text + '\n' + text = (text, image, 'Resize') + conv.append_message(conv.roles[0], text) + conv.append_message(conv.roles[1], None) + conv.skip_next = False + + # Construct the input message string for the model by concatenating the current system message and conversation history + prompt = conv.get_prompt() + image = conv.get_images(return_pil=True) + if not image: + image_tensor = None + else: + image_tensor = image_processor.preprocess(image, return_tensors="pt")["pixel_values"] + input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) + stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 + keywords = [stop_str] + stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids) + # Tokenize the messages string + streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) + generate_kwargs = dict( + input_ids=input_ids, + images=image_tensor, + max_new_tokens=max_new_tokens, + temperature=temperature, + do_sample=temperature > 0.001, + top_p=top_p, + streamer=streamer, + use_cache=True, + stopping_criteria=[stopping_criteria], + ) + + stream_complete = Event() + + def generate_and_signal_complete(): + """ + genration function for single thread + """ + ov_model.generate(**generate_kwargs) + stream_complete.set() + + t1 = Thread(target=generate_and_signal_complete) + t1.start() + + # Initialize an empty string to store the generated text + partial_text = "" + for new_text in streamer: + if not new_text: + continue + partial_text += new_text + conv.messages[-1][-1] = partial_text + history[-1][1] = partial_text + yield history + + with gr.Blocks(title="LLaVA", height=600) as demo: + gr.Markdown(title_markdown) + 
+ with gr.Row(): + with gr.Column(): + imagebox = gr.Image(type="pil") + with gr.Accordion("Parameters", open=False, visible=True) as parameter_row: + temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",) + top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",) + max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",) + + with gr.Column(scale=3): + with gr.Column(scale=6): + chatbot = gr.Chatbot(height=400) + with gr.Row(): + with gr.Column(scale=8): + textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", visible=True, container=False) + with gr.Column(scale=1, min_width=60): + submit_btn = gr.Button(value="Submit", visible=True) + with gr.Row(visible=True) as button_row: + clear_btn = gr.Button(value="🗑️ Clear history", interactive=True) + + gr.Markdown(tos_markdown) + + + submit_event = textbox.submit( + fn=user, + inputs=[textbox, chatbot], + outputs=[textbox, chatbot], + queue=False, + ).then( + bot, [imagebox, chatbot, temperature, top_p, max_output_tokens], chatbot, queue=True + ) + # Register listeners + clear_btn.click(clear_history, [textbox, imagebox, chatbot], [chatbot, textbox, imagebox]) + submit_click_event = submit_btn.click( + fn=user, + inputs=[textbox, chatbot], + outputs=[textbox, chatbot], + queue=False, + ).then(bot, [imagebox, chatbot, temperature, top_p, max_output_tokens], chatbot, queue=True) + + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + try: + demo.queue(max_size=2).launch(debug=False) + except Exception: + demo.queue(max_size=2).launch(share=True, debug=False) + + +.. 
parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ diff --git a/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.jpg b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.jpg new file mode 100644 index 00000000000000..29fc338b516a09 --- /dev/null +++ b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f825c10443339b42cb5e2415f48bb7bafb4e087fb29bce6d2feaf3c2f89788c8 +size 72374 diff --git a/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.png b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.png new file mode 100644 index 00000000000000..c1062ffb3d6d10 --- /dev/null +++ b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/257-llava-multimodal-chatbot-with-output_19_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde262e54da6d8dad5062989d7863db7cd85ac0403b9015a76f5884472f67ceb +size 599941 diff --git a/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/index.html b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/index.html new file mode 100644 index 00000000000000..45960f065f4cfc --- /dev/null +++ b/docs/notebooks/257-llava-multimodal-chatbot-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/257-llava-multimodal-chatbot-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/257-llava-multimodal-chatbot-with-output_files/


../
+257-llava-multimodal-chatbot-with-output_19_1.jpg  31-Oct-2023 00:35               72374
+257-llava-multimodal-chatbot-with-output_19_1.png  31-Oct-2023 00:35              599941
+

+ diff --git a/docs/notebooks/258-blip-diffusion-subject-generation-with-output.rst b/docs/notebooks/258-blip-diffusion-subject-generation-with-output.rst new file mode 100644 index 00000000000000..fee1ea4415c238 --- /dev/null +++ b/docs/notebooks/258-blip-diffusion-subject-generation-with-output.rst @@ -0,0 +1,1429 @@ +Subject-driven image generation and editing using BLIP Diffusion and OpenVINO +============================================================================= + +|image0| `BLIP-Diffusion `__ is a +text-to-image diffusion model with built-in support for multimodal +subject-and-text condition. BLIP-Diffusion enables zero-shot +subject-driven generation, and efficient fine-tuning for customized +subjects with up to 20x speedup. In addition, BLIP-Diffusion can be +flexibly combined with ControlNet and prompt-to-prompt to enable novel +subject-driven generation and editing applications. + +**Table of contents:** +--- + +- `Prerequisites <#prerequisites>`__ +- `Load the model <#load-the-model>`__ +- `Infer the original model <#infer-the-original-model>`__ +- `Zero-Shot subject-driven generation <#zero-shot-subject-driven-generation>`__ +- `Controlled subject-driven generation (Canny-edge) <#controlled-subject-driven-generation-canny-edge>`__ +- `Controlled subject-driven generation (Scribble) <#controlled-subject-driven-generation-scribble>`__ +- `Convert the model to OpenVINO Intermediate Representation (IR) <#convert-the-model-to-openvino-intermediate-representation-ir>`__ +- `QFormer <#qformer>`__ +- `Text encoder <#text-encoder>`__ +- `ControlNet <#controlnet>`__ +- `UNet <#unet>`__ +- `Variational Autoencoder (VAE) <#variational-autoencoder-vae>`__ +- `Select inference device <#select-inference-device>`__ +- `Inference <#inference>`__ +- `Zero-Shot subject-driven generation <#zero-shot-subject-driven-generation>`__ +- `Controlled subject-driven generation (Canny-edge) <#controlled-subject-driven-generation-canny-edge>`__ +- `Controlled subject-driven 
generation (Scribble) <#controlled-subject-driven-generation-scribble>`__ +- `Interactive inference <#interactive-inference>`__ + +.. |image0| image:: https://github.com/salesforce/LAVIS/raw/main/projects/blip-diffusion/teaser-website.png + +Prerequisites +------------------------------------------------------- + +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" matplotlib Pillow gradio + %pip install -q -extra-index-url https://download.pytorch.org/whl/cpu torch transformers accelerate controlnet_aux + %pip install -q "git+https://github.com/huggingface/diffusers.git" # TODO: Change to PyPI package where https://github.com/huggingface/diffusers/pull/4388 is included + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + from pathlib import Path + import gc + from typing import List, Optional, Union + from functools import partial + from urllib.request import urlretrieve + + import diffusers + import torch + import matplotlib.pyplot as plt + import ipywidgets + import PIL + import numpy as np + import gradio as gr + import controlnet_aux + + import openvino as ov + + +.. parsed-literal:: + + /home/itrushkin/.virtualenvs/blip_diffusion/lib/python3.10/site-packages/controlnet_aux/mediapipe_face/mediapipe_face_common.py:7: UserWarning: The module 'mediapipe' is not installed. The package will have limited functionality. Please install it using the command: pip install 'mediapipe' + warnings.warn( + + +.. 
code:: ipython3 + + MODELS_DIR = Path("models") + QFORMER_PATH = MODELS_DIR / "qformer.xml" + TEXT_ENCODER_PATH = MODELS_DIR / "text_encoder.xml" + NEG_TEXT_ENCODER_PATH = MODELS_DIR / "neg_text_encoder.xml" + CONTROLNET_PATH = MODELS_DIR / "controlnet.xml" + UNET_PATH = MODELS_DIR / "unet.xml" + UNET_CONTROLNET_PATH = MODELS_DIR / "unet_controlnet.xml" + VAE_PATH = MODELS_DIR / "vae.xml" + + DATA_DIR = Path("data") + DOG_IMG_URL = "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/dog.jpg" + DOG_IMG_PATH = DATA_DIR / "dog.jpg" + KETTLE_IMG_URL = ( + "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/kettle.jpg" + ) + KETTLE_IMG_PATH = DATA_DIR / "kettle.jpg" + FLOWER_IMG_URL = ( + "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg" + ) + FLOWER_IMG_PATH = DATA_DIR / "flower.jpg" + BAG_IMG_URL = "https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png" + BAG_IMG_PATH = DATA_DIR / "bag.jpg" + + MODELS_DIR.mkdir(parents=True, exist_ok=True) + DATA_DIR.mkdir(parents=True, exist_ok=True) + +Load the model +-------------------------------------------------------- + +We use Hugging Face ``diffusers`` library to load the model using +``from_pretrained`` method. + +.. code:: ipython3 + + pipe = diffusers.pipelines.BlipDiffusionPipeline.from_pretrained("ayushtues/blipdiffusion") + pipe_controlnet = diffusers.pipelines.BlipDiffusionControlNetPipeline.from_pretrained( + "ayushtues/blipdiffusion-controlnet" + ) + + +.. parsed-literal:: + + qformer/model.safetensors not found + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/7 [00:00`__ is a +popular edge detection algorithm that produces high-quality edge maps +from images. + +The approach is first to use the Canny edge detector to generate an edge +map of the desired object. The edge map is then used to condition the +diffusion model during image generation. 
This results in images that are +more likely to contain the desired object and more faithful to the text +description. + +.. code:: ipython3 + + style_subject = ["flower"] # subject that defines the style + tgt_subject = ["teapot"] # subject to generate. + text_prompt = ["on a marble table"] + cond_image = PIL.Image.open(KETTLE_IMG_PATH).resize((512, 512)) + canny = controlnet_aux.CannyDetector() + cldm_cond_image = canny(cond_image, 30, 70, output_type="pil") + cldm_cond_image = [cldm_cond_image] + + style_image = PIL.Image.open(FLOWER_IMG_PATH) + + + guidance_scale = 7.5 + num_inference_steps = 50 + negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate" + +.. code:: ipython3 + + output = pipe_controlnet( + text_prompt, + style_image, + cldm_cond_image, + style_subject, + tgt_subject, + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + neg_prompt=negative_prompt, + height=512, + width=512, + ) + + + +.. parsed-literal:: + + 0%| | 0/51 [00:00`__ (HED) is a deep +learning model for edge detection. + +HED first uses the scribble to generate a seed map. The seed map is a +binary image where the scribbled pixels are set to 1 and the other +pixels are set to 0. Then, it uses the seed map to initialize a +diffusion process. The diffusion process gradually spreads the edge +information from the seed pixels to the other pixels in the image. The +diffusion process is stopped when the edge map converges. The converged +edge map is the final output of HED and input of our diffusion model. + +.. code:: ipython3 + + style_subject = ["flower"] # subject that defines the style + tgt_subject = ["bag"] # subject to generate. 
+ text_prompt = ["on a table"] + bag_img = PIL.Image.open(BAG_IMG_PATH) + cldm_cond_image = bag_img.resize((512, 512)) + hed = controlnet_aux.HEDdetector.from_pretrained("lllyasviel/Annotators") + cldm_cond_image = hed(cldm_cond_image) + cldm_cond_image = [cldm_cond_image] + + guidance_scale = 7.5 + num_inference_steps = 50 + negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate" + + output = pipe_controlnet( + text_prompt, + style_image, + cldm_cond_image, + style_subject, + tgt_subject, + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + neg_prompt=negative_prompt, + height=512, + width=512, + ) + + + +.. parsed-literal:: + + 0%| | 0/51 [00:00`__ paper and is a +transformer that accepts a fixed number of learnable query tokens and an +input text. It is used in BLIP Diffusion pipeline as a multimodal +encoder for image-text alignment. The query tokens interact with text +through self-attention layers, and interact with frozen image features +through cross-attention layers, and produce text-aligned image features +as output. The output is of the same dimension as the number of query +tokens. + +Original QFormer model takes raw text as input, so we redefine the +``forward`` function to accept tokenization result as ``input_ids`` and +``attention_mask`` tensors. + +.. 
code:: ipython3 + + class OVQFormer(torch.nn.Module): + def __init__(self, qformer): + super().__init__() + self._qformer = qformer + + def __getattr__(self, name): + if name == "_qformer": + return super().__getattr__(name) + return getattr(self._qformer, name) + + def forward( + self, + text_input_ids, + text_attention_mask, + image_input, + ): + batch_size = text_input_ids.shape[0] + query_atts = torch.ones((batch_size, self.query_tokens.size()[1]), dtype=torch.long) + attention_mask = torch.cat([query_atts, text_attention_mask], dim=1) + + output_attentions = self.config.output_attentions + output_hidden_states = self.config.output_hidden_states + return_dict = self.config.use_return_dict + + query_length = self.query_tokens.shape[1] + + embedding_output = self.embeddings(input_ids=text_input_ids, query_embeds=self.query_tokens) + + # embedding_output = self.layernorm(query_embeds) + # embedding_output = self.dropout(embedding_output) + + input_shape = embedding_output.size()[:-1] + batch_size, seq_length = input_shape + device = embedding_output.device + + image_embeds_frozen = self.visual_encoder(image_input).last_hidden_state + # image_embeds_frozen = torch.ones_like(image_embeds_frozen) + encoder_hidden_states = image_embeds_frozen + + if attention_mask is None: + attention_mask = torch.ones(((batch_size, seq_length)), device=device) + + # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] + # ourselves in which case we just need to make it broadcastable to all heads. 
+ extended_attention_mask = self.get_extended_attention_mask( + attention_mask, input_shape, device + ) + + # If a 2D or 3D attention mask is provided for the cross-attention + # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length] + if encoder_hidden_states is not None: + if isinstance(encoder_hidden_states, list): + encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[0].size() + else: + encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size() + encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length) + encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device) + encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask) + else: + encoder_extended_attention_mask = None + + head_mask = [None] * self.config.qformer_config.num_hidden_layers + + encoder_outputs = self.encoder( + embedding_output, + attention_mask=extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + query_length=query_length, + ) + sequence_output = encoder_outputs[0] + return self.proj_layer(sequence_output[:, :query_length, :]) + +.. code:: ipython3 + + serialize_openvino( + OVQFormer(qformer), + QFORMER_PATH, + example_input={ + "image_input": torch.randn(1, 3, 16, 16), + "text_input_ids": torch.zeros((1, 3), dtype=torch.int64), + "text_attention_mask": torch.zeros((1, 3), dtype=torch.int64), + }, + input={ + "image_input": ((1, 3, 224, 224),), + "text_input_ids": ((1, ov.Dimension(3, 77)), np.int64), + "text_attention_mask": ((1, ov.Dimension(3, 77)), np.int64), + }, + ) + + del qformer + gc.collect() + + + + +.. 
parsed-literal:: + + 0 + + + +Text encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +BLIP-Diffusion pipeline uses CLIP text encoder, the default encoder for +Stable Diffusion-based models. The only difference is it allows for an +extra input of “context embeddings”, which are the query embeddings used +in Q-Former. They pass through the CLIP model, along with the text +embeddings, and interact with them using self-attention. + +.. code:: ipython3 + + serialize_openvino( + text_encoder, + TEXT_ENCODER_PATH, + example_input={ + "input_ids": torch.zeros((1, 61), dtype=torch.int64), + "ctx_embeddings": torch.zeros((1, 16, 768)), + "ctx_begin_pos": torch.tensor([2]), + }, + input={ + "input_ids": ((1, 61), np.int64), + "ctx_embeddings": ((1, 16, 768),), + "ctx_begin_pos": ((1),), + }, + ) + + # Convert 2nd instance for negative prompt encoding + serialize_openvino( + text_encoder, + NEG_TEXT_ENCODER_PATH, + example_input={ + "input_ids": torch.zeros((1, 77), dtype=torch.int64), + }, + input={ + "input_ids": ((1, 77), np.int64), + }, + ) + + del text_encoder + gc.collect() + + + + +.. parsed-literal:: + + 0 + + + +ControlNet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ControlNet model was introduced in `Adding Conditional Control to +Text-to-Image Diffusion +Models `__. It provides a +greater degree of control over text-to-image generation by conditioning +the model on additional inputs such as edge maps, depth maps, +segmentation maps, and keypoints for pose detection. + +.. 
code:: ipython3 + + controlnet.forward = partial(controlnet.forward, return_dict=False) + example_input = { + "sample": torch.randn(2, 4, 64, 64), + "timestep": torch.tensor(1), + "encoder_hidden_states": torch.randn(2, 77, 768), + "controlnet_cond": torch.randn(2, 3, 512, 512), + } + with torch.no_grad(): + down_block_res_samples, mid_block_res_sample = controlnet(**example_input) + serialize_openvino( + controlnet, + CONTROLNET_PATH, + example_input=example_input, + input={ + "sample": ((2, 4, 64, 64)), + "timestep": ((),), + "encoder_hidden_states": ((2, 77, 768),), + "controlnet_cond": ((2, 3, 512, 512)), + }, + ) + del controlnet + gc.collect() + + + + +.. parsed-literal:: + + 4463 + + + +UNet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `UNet `__ model is one of +the most important components of a diffusion system because it +facilitates the actual diffusion process. + +.. code:: ipython3 + + serialize_openvino( + unet, + UNET_PATH, + example_input={ + "sample": torch.randn(2, 4, 32, 32), + "timestep": torch.tensor(1), + "encoder_hidden_states": torch.randn(2, 77, 768), + }, + input={ + "sample": ((2, 4, unet_sample_size, unet_sample_size),), + "timestep": ((),), + "encoder_hidden_states": ((2, 77, 768),), + }, + ) + + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64, + torch.int32: ov.Type.i32, + torch.int64: ov.Type.i64, + } + + + def flatten_inputs(inputs): + flat_inputs = [] + for input_data in inputs: + if input_data is None: + continue + if isinstance(input_data, (list, tuple)): + flat_inputs.extend(flatten_inputs(input_data)) + else: + flat_inputs.append(input_data) + return flat_inputs + + + # convert 2nd time for stylization task + example_input = { + "sample": torch.randn(2, 4, unet_sample_size, unet_sample_size), + "timestep": torch.tensor(1), + "encoder_hidden_states": torch.randn(2, 77, 768), + "down_block_additional_residuals": down_block_res_samples, + "mid_block_additional_residual": mid_block_res_sample, 
+ } + if not UNET_CONTROLNET_PATH.exists(): + with torch.no_grad(): + ov_unet = ov.convert_model( + unet, + example_input=example_input, + ) + flat_inputs = flatten_inputs(example_input.values()) + for input_data, input_tensor in zip(flat_inputs, ov_unet.inputs): + input_tensor.get_node().set_partial_shape(ov.PartialShape(input_data.shape)) + input_tensor.get_node().set_element_type(dtype_mapping[input_data.dtype]) + ov_unet.validate_nodes_and_infer_types() + ov.save_model(ov_unet, UNET_CONTROLNET_PATH) + del ov_unet + del unet + gc.collect() + + + + +.. parsed-literal:: + + 0 + + + +Variational Autoencoder (VAE) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The variational autoencoder (VAE) model with KL loss was introduced in +`Auto-Encoding Variational +Bayes `__. The model is used to +encode images into latents and to decode latent representations into +images. For inference we use only decoding part of the VAE. We wrap the +decoder in separate ``torch.nn.Module``. + +.. code:: ipython3 + + class VaeDecoderWrapper(torch.nn.Module): + def __init__(self, vae: torch.nn.Module): + super().__init__() + self.vae = vae + + def forward(self, z: torch.FloatTensor): + return self.vae.decode(z / self.vae.config.scaling_factor, return_dict=False)[0] + + + serialize_openvino( + VaeDecoderWrapper(vae), + VAE_PATH, + example_input=torch.randn(1, 4, 64, 64), + input=((1, 4, 64, 64)), + ) + del vae + gc.collect() + + + + +.. parsed-literal:: + + 0 + + + +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + core = ov.Core() + + device = ipywidgets.Dropdown( + options=core.available_devices + ["AUTO"], + value="AUTO", + description="Device:", + disabled=False, + ) + device + + + + +.. 
parsed-literal:: + + Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + qformer = core.compile_model(QFORMER_PATH, device_name=device.value) + +.. code:: ipython3 + + text_encoder = core.compile_model(TEXT_ENCODER_PATH, device_name=device.value) + +.. code:: ipython3 + + neg_text_encoder = core.compile_model(NEG_TEXT_ENCODER_PATH, device_name=device.value) + +.. code:: ipython3 + + controlnet = core.compile_model(CONTROLNET_PATH, device_name=device.value) + +.. code:: ipython3 + + unet = core.compile_model(UNET_PATH, device_name=device.value) + +.. code:: ipython3 + + unet_controlnet = core.compile_model(UNET_CONTROLNET_PATH, device_name=device.value) + +.. code:: ipython3 + + vae = core.compile_model(VAE_PATH, device_name=device.value) + +Inference +--------------------------------------------------- + +.. code:: ipython3 + + def call(compiled_model, *args, **kwargs): + if len(args) and not kwargs: + result = compiled_model([np.array(a) for a in args])[0] + elif kwargs and not len(args): + result = compiled_model({k: np.array(v) for k, v in kwargs.items()})[0] + else: + raise NotImplementedError(f"{args=},{kwargs=}") + result = torch.tensor(result) + return result + +.. 
code:: ipython3 + + class OvBlipDiffusionPipeline(diffusers.DiffusionPipeline): + def __init__(self): + self.tokenizer = tokenizer + self.qformer_tokenizer = qformer_tokenizer + self.text_encoder = partial(call, text_encoder) + self.neg_text_encoder = partial(call, neg_text_encoder) + self.vae = partial(call, vae) + self.unet = partial(call, unet) + self.unet_controlnet = partial(call, unet_controlnet) + self.controlnet = controlnet + self.scheduler = scheduler + self.qformer = partial(call, qformer) + self.image_processor = image_processor + self.register_to_config(**config) + + def __call__( + self, + prompt: List[str], + reference_image: PIL.Image.Image, + source_subject_category: List[str], + target_subject_category: List[str], + conditioning_image: Optional[PIL.Image.Image] = None, + latents: Optional[torch.FloatTensor] = None, + guidance_scale: float = 7.5, + num_inference_steps: int = 50, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + neg_prompt: Optional[str] = "", + prompt_strength: float = 1.0, + prompt_reps: int = 20, + output_type: Optional[str] = "pil", + ): + """ + Function invoked when calling the pipeline for generation. + + Args: + prompt (`List[str]`): + The prompt or prompts to guide the image generation. + reference_image (`PIL.Image.Image`): + The reference image to condition the generation on. + source_subject_category (`List[str]`): + The source subject category. + target_subject_category (`List[str]`): + The target subject category. + conditioning_image (`PIL.Image.Image`): + The conditioning canny edge image to condition the generation on. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by random sampling. 
+ guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + generator (`torch.Generator` or `List[torch.Generator]`, *optional*): + One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) + to make generation deterministic. + neg_prompt (`str`, *optional*, defaults to ""): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + prompt_strength (`float`, *optional*, defaults to 1.0): + The strength of the prompt. Specifies the number of times the prompt is repeated along with prompt_reps + to amplify the prompt. + prompt_reps (`int`, *optional*, defaults to 20): + The number of times the prompt is repeated along with prompt_strength to amplify the prompt. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"` + (`np.array`) or `"pt"` (`torch.Tensor`). 
+ """ + width = 512 + height = 512 + reference_image = self.image_processor.preprocess( + reference_image, + image_mean=self.config.mean, + image_std=self.config.std, + return_tensors="pt", + )["pixel_values"] + + if isinstance(prompt, str): + prompt = [prompt] + if isinstance(source_subject_category, str): + source_subject_category = [source_subject_category] + if isinstance(target_subject_category, str): + target_subject_category = [target_subject_category] + + batch_size = len(prompt) + + prompt = self._build_prompt( + prompts=prompt, + tgt_subjects=target_subject_category, + prompt_strength=prompt_strength, + prompt_reps=prompt_reps, + ) + qformer_input = self.qformer_tokenizer( + source_subject_category, return_tensors="pt", padding=True + ) + query_embeds = self.qformer( + image_input=reference_image, + text_input_ids=qformer_input.input_ids, + text_attention_mask=qformer_input.attention_mask, + ) + text_embeddings = self.encode_prompt(query_embeds, prompt, device) + do_classifier_free_guidance = guidance_scale > 1.0 + if do_classifier_free_guidance: + max_length = self.config.text_encoder_max_position_embeddings + + uncond_input = self.tokenizer( + [neg_prompt] * batch_size, + padding="max_length", + max_length=max_length, + return_tensors="pt", + ) + uncond_embeddings = self.neg_text_encoder(input_ids=uncond_input.input_ids) + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + scale_down_factor = 2 ** (len(self.config.unet_block_out_channels) - 1) + latents = self.prepare_latents( + batch_size=batch_size, + num_channels=self.config.unet_in_channels, + height=height // scale_down_factor, + width=width // scale_down_factor, + generator=generator, + latents=latents, + device=None, + dtype=None, + ) + # set timesteps + extra_set_kwargs = {} + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) + + if conditioning_image: + cond_image = self.prepare_control_image( + image=conditioning_image, + width=width, + height=height, + batch_size=batch_size, + num_images_per_prompt=1, + device=None, + dtype=None, + do_classifier_free_guidance=do_classifier_free_guidance, + ) + for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): + # expand the latents if we are doing classifier free guidance + do_classifier_free_guidance = guidance_scale > 1.0 + + latent_model_input = ( + torch.cat([latents] * 2) if do_classifier_free_guidance else latents + ) + if conditioning_image: + controlnet_output = self.controlnet( + [ + latent_model_input, + t, + text_embeddings, + cond_image, + ] + ) + noise_pred = ( + self.unet( + sample=latent_model_input, timestep=t, encoder_hidden_states=text_embeddings + ) + if not conditioning_image + else self.unet_controlnet( + latent_model_input, + t, + text_embeddings, + *[v for _, v in controlnet_output.items()], + ) + ) + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * ( + noise_pred_text - noise_pred_uncond + ) + + latents = self.scheduler.step( + noise_pred, + t, + latents, + )["prev_sample"] + + image = self.vae(latents) + image = self.image_processor.postprocess(image, 
output_type=output_type) + return image + + def encode_prompt(self, query_embeds, prompt, device=None): + # embeddings for prompt, with query_embeds as context + max_len = self.config.text_encoder_max_position_embeddings + max_len -= self.config.qformer_num_query_tokens + + tokenized_prompt = self.tokenizer( + prompt, + padding="max_length", + truncation=True, + max_length=max_len, + return_tensors="pt", + ) + + batch_size = query_embeds.shape[0] + ctx_begin_pos = [self.config.ctx_begin_pos] * batch_size + + text_embeddings = self.text_encoder( + input_ids=tokenized_prompt.input_ids, + ctx_embeddings=query_embeds, + ctx_begin_pos=ctx_begin_pos, + ) + + return text_embeddings + + + OvBlipDiffusionPipeline.prepare_control_image = ( + diffusers.pipelines.BlipDiffusionControlNetPipeline.prepare_control_image + ) + OvBlipDiffusionPipeline._build_prompt = diffusers.pipelines.BlipDiffusionPipeline._build_prompt + OvBlipDiffusionPipeline.prepare_latents = diffusers.pipelines.BlipDiffusionPipeline.prepare_latents + +.. code:: ipython3 + + ov_pipe = OvBlipDiffusionPipeline() + +Zero-Shot subject-driven generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + output = ov_pipe( + text_prompt_input, + dog_img, + cond_subject, + tgt_subject, + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + neg_prompt=negative_prompt + ) + + + +.. parsed-literal:: + + 0%| | 0/51 [00:00 +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/258-blip-diffusion-subject-generation-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/258-blip-diffusion-subject-generation-with-output_files/


../
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              495502
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              680845
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              541801
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              522726
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              683108
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35              539707
+

+ diff --git a/docs/notebooks/259-decidiffusion-image-generation-with-output.rst b/docs/notebooks/259-decidiffusion-image-generation-with-output.rst new file mode 100644 index 00000000000000..56a22385887170 --- /dev/null +++ b/docs/notebooks/259-decidiffusion-image-generation-with-output.rst @@ -0,0 +1,1173 @@ +Image generation with DeciDiffusion and OpenVINO +================================================ + +DeciDiffusion 1.0 is a diffusion-based text-to-image generation model. +While it maintains foundational architecture elements from Stable +Diffusion, such as the Variational Autoencoder (VAE) and CLIP’s +pre-trained Text Encoder, DeciDiffusion introduces significant +enhancements. The primary innovation is the substitution of U-Net with +the more efficient U-Net-NAS, a design pioneered by Deci. This novel +component streamlines the model by reducing the number of parameters, +leading to superior computational efficiency. + +The domain of text-to-image generation, with its transformative +potential in design, art, and advertising, has captivated both experts +and laypeople. This technology’s allure lies in its ability to +effortlessly transform text into vivid images, marking a significant +leap in AI capabilities. While Stable Diffusion’s open-source foundation +has spurred many advancements, it grapples with practical deployment +challenges due to its heavy computational needs. These challenges lead +to notable latency and cost concerns in training and deployment. In +contrast, DeciDiffusion stands out. Its superior computational +efficiency ensures a smoother user experience and boasts an impressive +reduction of nearly 66% in production costs. + +In this tutorial we consider how to convert and run DeciDiffusion using +OpenVINO, making text-to-image generative applications more accessible +and feasible. + +The notebook contains the following steps: + +1. Convert PyTorch models to OpenVINO Intermediate Representation using + OpenVINO Converter Tool (OVC). 
+2. Prepare Inference Pipeline. +3. Run Inference pipeline with OpenVINO. +4. Run Interactive demo for DeciDiffusion model + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Prepare DeciDiffusion models for OpenVINO format + conversion <#prepare-decidiffusion-models-for-openvino-format-conversion>`__ + + - `About model <#about-model>`__ + - `DeciDiffusion integration with Diffusers + library <#decidiffusion-integration-with-diffusers-library>`__ + +- `Convert models to OpenVINO + format <#convert-models-to-openvino-format>`__ + + - `Text Encoder <#text-encoder>`__ + - `U-Net NAS <#u-net-nas>`__ + - `VAE <#vae>`__ + +- `Prepare inference pipeline <#prepare-inference-pipeline>`__ + + - `Guidance scale and negative prompt for controlling generation + result. <#guidance-scale-and-negative-prompt-for-controlling-generation-result>`__ + - `Strength for controlling Image-to-Image + generation <#strength-for-controlling-image-to-image-generation>`__ + - `Configure Inference + Pipeline <#configure-inference-pipeline>`__ + +- `Text-to-Image generation <#text-to-image-generation>`__ + + - `Image-to-Image generation <#image-to-image-generation>`__ + +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- + +install required packages + +.. code:: ipython3 + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "diffusers" "transformers" "torch" "pillow" "openvino>=2023.1.0" "gradio" + +Prepare DeciDiffusion models for OpenVINO format conversion +----------------------------------------------------------------------------------------------------- + +About model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +DeciDiffusion 1.0 is an 820 million parameter text-to-image latent +diffusion model trained on the LAION-v2 dataset and fine-tuned on the +LAION-ART dataset. 
Its architecture is based on the Stable Diffusion +foundational model with the replacement of the traditional U-Net +component with a more streamlined variant, U-Net-NAS, conceived by Deci. + +To understand the role and significance of the U-Net component, it’s +worth diving into the latent diffusion architecture: + +Latent diffusion starts with a rudimentary, “noisy” image representation +in latent space. With textual guidance, like “A drawing of a pint of +beer on a brick wall,” the model progressively refines this +representation, gradually unveiling a denoised image representation. +After sufficient iterations, this representation in latent space is +expanded into a high-resolution image. + +Latent diffusion comprises three primary components: + +- **Variational Autoencoder (VAE)**: Transforms images into latent + representations and vice versa. During training, the encoder converts + an image into a latent version, while the decoder reverses this + during both training and inference. + +- **U-Net**: An iterative encoder-decoder mechanism that introduces and + subsequently reduces noise in the latent images. The decoder employs + cross-attention layers, conditioning output on text embeddings linked + to the given text description. + +- **Text Encoder**: This component transforms textual prompts into + latent text embeddings, which the U-Net decoder uses. + +U-Net is a resource-intensive component during training and inference. +The repetitive noising and denoising processes incur substantial +computational costs at every iteration. + +.. figure:: https://deci.ai/wp-content/uploads/2023/09/U-Net-NAS-1024x632.png + :alt: unet-vs-unet-nas + + unet-vs-unet-nas + +U-Net-NAS features two fewer up and down blocks than U-Net. Its +distinctive feature is the variable composition of each block, where the +number of ResNet and Attention blocks is optimized to achieve the best +overall model performance using the fewest computations. 
With +DeciDiffusion’s incorporation of U-Net-NAS — characterized by fewer +parameters and enhanced computational efficiency — the overall model’s +computational demands are reduced. + +DeciDiffusion integration with Diffusers library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To work with DeciDiffusion, we will use Hugging Face +`Diffusers `__ library. +DeciDiffusion uses the +`StableDiffusionPipeline `__ +with small customization: overriding default parameters and replacing +U-Net model. The code, defined in +``load_orginal_pytorch_pipeline_componets`` function, demonstrates how +to create diffusers pipeline for DeciDiffusion. + +.. code:: ipython3 + + from pathlib import Path + import gc + import torch + import openvino as ov + from diffusers import StableDiffusionPipeline + import warnings + + warnings.filterwarnings('ignore') + + TEXT_ENCODER_OV_PATH = Path("model/text_encoder.xml") + UNET_OV_PATH = Path('model/unet_nas.xml') + VAE_ENCODER_OV_PATH = Path("model/vae_encoder.xml") + VAE_DECODER_OV_PATH = Path('model/vae_decoder.xml') + checkpoint = "Deci/DeciDiffusion-v1-0" + scheduler_config_dir = Path("model/scheduler") + tokenizer_dir = Path("model/tokenizer") + + def load_orginal_pytorch_pipeline_componets(): + pipeline = StableDiffusionPipeline.from_pretrained(checkpoint, custom_pipeline=checkpoint, torch_dtype=torch.float32) + pipeline.unet = pipeline.unet.from_pretrained(checkpoint, subfolder='flexible_unet', torch_dtype=torch.float32) + text_encoder = pipeline.text_encoder + text_encoder.eval() + unet = pipeline.unet + unet.eval() + vae = pipeline.vae + vae.eval() + + del pipeline + gc.collect(); + return text_encoder, unet, vae + + + def cleanup_torchscript_cache(): + """ + Helper for removing cached model representation + """ + torch._C._jit_clear_class_registry() + torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() + torch.jit._state._clear_class_state() + + + 
skip_conversion = TEXT_ENCODER_OV_PATH.exists() and UNET_OV_PATH.exists() and VAE_ENCODER_OV_PATH.exists() and VAE_DECODER_OV_PATH.exists() + + if not skip_conversion: + text_encoder, unet, vae = load_orginal_pytorch_pipeline_componets() + else: + text_encoder, unet, vae = None, None, None + + +.. parsed-literal:: + + 2023-10-13 18:51:04.214433: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-13 18:51:04.252034: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-13 18:51:04.947207: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +Convert models to OpenVINO format +--------------------------------------------------------------------------- + +Starting from 2023.0 release, OpenVINO supports PyTorch models directly +via Model Conversion API. ``ov.convert_model`` function accepts instance +of PyTorch model and example inputs for tracing and returns object of +``ov.Model`` class, ready to use or save on disk using ``ov.save_model`` +function. + +As we already discussed above, the pipeline consists of three important +parts: + +- Text Encoder to create condition to generate an image from a text + prompt. +- U-Net-NAS for step-by-step denoising latent image representation. +- Autoencoder (VAE) for decoding latent space to image. 
+ +Let us convert each part: + +Text Encoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The text-encoder is responsible for transforming the input prompt, for +example, “a photo of an astronaut riding a horse” into an embedding +space that can be understood by the U-Net. It is usually a simple +transformer-based encoder that maps a sequence of input tokens to a +sequence of latent text embeddings. + +Input of the text encoder is the tensor ``input_ids`` which contains +indexes of tokens from text processed by tokenizer and padded to maximum +length accepted by model. Model outputs are two tensors: +``last_hidden_state`` - hidden state from the last MultiHeadAttention +layer in the model and ``pooler_out`` - Pooled output for whole model +hidden states. + +.. code:: ipython3 + + def convert_encoder(text_encoder: torch.nn.Module, ir_path:Path): + """ + Convert Text Encoder mode. + Function accepts text encoder model, and prepares example inputs for conversion, + Parameters: + text_encoder (torch.nn.Module): text_encoder model from Stable Diffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + input_ids = torch.ones((1, 77), dtype=torch.long) + # switch model to inference mode + text_encoder.eval() + + # disable gradients calculation for reducing memory consumption + with torch.no_grad(): + # Export model to IR format + ov_model = ov.convert_model(text_encoder, example_input=input_ids, input=[(1,77),]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect(); + print(f'Text Encoder successfully converted to IR and saved to {ir_path}') + + + if not TEXT_ENCODER_OV_PATH.exists(): + convert_encoder(text_encoder, TEXT_ENCODER_OV_PATH) + else: + print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH}") + + del text_encoder + gc.collect(); + + +.. 
parsed-literal:: + + Text encoder will be loaded from model/text_encoder.xml + + +U-Net NAS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +U-Net NAS model, similar to Stable Diffusion UNet model, has three +inputs: + +- ``sample`` - latent image sample from previous step. Generation + process has not been started yet, so you will use random noise. +- ``timestep`` - current scheduler step. +- ``encoder_hidden_state`` - hidden state of text encoder. + +Model predicts the ``sample`` state for the next step. + +.. code:: ipython3 + + import numpy as np + + dtype_mapping = { + torch.float32: ov.Type.f32, + torch.float64: ov.Type.f64 + } + + + def convert_unet(unet:torch.nn.Module, ir_path:Path): + """ + Convert U-net model to IR format. + Function accepts unet model, prepares example inputs for conversion, + Parameters: + unet (StableDiffusionPipeline): unet from Stable Diffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + # prepare inputs + encoder_hidden_state = torch.ones((2, 77, 768)) + latents_shape = (2, 4, 512 // 8, 512 // 8) + latents = torch.randn(latents_shape) + t = torch.from_numpy(np.array(1, dtype=float)) + dummy_inputs = (latents, t, encoder_hidden_state) + input_info = [] + for i, input_tensor in enumerate(dummy_inputs): + shape = ov.PartialShape(tuple(input_tensor.shape)) + if i != 1: + shape[0] = -1 + element_type = dtype_mapping[input_tensor.dtype] + input_info.append((shape, element_type)) + + unet.eval() + with torch.no_grad(): + ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect(); + print(f'U-Net NAS successfully converted to IR and saved to {ir_path}') + + + if not UNET_OV_PATH.exists(): + convert_unet(unet, UNET_OV_PATH) + else: + print(f"U-Net NAS will be loaded from {UNET_OV_PATH}") + del unet + gc.collect(); + + +.. 
parsed-literal:: + + U-Net NAS will be loaded from model/unet_nas.xml + + +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The VAE model has two parts, an encoder and a decoder. The encoder is +used to convert the image into a low dimensional latent representation, +which will serve as the input to the U-Net model. The decoder, +conversely, transforms the latent representation back into an image. + +During latent diffusion training, the encoder is used to get the latent +representations (latents) of the images for the forward diffusion +process, which applies more and more noise at each step. During +inference, the denoised latents generated by the reverse diffusion +process are converted back into images using the VAE decoder. When you +run inference for text-to-image, there is no initial image as a starting +point. You can skip this step and directly generate initial random +noise. + +As the encoder and the decoder are used independently in different parts +of the pipeline, it will be better to convert them to separate models. + +.. code:: ipython3 + + def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path): + """ + Convert VAE model for encoding to IR format. 
+ Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, + Parameters: + vae (torch.nn.Module): VAE model from StableDiffusio pipeline + ir_path (Path): File for storing model + Returns: + None + """ + class VAEEncoderWrapper(torch.nn.Module): + def __init__(self, vae): + super().__init__() + self.vae = vae + + def forward(self, image): + return self.vae.encode(x=image)["latent_dist"].sample() + vae_encoder = VAEEncoderWrapper(vae) + vae_encoder.eval() + image = torch.zeros((1, 3, 512, 512)) + with torch.no_grad(): + ov_model = ov.convert_model(vae_encoder, example_input=image, input=[((1,3,512,512),)]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect(); + print(f'VAE encoder successfully converted to IR and saved to {ir_path}') + + + if not VAE_ENCODER_OV_PATH.exists(): + convert_vae_encoder(vae, VAE_ENCODER_OV_PATH) + else: + print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH}") + + + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): + """ + Convert VAE model for decoding to IR format. 
+ Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, + Parameters: + vae (torch.nn.Module): VAE model frm StableDiffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + class VAEDecoderWrapper(torch.nn.Module): + def __init__(self, vae): + super().__init__() + self.vae = vae + + def forward(self, latents): + return self.vae.decode(latents) + + vae_decoder = VAEDecoderWrapper(vae) + latents = torch.zeros((1, 4, 64, 64)) + + vae_decoder.eval() + with torch.no_grad(): + ov_model = ov.convert_model(vae_decoder, example_input=latents, input=[((1,4,64,64),)]) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect(); + print(f'VAE decoder successfully converted to IR and saved to {ir_path}') + + + if not VAE_DECODER_OV_PATH.exists(): + convert_vae_decoder(vae, VAE_DECODER_OV_PATH) + else: + print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") + + del vae + gc.collect(); + + +.. parsed-literal:: + + VAE encoder will be loaded from model/vae_encoder.xml + VAE decoder will be loaded from model/vae_decoder.xml + + +Prepare inference pipeline +-------------------------------------------------------------------- + +Putting it all together, let us now take a closer look at how the model +works in inference by illustrating the logical flow. |sd-pipeline| + +As you can see from the diagram, the only difference between +Text-to-Image and text-guided Image-to-Image generation in approach is +how initial latent state is generated. In case of Image-to-Image +generation, you additionally have an image encoded by VAE encoder mixed +with the noise produced by using latent seed, while in Text-to-Image you +use only noise as initial latent state. 
The stable diffusion model takes +both a latent image representation of size :math:`64 \times 64` and a +text prompt, which is transformed to text embeddings of size +:math:`77 \times 768` via CLIP’s text encoder, as input. + +Next, the U-Net iteratively *denoises* the random latent image +representations while being conditioned on the text embeddings. The +output of the U-Net, being the noise residual, is used to compute a +denoised latent image representation via a scheduler algorithm. Many +different scheduler algorithms can be used for this computation, each +having its pros and cons. More information about supported scheduler +algorithms can be found in `diffusers +documentation `__. + +The theory of how the scheduler algorithm works is out of scope for +this notebook. Nonetheless, in short, you should remember that you +compute the predicted denoised image representation from the previous +noise representation and the predicted noise residual. For more +information, refer to the recommended `Elucidating the Design Space of +Diffusion-Based Generative Models `__. + +The *denoising* process is repeated a given number of times (by default 30 +for DeciDiffusion) to step-by-step retrieve better latent image +representations. When complete, the latent image representation is +decoded by the decoder part of the variational auto encoder. + +Guidance scale and negative prompt for controlling generation result. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Guidance scale controls how similar the generated image will be to the +prompt. A higher guidance scale means the model will try to generate an +image that follows the prompt more strictly. A lower guidance scale +means the model will have more creativity. ``guidance_scale`` is a way to +increase the adherence to the conditional signal that guides the +generation (text, in this case) as well as overall sample quality. 
It is +also known as `classifier-free +guidance <https://arxiv.org/abs/2207.12598>`__. The default guidance +scale in DeciDiffusion is 0.7. + +Additionally, to improve image generation quality, the model supports +negative prompting. Technically, the positive prompt steers the diffusion +toward the images associated with it, while the negative prompt steers the +diffusion away from it. In other words, the negative prompt declares +undesired concepts for the generated image. For example, if we want a +colorful and bright image, a gray scale image is a result that we want +to avoid, so in this case “gray scale” can be used as the negative prompt. The +positive and negative prompts are on equal footing. You can always use +one with or without the other. More explanation of how it works can be +found in this +`article `__. + +**Note**: negative prompting is applicable only for high guidance scale (at +least > 1). + +Strength for controlling Image-to-Image generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the Image-to-Image mode, the strength parameter plays a crucial role. +It determines the level of noise that is added to the initial image +while generating a new one. By adjusting this parameter, you can achieve +better consistency with the original image and accomplish your creative +objectives. It gives you the flexibility to make small alterations or +lets you entirely transform the image. + +Working with the strength parameter is really straightforward: you only +need to remember how the extremes work: + +- setting strength close to 0 will produce an image nearly identical to + the original, + +- setting strength to 1 will produce an image that greatly differs from + the original. + +For optimal results — combining elements from the original image with +the concepts outlined in the prompt — it is best to aim for values +between 0.4 and 0.6. + +.. 
|sd-pipeline| image:: https://user-images.githubusercontent.com/29454499/260981188-c112dd0a-5752-4515-adca-8b09bea5d14a.png + +.. code:: ipython3 + + import inspect + from typing import List, Optional, Union, Dict + + import PIL + import cv2 + + from transformers import CLIPTokenizer + from diffusers.pipelines.pipeline_utils import DiffusionPipeline + from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler + from openvino.runtime import Model + + + def scale_fit_to_window(dst_width:int, dst_height:int, image_width:int, image_height:int): + """ + Preprocessing helper function for calculating image size for resize with peserving original aspect ratio + and fitting image to specific window size + + Parameters: + dst_width (int): destination window width + dst_height (int): destination window height + image_width (int): source image width + image_height (int): source image height + Returns: + result_width (int): calculated width for resize + result_height (int): calculated height for resize + """ + im_scale = min(dst_height / image_height, dst_width / image_width) + return int(im_scale * image_width), int(im_scale * image_height) + + + def preprocess(image: PIL.Image.Image): + """ + Image preprocessing function. Takes image in PIL.Image format, resizes it to keep aspect ration and fits to model input window 512x512, + then converts it to np.ndarray and adds padding with zeros on right or bottom side of image (depends from aspect ratio), after that + converts data to float32 data type and change range of values from [0, 255] to [-1, 1], finally, converts data layout from planar NHWC to NCHW. + The function returns preprocessed input tensor and padding size, which can be used in postprocessing. 
+ + Parameters: + image (PIL.Image.Image): input image + Returns: + image (np.ndarray): preprocessed image tensor + meta (Dict): dictionary with preprocessing metadata info + """ + src_width, src_height = image.size + dst_width, dst_height = scale_fit_to_window(512, 512, src_width, src_height) + image = np.array(image.resize((dst_width, dst_height), + resample=PIL.Image.Resampling.LANCZOS))[None, :] + pad_width = 512 - dst_width + pad_height = 512 - dst_height + pad = ((0, 0), (0, pad_height), (0, pad_width), (0, 0)) + image = np.pad(image, pad, mode="constant") + image = image.astype(np.float32) / 255.0 + image = 2.0 * image - 1.0 + image = image.transpose(0, 3, 1, 2) + return image, {"padding": pad, "src_width": src_width, "src_height": src_height} + + + class OVStableDiffusionPipeline(DiffusionPipeline): + def __init__( + self, + vae_decoder: Model, + text_encoder: Model, + tokenizer: CLIPTokenizer, + unet: Model, + scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + vae_encoder: Model = None, + ): + """ + Pipeline for text-to-image generation using Stable Diffusion. + Parameters: + vae (Model): + Variational Auto-Encoder (VAE) Model to decode images to and from latent representations. + text_encoder (Model): + Frozen text-encoder. Stable Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the clip-vit-large-patch14(https://huggingface.co/openai/clip-vit-large-patch14) variant. + tokenizer (CLIPTokenizer): + Tokenizer of class CLIPTokenizer(https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + unet (Model): Conditional U-Net architecture to denoise the encoded image latents. + scheduler (SchedulerMixin): + A scheduler to be used in combination with unet to denoise the encoded image latents. Can be one of + DDIMScheduler, LMSDiscreteScheduler, or PNDMScheduler. 
+ """ + super().__init__() + self.scheduler = scheduler + self.vae_decoder = vae_decoder + self.vae_encoder = vae_encoder + self.text_encoder = text_encoder + self.unet = unet + self._text_encoder_output = text_encoder.output(0) + self._unet_output = unet.output(0) + self._vae_d_output = vae_decoder.output(0) + self._vae_e_output = vae_encoder.output(0) if vae_encoder is not None else None + self.height = 512 + self.width = 512 + self.tokenizer = tokenizer + + def __call__( + self, + prompt: Union[str, List[str]], + image: PIL.Image.Image = None, + num_inference_steps: Optional[int] = 30, + negative_prompt: Union[str, List[str]] = None, + guidance_scale: Optional[float] = 0.7, + eta: Optional[float] = 0.0, + output_type: Optional[str] = "pil", + seed: Optional[int] = None, + strength: float = 1.0, + gif: Optional[bool] = False, + **kwargs, + ): + """ + Function invoked when calling the pipeline for generation. + Parameters: + prompt (str or List[str]): + The prompt or prompts to guide the image generation. + image (PIL.Image.Image, *optional*, None): + Intinal image for generation. + num_inference_steps (int, *optional*, defaults to 30): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + negative_prompt (str or List[str]): + The negative prompt or prompts to guide the image generation. + guidance_scale (float, *optional*, defaults to 0.7): + Guidance scale as defined in Classifier-Free Diffusion Guidance(https://arxiv.org/abs/2207.12598). + guidance_scale is defined as `w` of equation 2. + Higher guidance scale encourages to generate images that are closely linked to the text prompt, + usually at the expense of lower image quality. + eta (float, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [DDIMScheduler], will be ignored for others. 
+ output_type (`str`, *optional*, defaults to "pil"): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array. + seed (int, *optional*, None): + Seed for random generator state initialization. + gif (bool, *optional*, False): + Flag for storing all steps results or not. + Returns: + Dictionary with keys: + sample - the last generated image PIL.Image.Image or np.array + iterations - *optional* (if gif=True) images for all diffusion steps, List of PIL.Image.Image or np.array. + """ + if seed is not None: + np.random.seed(seed) + + img_buffer = [] + do_classifier_free_guidance = guidance_scale > 1.0 + # get prompt text embeddings + text_embeddings = self._encode_prompt(prompt, do_classifier_free_guidance=do_classifier_free_guidance, negative_prompt=negative_prompt) + + # set timesteps + accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) + extra_set_kwargs = {} + if accepts_offset: + extra_set_kwargs["offset"] = 1 + + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) + timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength) + latent_timestep = timesteps[:1] + + # get the initial random noise unless the user supplied it + latents, meta = self.prepare_latents(image, latent_timestep) + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + for i, t in enumerate(self.progress_bar(timesteps)): + # expand the latents if you are doing classifier free guidance + latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet([latent_model_input, t, text_embeddings])[self._unet_output] + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred[0], noise_pred[1] + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(torch.from_numpy(noise_pred), t, torch.from_numpy(latents), **extra_step_kwargs)["prev_sample"].numpy() + if gif: + image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] + image = self.postprocess_image(image, meta, output_type) + img_buffer.extend(image) + + # scale and decode the image latents with vae + image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] + + image = self.postprocess_image(image, meta, output_type) + return {"sample": image, 'iterations': img_buffer} + + def _encode_prompt(self, prompt:Union[str, List[str]], num_images_per_prompt:int = 1, do_classifier_free_guidance:bool = True, negative_prompt:Union[str, List[str]] = None): + """ + Encodes the prompt into text encoder hidden states. 
+ + Parameters: + prompt (str or list(str)): prompt to be encoded + num_images_per_prompt (int): number of images that should be generated per prompt + do_classifier_free_guidance (bool): whether to use classifier free guidance or not + negative_prompt (str or list(str)): negative prompt to be encoded + Returns: + text_embeddings (np.ndarray): text encoder hidden states + """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + # tokenize input prompts + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="np", + ) + text_input_ids = text_inputs.input_ids + + text_embeddings = self.text_encoder( + text_input_ids)[self._text_encoder_output] + + # duplicate text embeddings for each generation per prompt + if num_images_per_prompt != 1: + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = np.tile( + text_embeddings, (1, num_images_per_prompt, 1)) + text_embeddings = np.reshape( + text_embeddings, (bs_embed * num_images_per_prompt, seq_len, -1)) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + max_length = text_input_ids.shape[-1] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + else: + uncond_tokens = negative_prompt + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="np", + ) + + uncond_embeddings = self.text_encoder(uncond_input.input_ids)[self._text_encoder_output] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = np.tile(uncond_embeddings, (1, num_images_per_prompt, 1)) + uncond_embeddings = np.reshape(uncond_embeddings, (batch_size * num_images_per_prompt, seq_len, -1)) + + # For 
classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + + return text_embeddings + + + def prepare_latents(self, image:PIL.Image.Image = None, latent_timestep:torch.Tensor = None): + """ + Function for getting initial latents for starting generation + + Parameters: + image (PIL.Image.Image, *optional*, None): + Input image for generation, if not provided randon noise will be used as starting point + latent_timestep (torch.Tensor, *optional*, None): + Predicted by scheduler initial step for image generation, required for latent image mixing with nosie + Returns: + latents (np.ndarray): + Image encoded in latent space + """ + latents_shape = (1, 4, self.height // 8, self.width // 8) + noise = np.random.randn(*latents_shape).astype(np.float32) + if image is None: + # if you use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas + if isinstance(self.scheduler, LMSDiscreteScheduler): + noise = noise * self.scheduler.sigmas[0].numpy() + return noise, {} + input_image, meta = preprocess(image) + latents = self.vae_encoder(input_image)[self._vae_e_output] * 0.18215 + latents = self.scheduler.add_noise(torch.from_numpy(latents), torch.from_numpy(noise), latent_timestep).numpy() + return latents, meta + + def postprocess_image(self, image:np.ndarray, meta:Dict, output_type:str = "pil"): + """ + Postprocessing for decoded image. Takes generated image decoded by VAE decoder, unpad it to initila image size (if required), + normalize and convert to [0, 255] pixels range. 
Optionally, converts it from np.ndarray to PIL.Image format + + Parameters: + image (np.ndarray): + Generated image + meta (Dict): + Metadata obtained on latents preparing step, can be empty + output_type (str, *optional*, pil): + Output format for result, can be pil or numpy + Returns: + image (List of np.ndarray or PIL.Image.Image): + Postprocessed images + """ + if "padding" in meta: + pad = meta["padding"] + (_, end_h), (_, end_w) = pad[1:3] + h, w = image.shape[2:] + unpad_h = h - end_h + unpad_w = w - end_w + image = image[:, :, :unpad_h, :unpad_w]  # strip the bottom/right padding recorded in meta + image = np.clip(image / 2 + 0.5, 0, 1)  # map [-1, 1] to [0, 1] + image = np.transpose(image, (0, 2, 3, 1))  # NCHW -> NHWC + # 9. Convert to PIL + if output_type == "pil": + image = self.numpy_to_pil(image) + if "src_height" in meta: + orig_height, orig_width = meta["src_height"], meta["src_width"] + image = [img.resize((orig_width, orig_height), + PIL.Image.Resampling.LANCZOS) for img in image] + else: + if "src_height" in meta: + orig_height, orig_width = meta["src_height"], meta["src_width"] + image = [cv2.resize(img, (orig_width, orig_height))  # cv2 dsize is (width, height), same as the PIL branch + for img in image] + return image + + def get_timesteps(self, num_inference_steps:int, strength:float): + """ + Helper function for getting scheduler timesteps for generation + In case of image-to-image generation, it updates number of steps according to strength + + Parameters: + num_inference_steps (int): + number of inference steps for generation + strength (float): + value between 0.0 and 1.0, that controls the amount of noise that is added to the input image. + Values that approach 1.0 enable lots of variations but will also produce images that are not semantically consistent with the input. 
+ """ + # get the original timestep using init_timestep + init_timestep = min(int(num_inference_steps * strength), num_inference_steps) + + t_start = max(num_inference_steps - init_timestep, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps, num_inference_steps - t_start + +Configure Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + core = ov.Core() + +First, you should create instances of OpenVINO Model and compile it +using selected device. Select device from dropdown list for running +inference using OpenVINO. + +.. code:: ipython3 + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='CPU', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + + + +.. code:: ipython3 + + text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) + +.. code:: ipython3 + + unet_model = core.compile_model(UNET_OV_PATH, device.value) + +.. code:: ipython3 + + ov_vae_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_vae_config) + vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value, ov_vae_config) + +Model tokenizer and scheduler are also important parts of the pipeline. +Let us define them and put all components together + +.. 
code:: ipython3 + + from transformers import AutoTokenizer + from diffusers import DDIMScheduler + + if not tokenizer_dir.exists(): + tokenizer = AutoTokenizer.from_pretrained(checkpoint, subfolder='tokenizer') + tokenizer.save_pretrained(tokenizer_dir) + else: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir) + + if not scheduler_config_dir.exists(): + scheduler = DDIMScheduler.from_pretrained(checkpoint, subfolder="scheduler") + scheduler.save_pretrained(scheduler_config_dir) + else: + scheduler = DDIMScheduler.from_pretrained(scheduler_config_dir) + + ov_pipe = OVStableDiffusionPipeline( + tokenizer=tokenizer, + text_encoder=text_enc, + unet=unet_model, + vae_encoder=vae_encoder, + vae_decoder=vae_decoder, + scheduler=scheduler + ) + + +.. parsed-literal:: + + Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + + +Text-to-Image generation +------------------------------------------------------------------ + +Now, let’s see model in action + +.. code:: ipython3 + + text_prompt = 'Highly detailed portrait of a small, adorable cat with round, expressive eyes and a friendly smile' + num_steps = 30 + seed = 4217 + +.. code:: ipython3 + + print('Pipeline settings') + print(f'Input text: {text_prompt}') + print(f'Seed: {seed}') + print(f'Number of steps: {num_steps}') + + +.. parsed-literal:: + + Pipeline settings + Input text: Highly detailed portrait of a small, adorable cat with round, expressive eyes and a friendly smile + Seed: 4217 + Number of steps: 30 + + +.. code:: ipython3 + + result = ov_pipe(text_prompt, num_inference_steps=num_steps, seed=seed) + + + +.. parsed-literal:: + + 0%| | 0/30 [00:00 +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/259-decidiffusion-image-generation-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/259-decidiffusion-image-generation-with-output_files/


../
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35               39088
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35              442923
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35               15736
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35               61144
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35                9338
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35              115778
+

+ diff --git a/docs/notebooks/260-pix2struct-docvqa-with-output.rst b/docs/notebooks/260-pix2struct-docvqa-with-output.rst new file mode 100644 index 00000000000000..23bc19c8c13535 --- /dev/null +++ b/docs/notebooks/260-pix2struct-docvqa-with-output.rst @@ -0,0 +1,319 @@ +Document Visual Question Answering Using Pix2Struct and OpenVINO™ +================================================================= + +DocVQA (Document Visual Question Answering) is a research field in +computer vision and natural language processing that focuses on +developing algorithms to answer questions related to the content of a +document represented in image format, like a scanned document, +screenshots, or an image of a text document. Unlike other types of +visual question answering, where the focus is on answering questions +related to images or videos, DocVQA is focused on understanding and +answering questions based on the text and layout of a document. The +questions can be about any aspect of the document text. DocVQA requires +understanding the document’s visual content and the ability to read and +comprehend the text in it. + +DocVQA offers several benefits compared to OCR (Optical Character +Recognition) technology: \* Firstly, DocVQA can not only recognize and +extract text from a document, but it can also understand the context in +which the text appears. This means it can answer questions about the +document’s content rather than simply provide a digital version. \* +Secondly, DocVQA can handle documents with complex layouts and +structures, like tables and diagrams, which can be challenging for +traditional OCR systems. \* Finally, DocVQA can automate many +document-based workflows, like document routing and approval processes, +to make employees focus on more meaningful work. The potential +applications of DocVQA include automating tasks like information +retrieval, document analysis, and document summarization. 
+ +`Pix2Struct `__ is a multimodal +model for understanding visually situated language that easily copes +with extracting information from images. The model is trained using the +novel learning technique to parse masked screenshots of web pages into +simplified HTML, providing a significantly well-suited pretraining data +source for the range of downstream activities such as OCR, visual +question answering, and image captioning. + +In this tutorial, we consider how to run the Pix2Struct model using +OpenVINO for solving document visual question answering task. We will +use a pre-trained model from the `Hugging Face +Transformers `__ +library. To simplify the user experience, the `Hugging Face +Optimum `__ library is used to +convert the model to OpenVINO™ IR format. + +**Table of contents:** + + +- `About Pix2Struct <#about-pixstruct>`__ +- `Prerequisites <#prerequisites>`__ +- `Download and Convert + Model <#download-and-convert-model>`__ +- `Select inference device <#select-inference-device>`__ +- `Test model inference <#test-model-inference>`__ +- `Interactive demo <#interactive-demo>`__ + +About Pix2Struct +---------------------------------------------------------- + +Pix2Struct is an image encoder - text decoder model that is trained on +image-text pairs for various tasks, including image captioning and +visual question answering. The model combines the simplicity of purely +pixel-level inputs with the generality and scalability provided by +self-supervised pretraining from diverse and abundant web data. The +model does this by recommending a screenshot parsing objective that +needs predicting an HTML-based parse from a screenshot of a web page +that has been partially masked. With the diversity and complexity of +textual and visual elements found on the web, Pix2Struct learns rich +representations of the underlying structure of web pages, which can +effectively transfer to various downstream visual language understanding +tasks. 
+ +Pix2Struct is based on the Vision Transformer (ViT), an +image-encoder-text-decoder model with changes in input representation to +make the model more robust to processing images with various aspect +ratios. Standard ViT extracts fixed-size patches after scaling input +images to a predetermined resolution. This distorts the proper aspect +ratio of the image, which can be highly variable for documents, mobile +UIs, and figures. Pix2Struct proposes to scale the input image up or +down to extract the maximum number of patches that fit within the given +sequence length. This approach is more robust to extreme aspect ratios, +common in the domains Pix2Struct experiments with. Additionally, the +model can handle on-the-fly changes to the sequence length and +resolution. To handle variable resolutions unambiguously, 2-dimensional +absolute positional embeddings are used for the input patches. + +Prerequisites +------------------------------------------------------- + +First, we need to install the `Hugging Face +Optimum `__ library +accelerated by OpenVINO integration. The Hugging Face Optimum API is a +high-level API that enables us to convert and quantize models from the +Hugging Face Transformers library to the OpenVINO™ IR format. For more +details, refer to the `Hugging Face Optimum +documentation `__. + +.. code:: ipython3 + + %pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" "openvino>=2023.1.0" transformers onnx gradio + +Download and Convert Model +-------------------------------------------------------------------- + +Optimum Intel can be used to load optimized models from the `Hugging +Face Hub `__ and +create pipelines to run an inference with OpenVINO Runtime using Hugging +Face APIs. The Optimum Inference models are API compatible with Hugging +Face Transformers models. 
This means we just need to replace the +``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` +class. + +Model class initialization starts with calling the ``from_pretrained`` +method. When downloading and converting the Transformers model, the +parameter ``export=True`` should be added. We can save the converted +model for the next usage with the ``save_pretrained`` method. After +model saving using the ``save_pretrained`` method, you can load your +converted model without the ``export`` parameter, avoiding model +conversion for the next time. For reducing memory consumption, we can +compress model to float16 using ``half()`` method. + +In this tutorial, we separate model export and loading for a +demonstration of how to work with the model in both modes. We will use +the +`pix2struct-docvqa-base `__ +model as an example in this tutorial, but the same steps for running are +applicable for other models from pix2struct family. + +.. code:: ipython3 + + import gc + from pathlib import Path + from optimum.intel.openvino import OVModelForPix2Struct + + model_id = "google/pix2struct-docvqa-base" + model_dir = Path(model_id.split('/')[-1]) + + if not model_dir.exists(): + ov_model = OVModelForPix2Struct.from_pretrained(model_id, export=True, compile=False) + ov_model.half() + ov_model.save_pretrained(model_dir) + del ov_model + gc.collect(); + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' + 2023-10-20 13:49:09.525682: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2023-10-20 13:49:09.565139: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-20 13:49:10.397504: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. Please import deepspeed modules directly from transformers.integrations + warnings.warn( + + +Select inference device +----------------------------------------------------------------- + +Select a device from the dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import ipywidgets as widgets + import openvino as ov + + core = ov.Core() + + device = widgets.Dropdown( + options=[d for d in core.available_devices if "GPU" not in d] + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Test model inference +-------------------------------------------------------------- + +The diagram below demonstrates how the model works: +|pix2struct_diagram.png| + +For running model inference we should preprocess data first. +``Pix2StructProcessor`` is responsible for preparing input data and +decoding output for the original PyTorch model and can easily be reused +for running with the Optimum Intel model. Then the +``OVModelForPix2Struct.generate`` method will launch answer generation. +Finally, generated answer token indices should be decoded in text format +by ``Pix2StructProcessor.decode``. + +.. 
|pix2struct_diagram.png| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/c7456b17-0687-4aa9-851b-267bff3dac79 + +.. code:: ipython3 + + from transformers import Pix2StructProcessor + + processor = Pix2StructProcessor.from_pretrained(model_id) + ov_model = OVModelForPix2Struct.from_pretrained(model_dir, device=device.value) + + +.. parsed-literal:: + + Compiling the encoder to AUTO ... + Compiling the decoder to AUTO ... + Compiling the decoder to AUTO ... + + +Let’s see the model in action. For testing the model, we will use a +screenshot from `OpenVINO +documentation `__ + +.. code:: ipython3 + + import requests + from PIL import Image + from io import BytesIO + + + def load_image(image_file): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + return image + + test_image_url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/aa46ef0c-c14d-4bab-8bb7-3b22fe73f6bc" + + image = load_image(test_image_url) + text = "What performance hints do?" + + inputs = processor(images=image, text=text, return_tensors="pt") + display(image) + + + +.. image:: 260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.png + + +.. code:: ipython3 + + answer_tokens = ov_model.generate(**inputs) + answer = processor.decode(answer_tokens[0], skip_special_tokens=True) + print(f"Question: {text}") + print(f"Answer: {answer}") + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_seq2seq.py:395: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. 
+ last_hidden_state = torch.from_numpy(self.request(inputs, shared_memory=True)["last_hidden_state"]).to( + /home/ea/work/ov_venv/lib/python3.8/site-packages/transformers/generation/utils.py:1260: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation. + warnings.warn( + /home/ea/work/ov_venv/lib/python3.8/site-packages/optimum/intel/openvino/modeling_seq2seq.py:476: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly. + self.request.start_async(inputs, shared_memory=True) + + +.. parsed-literal:: + + Question: What performance hints do? + Answer: automatically adjust runtime parameters to prioritize for low latency or high throughput + + +Interactive demo +---------------------------------------------------------- + +.. code:: ipython3 + + import gradio as gr + + example_images_urls = [ + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/94ef687c-aebb-452b-93fe-c7f29ce19503", + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/70b2271c-9295-493b-8a5c-2f2027dcb653", + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/1e2be134-0d45-4878-8e6c-08cfc9c8ea3d" + ] + + file_names = ["eiffel_tower.png", "exsibition.jpeg", "population_table.jpeg"] + + for img_url, image_file in zip(example_images_urls, file_names): + load_image(img_url).save(image_file) + + questions = ["What is Eiffel tower tall?", "When is the coffee break?", "What the population of Stoddard?"] + + examples = [list(pair) for pair in zip(file_names, questions)] + + def generate(img, question): + inputs = processor(images=img, text=question, return_tensors="pt") + predictions = ov_model.generate(**inputs, max_new_tokens=256) + return processor.decode(predictions[0], 
skip_special_tokens=True) + + demo = gr.Interface( + fn=generate, + inputs=["image", "text"], + outputs="text", + title="Pix2Struct for DocVQA", + examples=examples, + cache_examples=False, + allow_flagging="never", + ) + + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.jpg b/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.jpg new file mode 100644 index 00000000000000..6e51fa5a49d9ac --- /dev/null +++ b/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25269a3a1e49d96f9c95144211ee2614ca41782cc870f86fd758c4b9fb5d6a5 +size 134092 diff --git a/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.png b/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.png new file mode 100644 index 00000000000000..2ed53c36850f65 --- /dev/null +++ b/docs/notebooks/260-pix2struct-docvqa-with-output_files/260-pix2struct-docvqa-with-output_11_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9fa291a981591490c2e4b97a709e750c1eb7fc734dd8645888bdb97ccb935a6 +size 221889 diff --git a/docs/notebooks/260-pix2struct-docvqa-with-output_files/index.html b/docs/notebooks/260-pix2struct-docvqa-with-output_files/index.html new file mode 100644 index 00000000000000..913b17d8c6d663 --- /dev/null +++ b/docs/notebooks/260-pix2struct-docvqa-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of 
/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/260-pix2struct-docvqa-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/260-pix2struct-docvqa-with-output_files/


../
+260-pix2struct-docvqa-with-output_11_0.jpg         31-Oct-2023 00:35              134092
+260-pix2struct-docvqa-with-output_11_0.png         31-Oct-2023 00:35              221889
+

+ diff --git a/docs/notebooks/261-fast-segment-anything-with-output.rst b/docs/notebooks/261-fast-segment-anything-with-output.rst new file mode 100644 index 00000000000000..2e65fb6ae8504e --- /dev/null +++ b/docs/notebooks/261-fast-segment-anything-with-output.rst @@ -0,0 +1,598 @@ +Object segmentations with FastSAM and OpenVINO +============================================== + +`The Fast Segment Anything Model +(FastSAM) `__ is a +real-time CNN-based model that can segment any object within an image +based on various user prompts. ``Segment Anything`` task is designed to +make vision tasks easier by providing an efficient way to identify +objects in an image. FastSAM significantly reduces computational demands +while maintaining competitive performance, making it a practical choice +for a variety of vision tasks. + +FastSAM is a model that aims to overcome the limitations of the `Segment +Anything Model (SAM) `__, +which is a Transformer model that requires significant computational +resources. FastSAM tackles the segment anything task by dividing it into +two consecutive stages: all-instance segmentation and prompt-guided +selection. + +In the first stage, +`YOLOv8-seg `__ is used +to produce segmentation masks for all instances in the image. In the +second stage, FastSAM outputs the region-of-interest corresponding to +the prompt. + +.. 
figure:: https://user-images.githubusercontent.com/26833433/248551984-d98f0f6d-7535-45d0-b380-2e1440b52ad7.jpg + :alt: pipeline + + pipeline + + +**Table of contents:** +--- + +- `Requirements and Imports <#prerequisites>`__ +- `Original Pipeline Inference <#fastsam-in-ultralytics>`__ +- `Converting the Model to OpenVINO IR <#convert-the-model-to-openvino-intermediate-representation-ir-format>`__ +- `Embedding the Converted Models into the Pipeline <#embedding-the-converted-models-into-the-original-pipeline>`__ +- `Run Gradio App <#try-out-the-converted-pipeline>`__ + +Prerequisites +------------- + +Install requirements +~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + %pip install -q "ultralytics==8.0.200" onnx + %pip install -q "openvino-dev>=2023.1.0" + %pip install -q gradio + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + + +Imports +~~~~~~~ + +.. code:: ipython3 + + from pathlib import Path + + import openvino as ov + import torch + from PIL import Image, ImageDraw + from ultralytics import FastSAM + +FastSAM in Ultralytics +---------------------- + +To work with `Fast Segment Anything +Model `__ by +``CASIA-IVA-Lab``, we will use the `Ultralytics +package `__. Ultralytics package exposes +the ``FastSAM`` class, simplifying the model instantiation and weights +loading. The code below demonstrates how to initialize a ``FastSAM`` +model and generate a segmentation map. + +.. code:: ipython3 + + model_name = "FastSAM-x" + model = FastSAM(model_name) + + # Run inference on an image + image_uri = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg" + results = model(image_uri, device="cpu", retina_masks=True, imgsz=1024, conf=0.6, iou=0.9) + + +.. parsed-literal:: + + Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/FastSAM-x.pt to 'FastSAM-x.pt'... + + + +.. parsed-literal:: + + 0%| | 0.00/138M [00:00`__ +class provides access to the OpenVINO Runtime API. The ``core`` object, +which is an instance of the ``Core`` class represents the API and it is +used to compile the model. + +.. code:: ipython3 + + core = ov.Core() + +Select inference device +^^^^^^^^^^^^^^^^^^^^^^^ + +Select device that will be used to do models inference using OpenVINO +from the dropdown list: + +.. code:: ipython3 + + import ipywidgets as widgets + + DEVICE = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value="AUTO", + description="Device:", + disabled=False, + ) + + DEVICE + + + + +.. 
parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Adapt OpenVINO models to the original pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here we create wrapper classes for the OpenVINO model that we want to +embed in the original inference pipeline. Here are some of the things to +consider when adapting an OV model: + +- Make sure that parameters passed + by the original pipeline are forwarded to the compiled OV model + properly; sometimes the OV model uses only a portion of the input + arguments and some are ignored, sometimes you need to convert the + argument to another data type or unwrap some data structures such as + tuples or dictionaries. +- Guarantee that the wrapper class returns + results to the pipeline in an expected format. In the example below you + can see how we pack OV model outputs into a tuple of ``torch`` tensors. +- Pay attention to the model method used in the original pipeline for + calling the model - it may not be the ``forward`` method! In this + example, the model is a part of a ``predictor`` object and called as an + object, so we need to redefine the magic ``__call__`` method. + +.. code:: ipython3 + + class OVWrapper: + def __init__(self, ov_model, device="CPU", stride=32) -> None: + self.model = core.compile_model(ov_model, device_name=device) + + self.stride = stride + self.pt = True + self.fp16 = False + self.names = {0: "object"} + + def __call__(self, im, **_): + result = self.model(im) + return torch.from_numpy(result[0]), torch.from_numpy(result[1]) + +Now we initialize the wrapper objects and load them to the FastSAM +pipeline. + +.. code:: ipython3 + + wrapped_model = OVWrapper(ov_model_path, device=DEVICE.value, stride=model.predictor.model.stride) + model.predictor.model = wrapped_model + + ov_results = model(image_uri, device=DEVICE.value, retina_masks=True, imgsz=640, conf=0.6, iou=0.9) + + +.. 
parsed-literal:: + + + Found https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg locally at coco_bike.jpg + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/261-fast-segment-anything/coco_bike.jpg: 480x640 33 objects, 356.4ms + Speed: 3.7ms preprocess, 356.4ms inference, 16.1ms postprocess per image at shape (1, 3, 480, 640) + + +One can observe the converted model outputs in the next cell; they are +the same as those of the original model. + +.. code:: ipython3 + + Image.fromarray(ov_results[0].plot()[..., ::-1]) + + + + +.. image:: 261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.png + + + +Try out the converted pipeline +------------------------------ + +The demo app below is created using `Gradio +package `__. + +The app allows you to alter the model output interactively. Using the +Pixel selector type switch you can place foreground/background points or +bounding boxes on the input image. + +.. 
code:: ipython3 + + import cv2 + import numpy as np + import matplotlib.pyplot as plt + + def fast_process( + annotations, + image, + scale, + better_quality=False, + mask_random_color=True, + bbox=None, + use_retina=True, + with_contours=True, + ): + + original_h = image.height + original_w = image.width + + if better_quality: + for i, mask in enumerate(annotations): + mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8)) + annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8)) + # device is CPU + + inner_mask = fast_show_mask( + annotations, + plt.gca(), + random_color=mask_random_color, + bbox=bbox, + retinamask=use_retina, + target_height=original_h, + target_width=original_w, + ) + + if with_contours: + contour_all = [] + temp = np.zeros((original_h, original_w, 1)) + for i, mask in enumerate(annotations): + annotation = mask.astype(np.uint8) + if not use_retina: + annotation = cv2.resize( + annotation, + (original_w, original_h), + interpolation=cv2.INTER_NEAREST, + ) + contours, _ = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + for contour in contours: + contour_all.append(contour) + cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2 // scale) + color = np.array([0 / 255, 0 / 255, 255 / 255, 0.9]) + contour_mask = temp / 255 * color.reshape(1, 1, -1) + + image = image.convert("RGBA") + overlay_inner = Image.fromarray((inner_mask * 255).astype(np.uint8), "RGBA") + image.paste(overlay_inner, (0, 0), overlay_inner) + + if with_contours: + overlay_contour = Image.fromarray((contour_mask * 255).astype(np.uint8), "RGBA") + image.paste(overlay_contour, (0, 0), overlay_contour) + + return image + + + # CPU post process + def fast_show_mask( + annotation, + ax, + random_color=False, + bbox=None, + retinamask=True, + target_height=960, + target_width=960, + ): + mask_sum = annotation.shape[0] + height = annotation.shape[1] + weight = annotation.shape[2] + 
# + areas = np.sum(annotation, axis=(1, 2)) + sorted_indices = np.argsort(areas)[::1] + annotation = annotation[sorted_indices] + + index = (annotation != 0).argmax(axis=0) + if random_color: + color = np.random.random((mask_sum, 1, 1, 3)) + else: + color = np.ones((mask_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255]) + transparency = np.ones((mask_sum, 1, 1, 1)) * 0.6 + visual = np.concatenate([color, transparency], axis=-1) + mask_image = np.expand_dims(annotation, -1) * visual + + mask = np.zeros((height, weight, 4)) + + h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing="ij") + indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None)) + + mask[h_indices, w_indices, :] = mask_image[indices] + if bbox is not None: + x1, y1, x2, y2 = bbox + ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor="b", linewidth=1)) + + if not retinamask: + mask = cv2.resize(mask, (target_width, target_height), interpolation=cv2.INTER_NEAREST) + + return mask + +.. code:: ipython3 + + import gradio as gr + + examples = [[image_uri], ["https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/empty_road_mapillary.jpg"], + ["https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/wall.jpg"]] + + object_points = [] + background_points = [] + bbox_points = [] + last_image = examples[0][0] + +This is the main callback function that is called to segment an image +based on user input. + +.. 
code:: ipython3 + + def segment( + image, + input_size=1024, + iou_threshold=0.75, + conf_threshold=0.4, + better_quality=True, + with_contours=True, + use_retina=True, + mask_random_color=True, + ): + input_size = int(input_size) + w, h = image.size + scale = input_size / max(w, h) + new_w = int(w * scale) + new_h = int(h * scale) + image = image.resize((new_w, new_h)) + + results = model(image, + device=DEVICE.value, + retina_masks=use_retina, + iou=iou_threshold, + conf=conf_threshold, + imgsz=input_size,) + + masks = results[0].masks.data + # Calculate annotations + if not (object_points or bbox_points): + annotations = masks.cpu().numpy() + else: + annotations = [] + + if object_points: + all_points = object_points + background_points + labels = [1] * len(object_points) + [0] * len(background_points) + scaled_points = [[int(x * scale) for x in point] for point in all_points] + h, w = masks[0].shape[:2] + assert max(h, w) == input_size + onemask = np.zeros((h, w)) + for mask in sorted(masks, key=lambda x: x.sum(), reverse=True): + mask_np = (mask == 1.0).cpu().numpy() + for point, label in zip(scaled_points, labels): + if mask_np[point[1], point[0]] == 1 and label == 1: + onemask[mask_np] = 1 + if mask_np[point[1], point[0]] == 1 and label == 0: + onemask[mask_np] = 0 + annotations.append(onemask >= 1) + if len(bbox_points) >= 2: + scaled_bbox_points = [] + for i, point in enumerate(bbox_points): + x, y = int(point[0] * scale), int(point[1] * scale) + x = max(min(x, new_w), 0) + y = max(min(y, new_h), 0) + scaled_bbox_points.append((x, y)) + + for i in range(0, len(scaled_bbox_points) - 1, 2): + x0, y0, x1, y1 = *scaled_bbox_points[i], *scaled_bbox_points[i + 1] + + intersection_area = torch.sum(masks[:, y0:y1, x0:x1], dim=(1, 2)) + masks_area = torch.sum(masks, dim=(1, 2)) + bbox_area = (y1 - y0) * (x1 - x0) + + union = bbox_area + masks_area - intersection_area + iou = intersection_area / union + max_iou_index = torch.argmax(iou) + + 
annotations.append(masks[max_iou_index].cpu().numpy()) + + return fast_process( + annotations=np.array(annotations), + image=image, + scale=(1024 // input_size), + better_quality=better_quality, + mask_random_color=mask_random_color, + bbox=None, + use_retina=use_retina, + with_contours=with_contours + ) + +.. code:: ipython3 + + def select_point(img: Image.Image, point_type: str, evt: gr.SelectData) -> Image.Image: + """Gradio select callback.""" + img = img.convert("RGBA") + x, y = evt.index[0], evt.index[1] + point_radius = np.round(max(img.size) / 100) + if point_type == "Object point": + object_points.append((x, y)) + color = (30, 255, 30, 200) + elif point_type == "Background point": + background_points.append((x, y)) + color = (255, 30, 30, 200) + elif point_type == "Bounding Box": + bbox_points.append((x, y)) + color = (10, 10, 255, 255) + if len(bbox_points) % 2 == 0: + # Draw a rectangle if number of points is even + new_img = Image.new("RGBA", img.size, (255, 255, 255, 0)) + _draw = ImageDraw.Draw(new_img) + x0, y0, x1, y1 = *bbox_points[-2], *bbox_points[-1] + x0, x1 = sorted([x0, x1]) + y0, y1 = sorted([y0, y1]) + # Save sorted order + bbox_points[-2] = (x0, y0) + bbox_points[-1] = (x1, y1) + _draw.rectangle((x0, y0, x1, y1), fill=(*color[:-1], 90)) + img = Image.alpha_composite(img, new_img) + # Draw a point + ImageDraw.Draw(img).ellipse( + [(x - point_radius, y - point_radius), (x + point_radius, y + point_radius)], + fill=color + ) + return img + + def clear_points() -> (Image.Image, None): + """Gradio clear points callback.""" + global object_points, background_points, bbox_points + # global object_points; global background_points; global bbox_points + object_points = [] + background_points = [] + bbox_points = [] + return last_image, None + + def save_last_picked_image(img: Image.Image) -> None: + """Gradio callback saves the last used image.""" + global last_image + last_image = img + # If we change the input image + # we should clear all the 
previous points + clear_points() + # Removes the segmentation map output + return None + + with gr.Blocks(title="Fast SAM") as demo: + with gr.Row(variant="panel"): + original_img = gr.Image(label="Input", value=examples[0][0], type="pil") + segmented_img = gr.Image(label="Segmentation Map", type="pil") + point_type = gr.Radio( + ["Object point", "Background point", "Bounding Box"], + value="Object point", label="Pixel selector type" + ) + with gr.Row(variant="panel"): + segment_button = gr.Button("Segment", variant="primary") + clear_button = gr.Button("Clear points", variant="secondary") + gr.Examples(examples, inputs=original_img, + fn=save_last_picked_image, run_on_click=True, outputs=segmented_img + ) + + # Callbacks + original_img.select(select_point, + inputs=[original_img, point_type], + outputs=original_img) + original_img.upload(save_last_picked_image, inputs=original_img, outputs=segmented_img) + clear_button.click(clear_points, outputs=[original_img, segmented_img]) + segment_button.click(segment, inputs=[original_img,], outputs=segmented_img) + + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + + # If you are launching remotely, specify server_name and server_port + # EXAMPLE: `demo.launch(server_name="your server name", server_port="server port in int")` + # To learn more please refer to the Gradio docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + +.. .. raw:: html + +..
+ diff --git a/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.jpg b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.jpg new file mode 100644 index 00000000000000..f6931e77723557 --- /dev/null +++ b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56bd19004dc2468776185f901b0dc89f27cf90eac5a7acef461e2b29f3331db8 +size 116049 diff --git a/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.png b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.png new file mode 100644 index 00000000000000..30dcbc67ab339a --- /dev/null +++ b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc47ecebeff261010c81f3ddf86d22115cce68f46856a50ab293651ae766fe83 +size 824318 diff --git a/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.jpg b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.jpg new file mode 100644 index 00000000000000..46dc8da04fe240 --- /dev/null +++ b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4453972c42c553474934c5d9379d6634c66d53cfb6313e07a52fde374109722b +size 117489 diff --git a/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.png b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.png new file mode 100644 index 00000000000000..2a3e8e595c18ae --- /dev/null +++ 
b/docs/notebooks/261-fast-segment-anything-with-output_files/261-fast-segment-anything-with-output_9_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5125b6854db0cd1568ce74eb572a19dd58f7d50861bed81d2db6200279f5a3 +size 815077 diff --git a/docs/notebooks/261-fast-segment-anything-with-output_files/index.html b/docs/notebooks/261-fast-segment-anything-with-output_files/index.html new file mode 100644 index 00000000000000..0805c8173503d2 --- /dev/null +++ b/docs/notebooks/261-fast-segment-anything-with-output_files/index.html @@ -0,0 +1,10 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/261-fast-segment-anything-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/261-fast-segment-anything-with-output_files/


../
+261-fast-segment-anything-with-output_21_0.jpg     31-Oct-2023 00:35              116049
+261-fast-segment-anything-with-output_21_0.png     31-Oct-2023 00:35              824318
+261-fast-segment-anything-with-output_9_0.jpg      31-Oct-2023 00:35              117489
+261-fast-segment-anything-with-output_9_0.png      31-Oct-2023 00:35              815077
+

+ diff --git a/docs/notebooks/262-softvc-voice-conversion-with-output.rst b/docs/notebooks/262-softvc-voice-conversion-with-output.rst new file mode 100644 index 00000000000000..e099f18d7f3c42 --- /dev/null +++ b/docs/notebooks/262-softvc-voice-conversion-with-output.rst @@ -0,0 +1,269 @@ +SoftVC VITS Singing Voice Conversion and OpenVINO™ +================================================== + +This tutorial is based on `SoftVC VITS Singing Voice Conversion +project `__. The +purpose of this project was to enable developers to have their beloved +anime characters perform singing tasks. The developers’ intention was to +focus solely on fictional characters and avoid any involvement of real +individuals, anything related to real individuals deviates from the +developer’s original intention. + +The singing voice conversion model uses SoftVC content encoder to +extract speech features from the source audio. These feature vectors are +directly fed into `VITS `__ +without the need for conversion to a text-based intermediate +representation. As a result, the pitch and intonations of the original +audio are preserved. + +In this tutorial we will use the base model flow. + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Use the original model to run an + inference <#use-the-original-model-to-run-an-inference->`__ +- `Convert the original model to OpenVINO Intermediate Representation + (IR) + format <#convert-the-original-model-to-openvino-intermediate-representation-ir-format>`__ +- `Run the OpenVINO model <#run-the-openvino-model>`__ +- `Interactive inference <#interactive-inference>`__ + +Prerequisites +------------- + +.. 
code:: ipython3 + + %pip install -q --upgrade pip setuptools + %pip install -q "openvino>=2023.2.0.dev20230922" + !git clone https://github.com/svc-develop-team/so-vits-svc -b 4.1-Stable + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu tqdm librosa torch torchaudio faiss-cpu gradio "numpy==1.23.5" "fairseq==0.12.2" praat-parselmouth + +Download pretrained models and configs. We use a recommended encoder +`ContentVec `__ and models from `a +collection of so-vits-svc-4.0 models made by the Pony Preservation +Project `__ for +example. You can choose any other pretrained model from this or another +project or `prepare your +own `__. + +.. code:: ipython3 + + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) + from notebook_utils import download_file + + # ContentVec + download_file("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt", "checkpoint_best_legacy_500.pt", directory="so-vits-svc/pretrain/") + + # pretrained models and configs from a collection of so-vits-svc-4.0 models. You can use other models. 
+ download_file("https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Rainbow%20Dash%20(singing)/kmeans_10000.pt", "kmeans_10000.pt", directory="so-vits-svc/logs/44k/") + download_file("https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Rainbow%20Dash%20(singing)/config.json", "config.json", directory="so-vits-svc/configs/") + download_file("https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Rainbow%20Dash%20(singing)/G_30400.pth", "G_30400.pth", directory="so-vits-svc/logs/44k/") + download_file("https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Rainbow%20Dash%20(singing)/D_30400.pth", "D_30400.pth", directory="so-vits-svc/logs/44k/") + + # a wav sample + download_file("https://huggingface.co/datasets/santifiorino/spinetta/resolve/main/spinetta/000.wav", "000.wav", directory="so-vits-svc/raw/") + +Use the original model to run an inference `⇧ <#table-of-content>`__ +--------------------------------------------------------------------- + +Change directory to ``so-vits-svc`` in order not to break internal +relative paths. + +.. code:: ipython3 + + %cd so-vits-svc + +Define the Sovits Model. + +.. code:: ipython3 + + from inference.infer_tool import Svc + + model = Svc("logs/44k/G_30400.pth", "configs/config.json", device='cpu') + +Define ``kwargs`` and make an inference. + +.. code:: ipython3 + + kwargs = { + 'raw_audio_path': 'raw/000.wav', # path to a source audio + 'spk': 'Rainbow Dash (singing)', # speaker ID in which the source audio should be converted. + 'tran': 0, + 'slice_db': -40, + 'cluster_infer_ratio': 0, + 'auto_predict_f0': False, + 'noice_scale': 0.4, + } + + audio = model.slice_inference(**kwargs) + +And let’s compare the original audio with the result. + +.. code:: ipython3 + + import IPython.display as ipd + + # original + ipd.Audio("raw/000.wav", rate=model.target_sample) + +.. 
code:: ipython3 + + # result + ipd.Audio(audio, rate=model.target_sample) + +Convert to OpenVINO IR model +---------------------------- + +Model components are PyTorch modules that can be converted with the +``ov.convert_model`` function directly. We also use the ``ov.save_model`` +function to serialize the result of conversion. ``Svc`` is not a model, +it runs model inference inside. In the base scenario only ``SynthesizerTrn`` +named ``net_g_ms`` is used. It is enough to convert only this model and +we should re-assign the ``forward`` method to the ``infer`` method for this +purpose. + +``SynthesizerTrn`` uses several models inside its flow, +e.g. \ ``TextEncoder``, ``Generator``, ``ResidualCouplingBlock``, etc., +but in our case OpenVINO allows converting the whole pipeline in one step +without the need to look inside. + +.. code:: ipython3 + + import openvino as ov + import torch + from pathlib import Path + + + dummy_c = torch.randn(1, 256, 813) + dummy_f0 = torch.randn(1, 813) + dummy_uv = torch.ones(1, 813) + dummy_g = torch.tensor([[0]]) + model.net_g_ms.forward = model.net_g_ms.infer + + net_g_kwargs = { + 'c': dummy_c, + 'f0': dummy_f0, + 'uv': dummy_uv, + 'g': dummy_g, + 'noice_scale': torch.tensor(0.35), # need to wrap numeric and boolean values for conversion + 'seed': torch.tensor(52468), + 'predict_f0': torch.tensor(False), + 'vol': torch.tensor(0) + } + core = ov.Core() + + + net_g_model_xml_path = Path('models/ov_net_g_model.xml') + + if not net_g_model_xml_path.exists(): + converted_model = ov.convert_model(model.net_g_ms, example_input=net_g_kwargs) + net_g_model_xml_path.parent.mkdir(parents=True, exist_ok=True) + ov.save_model(converted_model, net_g_model_xml_path) + +Run the OpenVINO model +---------------------- + +Select a device from dropdown list for running inference using OpenVINO. + +.. 
code:: ipython3 + + import ipywidgets as widgets + import openvino as ov + + core = ov.Core() + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + +We should create a wrapper for the ``net_g_ms`` model to keep its +interface. Then replace the original ``net_g_ms`` model with the converted IR +model. We use ``core.compile_model`` to compile the model for the selected +device. + +.. code:: ipython3 + + class NetGModelWrapper: + def __init__(self, net_g_model_xml_path): + super().__init__() + self.net_g_model = core.compile_model(net_g_model_xml_path, device.value) + + def infer(self, c, *, f0, uv, g, noice_scale=0.35, seed=52468, predict_f0=False, vol=None): + if vol is None: # None is not allowed as an input + results = self.net_g_model((c, f0, uv, g, noice_scale, seed, predict_f0)) + else: + results = self.net_g_model((c, f0, uv, g, noice_scale, seed, predict_f0, vol)) + + return torch.from_numpy(results[0]), torch.from_numpy(results[1]) + + + model.net_g_ms = NetGModelWrapper(net_g_model_xml_path) + audio = model.slice_inference(**kwargs) + +Check the result. Is it identical to the one created by the original model? + +.. code:: ipython3 + + import IPython.display as ipd + + ipd.Audio(audio, rate=model.target_sample) + +Interactive inference +--------------------- + +.. code:: ipython3 + + import gradio as gr + + + src_audio = gr.inputs.Audio(label="Source Audio", type='filepath') + output_audio = gr.outputs.Audio(label="Output Audio", type='numpy') + + title = 'SoftVC VITS Singing Voice Conversion with Gradio' + description = f'Gradio Demo for SoftVC VITS Singing Voice Conversion and OpenVINO™. Upload a source audio, then click the "Submit" button to inference. 
Audio sample rate should be {model.target_sample}' + + + def infer(src_audio, tran, slice_db, noice_scale): + kwargs["raw_audio_path"] = src_audio + kwargs["tran"] = tran + kwargs["slice_db"] = slice_db + kwargs["noice_scale"] = noice_scale + + audio = model.slice_inference(**kwargs) + + return model.target_sample, audio + + + demo = gr.Interface( + infer, + [ + src_audio, + gr.Slider(-100, 100, value=0, label="Pitch shift", step=1), + gr.Slider(-80, -20, value=-30, label="Slice db", step=10, info="The default is -30, noisy audio can be -30, dry sound can be -50 to preserve breathing."), + gr.Slider(0, 1, value=0.4, label="Noise scale", step=0.1, info="Noise level will affect pronunciation and sound quality, which is more metaphysical"), + ], + output_audio, + title=title, + description=description, + examples=[['raw/000.wav', 0, -30, 0.4, False]] + ) + + try: + demo.queue().launch(debug=False) + except Exception: + demo.queue().launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/263-latent-consistency-models-image-generation-with-output.rst b/docs/notebooks/263-latent-consistency-models-image-generation-with-output.rst new file mode 100644 index 00000000000000..76989c2f754f32 --- /dev/null +++ b/docs/notebooks/263-latent-consistency-models-image-generation-with-output.rst @@ -0,0 +1,939 @@ +Image generation with Latent Consistency Model and OpenVINO +=========================================================== + +LCMs: The next generation of generative models after Latent Diffusion +Models (LDMs). Latent Diffusion models (LDMs) have achieved remarkable +results in synthesizing high-resolution images. However, the iterative +sampling is computationally intensive and leads to slow generation. 
+ +Inspired by `Consistency Models `__, +`Latent Consistency Models `__ +(LCMs) were proposed, enabling swift inference with minimal steps on any +pre-trained LDMs, including Stable Diffusion. The `Consistency Model +(CM) (Song et al., 2023) `__ is a new +family of generative models that enables one-step or few-step +generation. The core idea of the CM is to learn the function that maps +any point on a trajectory of the PF-ODE (probability flow of `ordinary +differential +equation `__) +to that trajectory’s origin (i.e., the solution of the PF-ODE). By +learning consistency mappings that maintain point consistency on +ODE-trajectory, these models allow for single-step generation, +eliminating the need for computation-intensive iterations. However, CM +is constrained to pixel space image generation tasks, making it +unsuitable for synthesizing high-resolution images. LCMs adopt a +consistency model in the image latent space for generating +high-resolution images. Viewing the guided reverse diffusion process as +solving an augmented probability flow ODE (PF-ODE), LCMs are designed to +directly predict the solution of such an ODE in latent space, mitigating +the need for numerous iterations and allowing rapid, high-fidelity +sampling. Utilizing image latent space in large-scale diffusion models +like Stable Diffusion (SD) has effectively enhanced image generation +quality and reduced computational load. The authors of LCMs provide a +simple and efficient one-stage guided consistency distillation method +named Latent Consistency Distillation (LCD) to distill SD for few-step +(2∼4) or even 1-step sampling and propose the SKIPPING-STEP technique to +further accelerate the convergence. More details about the proposed approach +and models can be found in `project +page `__, +`paper `__ and `original +repository `__. + +In this tutorial, we consider how to convert and run LCM using OpenVINO. 
+ +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Prepare models for OpenVINO format conversion <#prepare-models-for-openvino-format-conversion>`__ +- `Convert models to OpenVINO format <#convert-models-to-openvino-format>`__ +- `Text Encoder <#text-encoder>`__ +- `U-Net <#u-net>`__ +- `VAE <#vae>`__ +- `Prepare inference pipeline <#prepare-inference-pipeline>`__ +- `Configure Inference Pipeline <#configure-inference-pipeline>`__ +- `Text-to-image generation <#text-to-image-generation>`__ +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------------------------------------------------- + +.. code:: ipython3 + + %pip install -q "torch" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2023.1.0" transformers "diffusers>=0.21.4" pillow gradio + +Prepare models for OpenVINO format conversion +--------------------------------------------------------------------------------------- + +In this tutorial we will use +`LCM_Dreamshaper_v7 `__ +from `HuggingFace hub `__. This model is distilled +from `Dreamshaper v7 `__ +fine-tune of `Stable-Diffusion +v1-5 `__ using the +Latent Consistency Distillation (LCD) approach discussed above. This +model is also integrated into the +`Diffusers `__ library. 🤗 +Diffusers is the go-to library for state-of-the-art pretrained diffusion +models for generating images, audio, and even 3D structures of +molecules. This allows us to compare running original Stable Diffusion +(from this `notebook <../225-stable-diffusion-text-to-image>`__) and +distilled using LCD. The distillation approach efficiently converts a +pre-trained guided diffusion model into a latent consistency model by +solving an augmented PF-ODE. + +To start working with LCM, we should instantiate the generation pipeline +first. The ``DiffusionPipeline.from_pretrained`` method downloads all +pipeline components for LCM and configures them. 
This model uses custom +inference pipeline stored as part of model repository, we also should +provide which module should be loaded for initialization using +``custom_pipeline`` argument and revision for it. + +.. code:: ipython3 + + import gc + import warnings + from pathlib import Path + from diffusers import DiffusionPipeline + + + warnings.filterwarnings("ignore") + + TEXT_ENCODER_OV_PATH = Path("model/text_encoder.xml") + UNET_OV_PATH = Path("model/unet.xml") + VAE_DECODER_OV_PATH = Path("model/vae_decoder.xml") + + + def load_orginal_pytorch_pipeline_componets(skip_models=False): + pipe = DiffusionPipeline.from_pretrained( + "SimianLuo/LCM_Dreamshaper_v7", + custom_pipeline="latent_consistency_txt2img", + custom_revision="main", + ) + scheduler = pipe.scheduler + tokenizer = pipe.tokenizer + feature_extractor = pipe.feature_extractor + safety_checker = pipe.safety_checker + text_encoder, unet, vae = None, None, None + if not skip_models: + text_encoder = pipe.text_encoder + text_encoder.eval() + unet = pipe.unet + unet.eval() + vae = pipe.vae + vae.eval() + del pipe + gc.collect() + return ( + scheduler, + tokenizer, + feature_extractor, + safety_checker, + text_encoder, + unet, + vae, + ) + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. + warn("The installed version of bitsandbytes was compiled without GPU support. " + + +.. parsed-literal:: + + /home/ea/work/ov_venv/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 + + +.. parsed-literal:: + + 2023-10-25 13:59:59.802031: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-25 13:59:59.841632: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-25 14:00:00.487700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. code:: ipython3 + + skip_conversion = ( + TEXT_ENCODER_OV_PATH.exists() + and UNET_OV_PATH.exists() + and VAE_DECODER_OV_PATH.exists() + ) + + ( + scheduler, + tokenizer, + feature_extractor, + safety_checker, + text_encoder, + unet, + vae, + ) = load_orginal_pytorch_pipeline_componets(skip_conversion) + + + +.. parsed-literal:: + + Loading pipeline components...: 0%| | 0/6 [00:00`__ is crucial for + synthesizing high-quality text-aligned images in Stable Diffusion, + because it controls how similar the generated image will be to the + prompt. In Latent Consistency Models, CFG serves as augmentation + parameter for PF-ODE. + +Model predicts the ``sample`` state for the next step. + +.. code:: ipython3 + + def convert_unet(unet: torch.nn.Module, ir_path: Path): + """ + Convert U-net model to IR format. 
+ Function accepts unet model, prepares example inputs for conversion, + Parameters: + unet (StableDiffusionPipeline): unet from Stable Diffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + # prepare inputs + dummy_inputs = { + "sample": torch.randn((1, 4, 64, 64)), + "timestep": torch.ones([1]).to(torch.float32), + "encoder_hidden_states": torch.randn((1, 77, 768)), + "timestep_cond": torch.randn((1, 256)), + } + unet.eval() + with torch.no_grad(): + ov_model = ov.convert_model(unet, example_input=dummy_inputs) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + gc.collect() + print(f"Unet successfully converted to IR and saved to {ir_path}") + + + if not UNET_OV_PATH.exists(): + convert_unet(unet, UNET_OV_PATH) + else: + print(f"Unet will be loaded from {UNET_OV_PATH}") + del unet + gc.collect() + + +.. parsed-literal:: + + Unet will be loaded from model/unet.xml + + + + +.. parsed-literal:: + + 0 + + + +VAE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The VAE model has two parts, an encoder and a decoder. The encoder is +used to convert the image into a low dimensional latent representation, +which will serve as the input to the U-Net model. The decoder, +conversely, transforms the latent representation back into an image. + +During latent diffusion training, the encoder is used to get the latent +representations (latents) of the images for the forward diffusion +process, which applies more and more noise at each step. During +inference, the denoised latents generated by the reverse diffusion +process are converted back into images using the VAE decoder. When you +run inference for text-to-image, there is no initial image as a starting +point. You can skip this step and directly generate initial random +noise. + +In our inference pipeline, we will not use VAE encoder part and skip its +conversion for reducing memory consumption. 
The process of conversion +VAE encoder, can be found in Stable Diffusion notebook. + +.. code:: ipython3 + + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): + """ + Convert VAE model for decoding to IR format. + Function accepts vae model, creates wrapper class for export only necessary for inference part, + prepares example inputs for conversion, + Parameters: + vae (torch.nn.Module): VAE model frm StableDiffusion pipeline + ir_path (Path): File for storing model + Returns: + None + """ + + class VAEDecoderWrapper(torch.nn.Module): + def __init__(self, vae): + super().__init__() + self.vae = vae + + def forward(self, latents): + return self.vae.decode(latents) + + vae_decoder = VAEDecoderWrapper(vae) + latents = torch.zeros((1, 4, 64, 64)) + + vae_decoder.eval() + with torch.no_grad(): + ov_model = ov.convert_model(vae_decoder, example_input=latents) + ov.save_model(ov_model, ir_path) + del ov_model + cleanup_torchscript_cache() + print(f"VAE decoder successfully converted to IR and saved to {ir_path}") + + + if not VAE_DECODER_OV_PATH.exists(): + convert_vae_decoder(vae, VAE_DECODER_OV_PATH) + else: + print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") + + del vae + gc.collect() + + +.. parsed-literal:: + + VAE decoder will be loaded from model/vae_decoder.xml + + + + +.. parsed-literal:: + + 0 + + + +Prepare inference pipeline +-------------------------------------------------------------------- + +Putting it all together, let us now take a closer look at how the model +works in inference by illustrating the logical flow. + +.. figure:: https://user-images.githubusercontent.com/29454499/277402235-079bacfb-3b6d-424b-8d47-5ddf601e1639.png + :alt: lcm-pipeline + + lcm-pipeline + +The pipeline takes a latent image representation and a text prompt is +transformed to text embedding via CLIP’s text encoder as an input. The +initial latent image representation generated using random noise +generator. 
In difference, with original Stable Diffusion pipeline, LCM +also uses guidance scale for getting timestep conditional embeddings as +input for diffusion process, while in Stable Diffusion, it used for +scaling output latents. + +Next, the U-Net iteratively *denoises* the random latent image +representations while being conditioned on the text embeddings. The +output of the U-Net, being the noise residual, is used to compute a +denoised latent image representation via a scheduler algorithm. LCM +introduces own scheduling algorithm that extends the denoising procedure +introduced in denoising diffusion probabilistic models (DDPMs) with +non-Markovian guidance. The *denoising* process is repeated given number +of times (by default 50 in original SD pipeline, but for LCM small +number of steps required ~2-8) to step-by-step retrieve better latent +image representations. When complete, the latent image representation is +decoded by the decoder part of the variational auto encoder. + +.. code:: ipython3 + + from typing import Union, Optional, Any, List, Dict + from transformers import CLIPTokenizer, CLIPImageProcessor + from diffusers.pipelines.stable_diffusion.safety_checker import ( + StableDiffusionSafetyChecker, + ) + from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput + from diffusers.image_processor import VaeImageProcessor + + + class LatentConsistencyModelPipeline(DiffusionPipeline): + def __init__( + self, + vae_decoder: ov.Model, + text_encoder: ov.Model, + tokenizer: CLIPTokenizer, + unet: ov.Model, + scheduler: None, + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPImageProcessor, + requires_safety_checker: bool = True, + ): + super().__init__() + self.vae_decoder = vae_decoder + self.text_encoder = text_encoder + self.tokenizer = tokenizer + self.unet = unet + self.scheduler = scheduler + self.safety_checker = safety_checker + self.feature_extractor = feature_extractor + self.vae_scale_factor = 2**3 + 
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + + def _encode_prompt( + self, + prompt, + num_images_per_prompt, + prompt_embeds: None, + ): + r""" + Encodes the prompt into text encoder hidden states. + Args: + prompt (`str` or `List[str]`, *optional*): + prompt to be encoded + num_images_per_prompt (`int`): + number of images that should be generated per prompt + prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. + """ + + if prompt_embeds is None: + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer( + prompt, padding="longest", return_tensors="pt" + ).input_ids + + if untruncated_ids.shape[-1] >= text_input_ids.shape[ + -1 + ] and not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode( + untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1] + ) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + + prompt_embeds = self.text_encoder(text_input_ids, share_inputs=True, share_outputs=True) + prompt_embeds = torch.from_numpy(prompt_embeds[0]) + + bs_embed, seq_len, _ = prompt_embeds.shape + # duplicate text embeddings for each generation per prompt + prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) + prompt_embeds = prompt_embeds.view( + bs_embed * num_images_per_prompt, seq_len, -1 + ) + + # Don't need to get uncond prompt embedding because of LCM Guided Distillation + return prompt_embeds + + def run_safety_checker(self, image, dtype): + if self.safety_checker is None: + 
has_nsfw_concept = None + else: + if torch.is_tensor(image): + feature_extractor_input = self.image_processor.postprocess( + image, output_type="pil" + ) + else: + feature_extractor_input = self.image_processor.numpy_to_pil(image) + safety_checker_input = self.feature_extractor( + feature_extractor_input, return_tensors="pt" + ) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + return image, has_nsfw_concept + + def prepare_latents( + self, batch_size, num_channels_latents, height, width, dtype, latents=None + ): + shape = ( + batch_size, + num_channels_latents, + height // self.vae_scale_factor, + width // self.vae_scale_factor, + ) + if latents is None: + latents = torch.randn(shape, dtype=dtype) + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + def get_w_embedding(self, w, embedding_dim=512, dtype=torch.float32): + """ + see https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 + Args: + timesteps: torch.Tensor: generate embedding vectors at these timesteps + embedding_dim: int: dimension of the embeddings to generate + dtype: data type of the generated embeddings + Returns: + embedding vectors with shape `(len(timesteps), embedding_dim)` + """ + assert len(w.shape) == 1 + w = w * 1000.0 + + half_dim = embedding_dim // 2 + emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb) + emb = w.to(dtype)[:, None] * emb[None, :] + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) + if embedding_dim % 2 == 1: # zero pad + emb = torch.nn.functional.pad(emb, (0, 1)) + assert emb.shape == (w.shape[0], embedding_dim) + return emb + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]] = None, + height: Optional[int] = 512, + width: Optional[int] = 512, + 
guidance_scale: float = 7.5, + num_images_per_prompt: Optional[int] = 1, + latents: Optional[torch.FloatTensor] = None, + num_inference_steps: int = 4, + lcm_origin_steps: int = 50, + prompt_embeds: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + ): + + # 1. Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + # do_classifier_free_guidance = guidance_scale > 0.0 + # In LCM Implementation: cfg_noise = noise_cond + cfg_scale * (noise_cond - noise_uncond) , (cfg_scale > 0.0 using CFG) + + # 2. Encode input prompt + prompt_embeds = self._encode_prompt( + prompt, + num_images_per_prompt, + prompt_embeds=prompt_embeds, + ) + + # 3. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, lcm_origin_steps) + timesteps = self.scheduler.timesteps + + # 4. Prepare latent variable + num_channels_latents = 4 + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + latents, + ) + + bs = batch_size * num_images_per_prompt + + # 5. Get Guidance Scale Embedding + w = torch.tensor(guidance_scale).repeat(bs) + w_embedding = self.get_w_embedding(w, embedding_dim=256) + + # 6. 
LCM MultiStep Sampling Loop: + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + + ts = torch.full((bs,), t, dtype=torch.long) + + # model prediction (v-prediction, eps, x) + model_pred = self.unet([latents, ts, prompt_embeds, w_embedding],share_inputs=True, share_outputs=True)[0] + + # compute the previous noisy sample x_t -> x_t-1 + latents, denoised = self.scheduler.step( + torch.from_numpy(model_pred), i, t, latents, return_dict=False + ) + progress_bar.update() + + if not output_type == "latent": + image = torch.from_numpy(self.vae_decoder(denoised / 0.18215, share_inputs=True, share_outputs=True)[0]) + image, has_nsfw_concept = self.run_safety_checker( + image, prompt_embeds.dtype + ) + else: + image = denoised + has_nsfw_concept = None + + if has_nsfw_concept is None: + do_denormalize = [True] * image.shape[0] + else: + do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] + + image = self.image_processor.postprocess( + image, output_type=output_type, do_denormalize=do_denormalize + ) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput( + images=image, nsfw_content_detected=has_nsfw_concept + ) + +Configure Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, you should create instances of OpenVINO Model and compile it +using selected device. Select device from dropdown list for running +inference using OpenVINO. + +.. code:: ipython3 + + core = ov.Core() + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value="CPU", + description="Device:", + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU') + + + +.. 
code:: ipython3 + + text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) + unet_model = core.compile_model(UNET_OV_PATH, device.value) + + ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) + +Model tokenizer and scheduler are also important parts of the pipeline. +This pipeline also uses a Safety Checker, a filter that detects whether the +corresponding generated image contains “not-safe-for-work” (nsfw) +content. The process of nsfw content detection requires obtaining image +embeddings using the CLIP model, so an additional feature extractor component +should be added to the pipeline. We reuse the tokenizer, feature extractor, +scheduler and safety checker from the original LCM pipeline. + +.. code:: ipython3 + + ov_pipe = LatentConsistencyModelPipeline( + tokenizer=tokenizer, + text_encoder=text_enc, + unet=unet_model, + vae_decoder=vae_decoder, + scheduler=scheduler, + feature_extractor=feature_extractor, + safety_checker=safety_checker, + ) + +Text-to-image generation +------------------------------------------------------------------ + +Now, let’s see the model in action + +.. code:: ipython3 + + prompt = "a beautiful pink unicorn, 8k" + num_inference_steps = 4 + torch.manual_seed(1234567) + + images = ov_pipe( + prompt=prompt, + num_inference_steps=num_inference_steps, + guidance_scale=8.0, + lcm_origin_steps=50, + output_type="pil", + height=512, + width=512, + ).images + + + +.. 
parsed-literal:: + + 0%| | 0/4 [00:00 int: + if randomize_seed: + seed = random.randint(0, MAX_SEED) + return seed + + + MAX_IMAGE_SIZE = 768 + + + def generate( + prompt: str, + seed: int = 0, + width: int = 512, + height: int = 512, + guidance_scale: float = 8.0, + num_inference_steps: int = 4, + num_images: int = 1, + randomize_seed: bool = False, + progress=gr.Progress(track_tqdm=True), + ): + seed = randomize_seed_fn(seed, randomize_seed) + torch.manual_seed(seed) + result = ov_pipe( + prompt=prompt, + width=width, + height=height, + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + num_images_per_prompt=num_images, + lcm_origin_steps=50, + output_type="pil", + ).images[0] + return result, seed + + + with gr.Blocks() as demo: + with gr.Group(): + with gr.Row(): + prompt = gr.Text( + label="Prompt", + show_label=False, + max_lines=1, + placeholder="Enter your prompt", + container=False, + ) + run_button = gr.Button("Run", scale=0) + result = gr.Image(label="Image", type="pil") + with gr.Accordion("Advanced options", open=False): + seed = gr.Slider( + label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, randomize=True + ) + randomize_seed = gr.Checkbox(label="Randomize seed across runs", value=True) + with gr.Row(): + width = gr.Slider( + label="Width", + minimum=256, + maximum=MAX_IMAGE_SIZE, + step=32, + value=512, + ) + height = gr.Slider( + label="Height", + minimum=256, + maximum=MAX_IMAGE_SIZE, + step=32, + value=512, + ) + with gr.Row(): + guidance_scale = gr.Slider( + label="Guidance scale for base", + minimum=2, + maximum=14, + step=0.1, + value=8.0, + ) + num_inference_steps = gr.Slider( + label="Number of inference steps for base", + minimum=1, + maximum=8, + step=1, + value=4, + ) + + gr.Examples( + examples=examples, + inputs=prompt, + outputs=result, + fn=generate, + cache_examples=False, + ) + + gr.on( + triggers=[ + prompt.submit, + run_button.click, + ], + fn=generate, + inputs=[ + prompt, + seed, + width, + 
height, + guidance_scale, + num_inference_steps, + randomize_seed, + ], + outputs=[result, seed], + ) + +.. code:: ipython3 + + demo.queue().launch() diff --git a/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.jpg b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.jpg new file mode 100644 index 00000000000000..2396ed9e51ab6a --- /dev/null +++ b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7bb8099213e5801b7710df8a2a41d0f14f17e978489c0da322979a639767c9 +size 20240 diff --git a/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.png b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.png new file mode 100644 index 00000000000000..3f5584150d709f --- /dev/null +++ b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/263-latent-consistency-models-image-generation-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1109cac4543291a38830a7a364b36ba72f6aad7700d01191b7894b06e27e0ef +size 390302 diff --git a/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/index.html b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/index.html new file mode 100644 index 00000000000000..383d29e6d7ab33 --- /dev/null +++ b/docs/notebooks/263-latent-consistency-models-image-generation-with-output_files/index.html @@ -0,0 +1,8 @@ + +Index of 
/projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/263-latent-consistency-models-image-generation-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/263-latent-consistency-models-image-generation-with-output_files/


../
+263-latent-consistency-models-image-generation-..> 31-Oct-2023 00:35               20240
+263-latent-consistency-models-image-generation-..> 31-Oct-2023 00:35              390302
+

+ diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output.rst b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output.rst index bce43dba73904d..c0b3fd60c271ef 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output.rst +++ b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output.rst @@ -2,34 +2,28 @@ Post-Training Quantization with TensorFlow Classification Model =============================================================== This example demonstrates how to quantize the OpenVINO model that was -created in -`301-tensorflow-training-openvino.ipynb <301-tensorflow-training-openvino.ipynb>`__, -to improve inference speed. Quantization is performed with -`Post-training Quantization with +created in `301-tensorflow-training-openvino +notebook <301-tensorflow-training-openvino.ipynb>`__, to improve +inference speed. Quantization is performed with `Post-training +Quantization with NNCF `__. A custom dataloader and metric will be defined, and accuracy and performance will be computed for the original IR model and the quantized model. -.. _top: - -**Table of contents**: +**Table of contents:** +--- - `Preparation <#preparation>`__ - - - `Imports <#imports>`__ - +- `Imports <#imports>`__ - `Post-training Quantization with NNCF <#post-training-quantization-with-nncf>`__ - - - `Select inference device <#post-training-quantization-with-nncf>`__ - -- `Compare Metrics <#post-training-quantization-with-nncf>`__ +- `Select inference device <#select-inference-device>`__ +- `Compare Metrics <#compare-metrics>`__ - `Run Inference on Quantized Model <#run-inference-on-quantized-model>`__ - `Compare Inference Speed <#compare-inference-speed>`__ - -Preparation ------------ +Preparation +----------------------------------------------------- The notebook requires that the training notebook has been run and that the Intermediate Representation (IR) models are created. If the IR @@ -55,15 +49,17 @@ notebook. 
This will take a while. .. parsed-literal:: - 2023-07-05 23:54:28.962752: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-07-05 23:54:28.997784: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-31 00:10:31.765486: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:10:31.799656: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-07-05 23:54:29.609276: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-31 00:10:32.415064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: Executing training notebook. This will take a while... + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 3670 Found 3670 files belonging to 5 classes. 
Using 2936 files for training. @@ -71,7 +67,7 @@ notebook. This will take a while. .. parsed-literal:: - 2023-07-05 23:54:31.178171: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2023-10-31 00:10:36.524645: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... @@ -82,47 +78,19 @@ notebook. This will take a while. ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'] -.. parsed-literal:: - - 2023-07-05 23:54:31.493885: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-07-05 23:54:31.494167: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - - - -.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_5.png - -.. 
parsed-literal:: - - 2023-07-05 23:54:31.947372: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-07-05 23:54:31.947613: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-07-05 23:54:32.077841: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-07-05 23:54:32.078164: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] +.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_4.png .. parsed-literal:: (32, 180, 180, 3) (32,) - 0.0 1.0 - - -.. 
parsed-literal:: - - 2023-07-05 23:54:32.897047: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - 2023-07-05 23:54:32.897375: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] + 0.0 0.99167764 -.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_9.png +.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_6.png .. parsed-literal:: @@ -137,18 +105,18 @@ notebook. This will take a while. conv2d_3 (Conv2D) (None, 180, 180, 16) 448 - max_pooling2d_3 (MaxPooling (None, 90, 90, 16) 0 - 2D) + max_pooling2d_3 (MaxPoolin (None, 90, 90, 16) 0 + g2D) conv2d_4 (Conv2D) (None, 90, 90, 32) 4640 - max_pooling2d_4 (MaxPooling (None, 45, 45, 32) 0 - 2D) + max_pooling2d_4 (MaxPoolin (None, 45, 45, 32) 0 + g2D) conv2d_5 (Conv2D) (None, 45, 45, 64) 18496 - max_pooling2d_5 (MaxPooling (None, 22, 22, 64) 0 - 2D) + max_pooling2d_5 (MaxPoolin (None, 22, 22, 64) 0 + g2D) dropout (Dropout) (None, 22, 22, 64) 0 @@ -159,119 +127,50 @@ notebook. This will take a while. outputs (Dense) (None, 5) 645 ================================================================= - Total params: 3,989,285 - Trainable params: 3,989,285 - Non-trainable params: 0 + Total params: 3989285 (15.22 MB) + Trainable params: 3989285 (15.22 MB) + Non-trainable params: 0 (0.00 Byte) _________________________________________________________________ Epoch 1/15 - - -.. 
parsed-literal:: - - 2023-07-05 23:54:33.773069: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - 2023-07-05 23:54:33.773519: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - - -.. parsed-literal:: - - 92/92 [==============================] - ETA: 0s - loss: 1.2943 - accuracy: 0.4486 - -.. parsed-literal:: - - 2023-07-05 23:54:40.025734: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [734] - [[{{node Placeholder/_0}}]] - 2023-07-05 23:54:40.026032: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [734] - [[{{node Placeholder/_0}}]] - - -.. 
parsed-literal:: - - 92/92 [==============================] - 7s 66ms/step - loss: 1.2943 - accuracy: 0.4486 - val_loss: 1.0944 - val_accuracy: 0.5354 + 92/92 [==============================] - 6s 60ms/step - loss: 1.2926 - accuracy: 0.4435 - val_loss: 1.0857 - val_accuracy: 0.5327 Epoch 2/15 - 92/92 [==============================] - 6s 63ms/step - loss: 1.0396 - accuracy: 0.5787 - val_loss: 0.9602 - val_accuracy: 0.6322 + 92/92 [==============================] - 5s 57ms/step - loss: 1.0228 - accuracy: 0.5991 - val_loss: 0.9881 - val_accuracy: 0.6226 Epoch 3/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.9646 - accuracy: 0.6213 - val_loss: 0.9223 - val_accuracy: 0.6417 + 92/92 [==============================] - 5s 57ms/step - loss: 0.9082 - accuracy: 0.6519 - val_loss: 0.8962 - val_accuracy: 0.6526 Epoch 4/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.8775 - accuracy: 0.6533 - val_loss: 0.8511 - val_accuracy: 0.6594 + 92/92 [==============================] - 5s 57ms/step - loss: 0.8277 - accuracy: 0.6832 - val_loss: 0.9586 - val_accuracy: 0.6540 Epoch 5/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.8354 - accuracy: 0.6884 - val_loss: 0.8471 - val_accuracy: 0.6689 + 92/92 [==============================] - 5s 57ms/step - loss: 0.7965 - accuracy: 0.6853 - val_loss: 0.8849 - val_accuracy: 0.6689 Epoch 6/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.7722 - accuracy: 0.7033 - val_loss: 0.8405 - val_accuracy: 0.6935 + 92/92 [==============================] - 5s 57ms/step - loss: 0.7680 - accuracy: 0.7044 - val_loss: 0.7855 - val_accuracy: 0.6962 Epoch 7/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.7347 - accuracy: 0.7207 - val_loss: 0.8848 - val_accuracy: 0.6730 + 92/92 [==============================] - 5s 57ms/step - loss: 0.7319 - accuracy: 0.7292 - val_loss: 0.7772 - val_accuracy: 0.7016 Epoch 8/15 - 92/92 [==============================] - 6s 63ms/step 
- loss: 0.6980 - accuracy: 0.7469 - val_loss: 0.7724 - val_accuracy: 0.6948 + 92/92 [==============================] - 5s 57ms/step - loss: 0.6945 - accuracy: 0.7415 - val_loss: 0.7605 - val_accuracy: 0.7071 Epoch 9/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.6629 - accuracy: 0.7476 - val_loss: 0.7512 - val_accuracy: 0.7071 + 92/92 [==============================] - 5s 57ms/step - loss: 0.6561 - accuracy: 0.7490 - val_loss: 0.7764 - val_accuracy: 0.6948 Epoch 10/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.6429 - accuracy: 0.7643 - val_loss: 0.7196 - val_accuracy: 0.7125 + 92/92 [==============================] - 5s 57ms/step - loss: 0.6333 - accuracy: 0.7568 - val_loss: 0.7509 - val_accuracy: 0.7207 Epoch 11/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.5967 - accuracy: 0.7755 - val_loss: 0.7228 - val_accuracy: 0.7084 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5991 - accuracy: 0.7766 - val_loss: 0.7724 - val_accuracy: 0.7153 Epoch 12/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.5860 - accuracy: 0.7769 - val_loss: 0.7501 - val_accuracy: 0.7153 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5786 - accuracy: 0.7810 - val_loss: 0.7096 - val_accuracy: 0.7275 Epoch 13/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.5695 - accuracy: 0.7793 - val_loss: 0.7366 - val_accuracy: 0.7153 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5741 - accuracy: 0.7858 - val_loss: 0.6902 - val_accuracy: 0.7384 Epoch 14/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.5392 - accuracy: 0.7970 - val_loss: 0.7375 - val_accuracy: 0.7275 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5555 - accuracy: 0.7892 - val_loss: 0.7097 - val_accuracy: 0.7193 Epoch 15/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.5098 - accuracy: 0.8048 - val_loss: 0.6984 - val_accuracy: 
0.7330 - - - -.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_15.png - - -.. parsed-literal:: - - 1/1 [==============================] - 0s 76ms/step - This image most likely belongs to sunflowers with a 99.23 percent confidence. + 92/92 [==============================] - 5s 57ms/step - loss: 0.5330 - accuracy: 0.8038 - val_loss: 0.7023 - val_accuracy: 0.7289 -.. parsed-literal:: - 2023-07-05 23:56:03.289411: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_input' with dtype float and shape [?,180,180,3] - [[{{node random_flip_input}}]] - 2023-07-05 23:56:03.376040: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.385907: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_input' with dtype float and shape [?,180,180,3] - [[{{node random_flip_input}}]] - 2023-07-05 23:56:03.396762: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.403700: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate 
an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.410703: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.421394: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.461681: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'sequential_1_input' with dtype float and shape [?,180,180,3] - [[{{node sequential_1_input}}]] - 2023-07-05 23:56:03.529355: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.549619: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'sequential_1_input' with dtype float and shape [?,180,180,3] - [[{{node sequential_1_input}}]] - 2023-07-05 23:56:03.588567: I tensorflow/core/common_runtime/executor.cc:1197] 
[/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,22,22,64] - [[{{node inputs}}]] - 2023-07-05 23:56:03.611996: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.685894: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.828047: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:03.965814: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,22,22,64] - [[{{node inputs}}]] - 2023-07-05 23:56:03.999799: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:04.028229: I 
tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-07-05 23:56:04.074705: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading. +.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_8.png .. parsed-literal:: + 1/1 [==============================] - 0s 71ms/step + This image most likely belongs to sunflowers with a 97.82 percent confidence. INFO:tensorflow:Assets written to: model/flower/saved_model/assets @@ -290,15 +189,15 @@ notebook. This will take a while. (1, 180, 180, 3) [1,180,180,3] - This image most likely belongs to dandelion with a 99.81 percent confidence. + This image most likely belongs to dandelion with a 99.80 percent confidence. -.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_22.png +.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_13.png -Imports -~~~~~~~ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Post Training Quantization API is implemented in the ``nncf`` library. @@ -324,8 +223,8 @@ library. INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, tensorflow, onnx, openvino -Post-training Quantization with NNCF ------------------------------------- +Post-training Quantization with NNCF +------------------------------------------------------------------------------ `NNCF `__ provides a suite of advanced algorithms for Neural Networks inference optimization in @@ -336,7 +235,7 @@ calibration dataset. The optimization process contains the following steps: 1. Create a Dataset for quantization. -2. Run ``nncf.quantize`` for getting an optimized model. +2. Run nncf.quantize for getting an optimized model. The validation dataset already defined in the training notebook. @@ -365,14 +264,6 @@ The validation dataset already defined in the training notebook. -.. parsed-literal:: - - 2023-07-05 23:56:07.075279: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [734] - [[{{node Placeholder/_4}}]] - 2023-07-05 23:56:07.075533: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [734] - [[{{node Placeholder/_4}}]] - - The validation dataset can be reused in quantization process. But it returns a tuple (images, labels), whereas calibration_dataset should only return images. The transformation function helps to transform a @@ -395,15 +286,15 @@ Download Intermediate Representation (IR) model. .. code:: ipython3 - ie = Core() - ir_model = ie.read_model(model_xml) + core = Core() + ir_model = core.read_model(model_xml) Use `Basic Quantization -Flow `__. +Flow `__. 
To use the most advanced quantization flow that allows to apply 8-bit quantization to the model with accuracy control see `Quantizing with accuracy -control `__. +control `__. .. code:: ipython3 @@ -416,8 +307,8 @@ control `_ .. parsed-literal:: - Statistics collection: 73%|███████▎ | 734/1000 [00:04<00:01, 166.65it/s] - Biases correction: 100%|██████████| 5/5 [00:01<00:00, 3.99it/s] + Statistics collection: 73%|███████▎ | 734/1000 [00:04<00:01, 166.59it/s] + Applying Fast Bias correction: 100%|██████████| 5/5 [00:01<00:00, 3.92it/s] Save quantized model to benchmark. @@ -429,8 +320,35 @@ Save quantized model to benchmark. compressed_model_xml = compressed_model_dir / "flower_ir.xml" serialize(quantized_model, str(compressed_model_xml)) -Compare Metrics ---------------- +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +select device from dropdown list for running inference using OpenVINO + +.. code:: ipython3 + + import ipywidgets as widgets + + device = widgets.Dropdown( + options=core.available_devices + ["AUTO"], + value='AUTO', + description='Device:', + disabled=False, + ) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Compare Metrics +--------------------------------------------------------- Define a metric to determine the performance of the model. @@ -468,8 +386,8 @@ Calculate accuracy for the original model and the quantized model. .. 
code:: ipython3 - original_compiled_model = ie.compile_model(model=ir_model, device_name="CPU") - quantized_compiled_model = ie.compile_model(model=quantized_model, device_name="CPU") + original_compiled_model = core.compile_model(model=ir_model, device_name=device.value) + quantized_compiled_model = core.compile_model(model=quantized_model, device_name=device.value) original_accuracy = validate(original_compiled_model, val_dataset) quantized_accuracy = validate(quantized_compiled_model, val_dataset) @@ -480,8 +398,8 @@ Calculate accuracy for the original model and the quantized model. .. parsed-literal:: - Accuracy of the original model: 0.733 - Accuracy of the quantized model: 0.737 + Accuracy of the original model: 0.729 + Accuracy of the quantized model: 0.730 Compare file size of the models. @@ -504,8 +422,8 @@ Compare file size of the models. So, we can see that the original and quantized models have similar accuracy with a much smaller size of the quantized model. -Run Inference on Quantized Model --------------------------------- +Run Inference on Quantized Model +-------------------------------------------------------------------------- Copy the preprocess function from the training notebook and run inference on the quantized model with Inference Engine. See the @@ -573,18 +491,18 @@ Python API. 'output/A_Close_Up_Photo_of_a_Dandelion.jpg' already exists. input image shape: (1, 180, 180, 3) input layer shape: [1,180,180,3] - This image most likely belongs to dandelion with a 99.82 percent confidence. + This image most likely belongs to dandelion with a 99.79 percent confidence. -.. image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_24_1.png +.. 
image:: 301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_26_1.png -Compare Inference Speed ------------------------ +Compare Inference Speed +----------------------------------------------------------------- Measure inference speed with the `OpenVINO Benchmark -App `__. +App `__. Benchmark App is a command line tool that measures raw inference performance for a specified OpenVINO IR model. Run @@ -594,7 +512,7 @@ the ``-m`` parameter with asynchronous inference on CPU, for one minute. Use the ``-d`` parameter to test performance on a different device, for example an Intel integrated Graphics (iGPU), and ``-t`` to set the number of seconds to run inference. See the -`documentation `__ +`documentation `__ for more information. This tutorial uses a wrapper function from `Notebook @@ -605,20 +523,17 @@ In the next cells, inference speed will be measured for the original and quantized model on CPU. If an iGPU is available, inference speed will be measured for CPU+GPU as well. The number of seconds is set to 15. -.. note:: - - For the most accurate performance estimation, it is + **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. - .. code:: ipython3 # print the available devices on this system print("Device information:") - print(ie.get_property("CPU", "FULL_DEVICE_NAME")) - if "GPU" in ie.available_devices: - print(ie.get_property("GPU", "FULL_DEVICE_NAME")) + print(core.get_property("CPU", "FULL_DEVICE_NAME")) + if "GPU" in core.available_devices: + print(core.get_property("GPU", "FULL_DEVICE_NAME")) .. parsed-literal:: @@ -639,18 +554,18 @@ measured for CPU+GPU as well. The number of seconds is set to 15. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 
2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 12.02 ms + [ INFO ] Read model took 12.32 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] sequential_1_input (node: sequential_1_input) : f32 / [...] / [1,180,180,3] @@ -664,7 +579,7 @@ measured for CPU+GPU as well. The number of seconds is set to 15. [ INFO ] Model outputs: [ INFO ] outputs (node: sequential_2/outputs/BiasAdd) : f32 / [...] / [1,5] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 76.79 ms + [ INFO ] Compile model took 61.75 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: TensorFlow_Frontend_IR @@ -681,22 +596,24 @@ measured for CPU+GPU as well. The number of seconds is set to 15. [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE [ INFO ] ENABLE_HYPER_THREADING: True [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'sequential_1_input'!. This input will be filled with random values! 
[ INFO ] Fill input 'sequential_1_input' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 7.22 ms + [ INFO ] First inference took 10.01 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 57276 iterations - [ INFO ] Duration: 15002.57 ms + [ INFO ] Count: 58152 iterations + [ INFO ] Duration: 15004.57 ms [ INFO ] Latency: [ INFO ] Median: 2.90 ms - [ INFO ] Average: 2.95 ms - [ INFO ] Min: 1.67 ms - [ INFO ] Max: 234.29 ms - [ INFO ] Throughput: 3817.75 FPS + [ INFO ] Average: 2.91 ms + [ INFO ] Min: 2.15 ms + [ INFO ] Max: 11.75 ms + [ INFO ] Throughput: 3875.62 FPS .. code:: ipython3 @@ -711,18 +628,18 @@ measured for CPU+GPU as well. The number of seconds is set to 15. [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] Device info: [ INFO ] CPU - [ INFO ] Build ................................. 2023.0.0-10926-b4452d56304-releases/2023/0 + [ INFO ] Build ................................. 2023.1.0-12185-9e6b00e51cd-releases/2023/1 [ INFO ] [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 12.35 ms + [ INFO ] Read model took 13.50 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] sequential_1_input (node: sequential_1_input) : f32 / [...] / [1,180,180,3] @@ -736,7 +653,7 @@ measured for CPU+GPU as well. 
The number of seconds is set to 15. [ INFO ] Model outputs: [ INFO ] outputs (node: sequential_2/outputs/BiasAdd) : f32 / [...] / [1,5] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 54.95 ms + [ INFO ] Compile model took 57.59 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: TensorFlow_Frontend_IR @@ -753,22 +670,24 @@ measured for CPU+GPU as well. The number of seconds is set to 15. [ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE [ INFO ] ENABLE_HYPER_THREADING: True [ INFO ] EXECUTION_DEVICES: ['CPU'] + [ INFO ] CPU_DENORMALS_OPTIMIZATION: False + [ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0 [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'sequential_1_input'!. This input will be filled with random values! [ INFO ] Fill input 'sequential_1_input' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 2.06 ms + [ INFO ] First inference took 1.99 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 178752 iterations - [ INFO ] Duration: 15001.22 ms + [ INFO ] Count: 178968 iterations + [ INFO ] Duration: 15001.10 ms [ INFO ] Latency: [ INFO ] Median: 0.92 ms [ INFO ] Average: 0.92 ms - [ INFO ] Min: 0.54 ms - [ INFO ] Max: 4.90 ms - [ INFO ] Throughput: 11915.83 FPS + [ INFO ] Min: 0.58 ms + [ INFO ] Max: 6.18 ms + [ INFO ] Throughput: 11930.32 FPS **Benchmark on MULTI:CPU,GPU** @@ -786,7 +705,7 @@ cached to the ``model_cache`` directory. .. code:: ipython3 # Original model - MULTI:CPU,GPU - if "GPU" in ie.available_devices: + if "GPU" in core.available_devices: ! 
benchmark_app -m $model_xml -d MULTI:CPU,GPU -t 15 -api async else: print("A supported integrated GPU is not available on this system.") @@ -800,7 +719,7 @@ cached to the ``model_cache`` directory. .. code:: ipython3 # Quantized model - MULTI:CPU,GPU - if "GPU" in ie.available_devices: + if "GPU" in core.available_devices: ! benchmark_app -m $compressed_model_xml -d MULTI:CPU,GPU -t 15 -api async else: print("A supported integrated GPU is not available on this system.") @@ -815,9 +734,9 @@ cached to the ``model_cache`` directory. # print the available devices on this system print("Device information:") - print(ie.get_property("CPU", "FULL_DEVICE_NAME")) - if "GPU" in ie.available_devices: - print(ie.get_property("GPU", "FULL_DEVICE_NAME")) + print(core.get_property("CPU", "FULL_DEVICE_NAME")) + if "GPU" in core.available_devices: + print(core.get_property("GPU", "FULL_DEVICE_NAME")) .. parsed-literal:: @@ -838,14 +757,14 @@ cached to the ``model_cache`` directory. .. parsed-literal:: - [ INFO ] Count: 58332 iterations - [ INFO ] Duration: 15005.08 ms + [ INFO ] Count: 58680 iterations + [ INFO ] Duration: 15004.60 ms [ INFO ] Latency: [ INFO ] Median: 2.88 ms - [ INFO ] Average: 2.89 ms - [ INFO ] Min: 2.02 ms - [ INFO ] Max: 8.94 ms - [ INFO ] Throughput: 3887.48 FPS + [ INFO ] Average: 2.87 ms + [ INFO ] Min: 2.00 ms + [ INFO ] Max: 11.71 ms + [ INFO ] Throughput: 3910.80 FPS **Quantized IR model - CPU** @@ -860,14 +779,14 @@ cached to the ``model_cache`` directory. .. parsed-literal:: - [ INFO ] Count: 179124 iterations - [ INFO ] Duration: 15001.17 ms + [ INFO ] Count: 179220 iterations + [ INFO ] Duration: 15000.72 ms [ INFO ] Latency: [ INFO ] Median: 0.92 ms [ INFO ] Average: 0.92 ms [ INFO ] Min: 0.56 ms - [ INFO ] Max: 4.33 ms - [ INFO ] Throughput: 11940.67 FPS + [ INFO ] Max: 6.53 ms + [ INFO ] Throughput: 11947.42 FPS **Original IR model - MULTI:CPU,GPU** @@ -875,13 +794,13 @@ cached to the ``model_cache`` directory. 
With a recent Intel CPU, the best performance can often be achieved by doing inference on both the CPU and the iGPU, with OpenVINO’s `Multi Device -Plugin `__. +Plugin `__. It takes a bit longer to load a model on GPU than on CPU, so this benchmark will take a bit longer to complete than the CPU benchmark. .. code:: ipython3 - if "GPU" in ie.available_devices: + if "GPU" in core.available_devices: benchmark_output = %sx benchmark_app -m $model_xml -d MULTI:CPU,GPU -t 15 -api async # Remove logging info from benchmark_app output and show only the results benchmark_result = benchmark_output[-8:] @@ -899,7 +818,7 @@ benchmark will take a bit longer to complete than the CPU benchmark. .. code:: ipython3 - if "GPU" in ie.available_devices: + if "GPU" in core.available_devices: benchmark_output = %sx benchmark_app -m $compressed_model_xml -d MULTI:CPU,GPU -t 15 -api async # Remove logging info from benchmark_app output and show only the results benchmark_result = benchmark_output[-8:] diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_24_1.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_26_1.png similarity index 100% rename from docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_24_1.png rename to docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_26_1.png diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_22.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_13.png similarity index 100% rename from 
docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_22.png rename to docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_13.png diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_15.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_15.png deleted file mode 100644 index 236759738b3bf6..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_15.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3af8d92a72b6dfb54c116ab9a052ffc4d6783058db964f3b9ac4c63eb246a4f -size 55498 diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_5.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_4.png similarity index 100% rename from docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_5.png rename to docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_4.png diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_6.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_6.png new file mode 100644 index 00000000000000..a911275a32ea15 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_6.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8302cee83409eb402c9f8c3cb9cf9fbe23040c150441df5bef3dfe53ce1fd2b4 +size 1023105 diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_8.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_8.png new file mode 100644 index 00000000000000..9fc7aaa2c7d523 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_8.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e712ca63dc0da92137baa0d0f1e414932bccd22cfe332c0864c16ec2a58c034 +size 56298 diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_9.png b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_9.png deleted file mode 100644 index eeb03a119261b9..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/301-tensorflow-training-openvino-nncf-with-output_2_9.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a62d60162298ff48fa9224f12ef371e8a62ec7778c10eadca436339f74aa9253 -size 433486 diff --git a/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/index.html b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/index.html new file mode 100644 index 00000000000000..eb2637e5c8ce5c --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-nncf-with-output_files/index.html @@ -0,0 +1,11 @@ + +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/301-tensorflow-training-openvino-nncf-with-output_files/ + +

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/301-tensorflow-training-openvino-nncf-with-output_files/


../
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35              143412
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35              143412
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35              941151
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35             1023105
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35               56298
+

+ diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output.rst b/docs/notebooks/301-tensorflow-training-openvino-with-output.rst index 6bb74ee2b89df4..657b6a1cd7bc48 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output.rst +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output.rst @@ -1,40 +1,45 @@ From Training to Deployment with TensorFlow and OpenVINO™ ========================================================= - - -.. _top: - -**Table of contents**: - -- `TensorFlow Image Classification Training <#tensorflow-image-classification-training>`__ -- `Import TensorFlow and Other Libraries <#import-tensorflow-and-other-libraries>`__ -- `Download and Explore the Dataset <#download-and-explore-the-dataset>`__ -- `Load Using keras.preprocessing <#load-using-keras.preprocessing>`__ -- `Create a Dataset <#create-a-dataset>`__ -- `Visualize the Data <#visualize-the-data>`__ -- `Configure the Dataset for Performance <#configure-the-dataset-for-performance>`__ -- `Standardize the Data <#standardize-the-data>`__ -- `Create the Model <#create-the-model>`__ -- `Compile the Model <#compile-the-model>`__ -- `Model Summary <#model-summary>`__ -- `Train the Model <#train-the-model>`__ -- `Visualize Training Results <#visualize-training-results>`__ -- `Overfitting <#overfitting>`__ -- `Data Augmentation <#data-augmentation>`__ -- `Dropout <#dropout>`__ -- `Compile and Train the Model <#compile-and-train-the-model>`__ -- `Visualize Training Results <#visualize-training-results>`__ -- `Predict on New Data <#predict-on-new-data>`__ -- `Save the TensorFlow Model <#save-the-tensorflow-model>`__ -- `Convert the TensorFlow model with OpenVINO Model Optimizer <#convert-the-tensorflow-model-with-openvino-model-optimizer>`__ -- `Preprocessing Image Function <#preprocessing-image-function>`__ -- `OpenVINO Runtime Setup <#openvino-runtime-setup>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Run the Inference Step 
<#run-the-inference-step>`__ -- `The Next Steps <#the-next-steps>`__ +**Table of contents:** + + +- `TensorFlow Image Classification + Training <#tensorflow-image-classification-training>`__ +- `Import TensorFlow and Other + Libraries <#import-tensorflow-and-other-libraries>`__ +- `Download and Explore the + Dataset <#download-and-explore-the-dataset>`__ +- `Load Using + keras.preprocessing <#load-using-keraspreprocessing>`__ +- `Create a Dataset <#create-a-dataset>`__ +- `Visualize the Data <#visualize-the-data>`__ +- `Configure the Dataset for + Performance <#configure-the-dataset-for-performance>`__ +- `Standardize the Data <#standardize-the-data>`__ +- `Create the Model <#create-the-model>`__ +- `Compile the Model <#compile-the-model>`__ +- `Model Summary <#model-summary>`__ +- `Train the Model <#train-the-model>`__ +- `Visualize Training Results <#visualize-training-results>`__ +- `Overfitting <#overfitting>`__ +- `Data Augmentation <#data-augmentation>`__ +- `Dropout <#dropout>`__ +- `Compile and Train the + Model <#compile-and-train-the-model>`__ +- `Visualize Training Results <#visualize-training-results>`__ +- `Predict on New Data <#predict-on-new-data>`__ +- `Save the TensorFlow Model <#save-the-tensorflow-model>`__ +- `Convert the TensorFlow model with OpenVINO Model Conversion + API <#convert-the-tensorflow-model-with-openvino-model-conversion-api>`__ +- `Preprocessing Image + Function <#preprocessing-image-function>`__ +- `OpenVINO Runtime Setup <#openvino-runtime-setup>`__ + + - `Select inference device <#select-inference-device>`__ + +- `Run the Inference Step <#run-the-inference-step>`__ +- `The Next Steps <#the-next-steps>`__ .. code:: ipython3 @@ -72,10 +77,20 @@ The ``flower_ir.bin`` and ``flower_ir.xml`` (pre-trained models) can be obtained by executing the code with ‘Runtime->Run All’ or the ``Ctrl+F9`` command. 
-TensorFlow Image Classification Training `⇑ <#top>`__ -############################################################################################################################### +.. code:: ipython3 + + %pip install -q "openvino>=2023.1.0" + + +.. parsed-literal:: + + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. +TensorFlow Image Classification Training +---------------------------------------------------------------------------------- + The first part of the tutorial shows how to classify images of flowers (based on the TensorFlow’s official tutorial). It creates an image classifier using a ``keras.Sequential`` model, and loads data using @@ -94,9 +109,8 @@ This tutorial follows a basic machine learning workflow: 4. Train the model 5. Test the model -Import TensorFlow and Other Libraries `⇑ <#top>`__ -############################################################################################################################### - +Import TensorFlow and Other Libraries +------------------------------------------------------------------------------- .. code:: ipython3 @@ -109,9 +123,7 @@ Import TensorFlow and Other Libraries `⇑ <#top>`__ import numpy as np import tensorflow as tf from PIL import Image - from openvino.runtime import Core - from openvino.tools import mo - from openvino.runtime import serialize + import openvino as ov from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.models import Sequential @@ -122,15 +134,14 @@ Import TensorFlow and Other Libraries `⇑ <#top>`__ .. 
parsed-literal:: - 2023-08-16 01:08:54.169184: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-16 01:08:54.203604: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-31 00:13:25.408072: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:13:25.442949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-16 01:08:54.707315: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-31 00:13:25.953408: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -Download and Explore the Dataset `⇑ <#top>`__ -############################################################################################################################### - +Download and Explore the Dataset +-------------------------------------------------------------------------- This tutorial uses a dataset of about 3,700 photos of flowers. The dataset contains 5 sub-directories, one per class: @@ -175,7 +186,7 @@ Here are some roses: -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.png +.. 
image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png @@ -186,7 +197,7 @@ Here are some roses: -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.png @@ -200,7 +211,7 @@ And some tulips: -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png @@ -211,13 +222,12 @@ And some tulips: -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png - +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.png -Load Using keras.preprocessing `⇑ <#top>`__ -############################################################################################################################### +Load Using keras.preprocessing +------------------------------------------------------------------------ Let’s load these images off disk using the helpful `image_dataset_from_directory `__ @@ -227,9 +237,8 @@ also write your own data loading code from scratch by visiting the `load images `__ tutorial. -Create a Dataset `⇑ <#top>`__ -############################################################################################################################### - +Create a Dataset +---------------------------------------------------------- Define some parameters for the loader: @@ -261,7 +270,7 @@ Let’s use 80% of the images for training, and 20% for validation. .. parsed-literal:: - 2023-08-16 01:08:56.066599: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. 
Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2023-10-31 00:13:27.260838: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... @@ -296,9 +305,8 @@ datasets. These correspond to the directory names in alphabetical order. ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'] -Visualize the Data `⇑ <#top>`__ -############################################################################################################################### - +Visualize the Data +------------------------------------------------------------ Here are the first 9 images from the training dataset. @@ -313,16 +321,8 @@ Here are the first 9 images from the training dataset. plt.axis("off") -.. parsed-literal:: - - 2023-08-16 01:08:56.428488: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-08-16 01:08:56.429092: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - - -.. 
image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_28_1.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_29_0.png You will train a model using these datasets by passing them to @@ -343,14 +343,6 @@ over the dataset and retrieve batches of images: (32,) -.. parsed-literal:: - - 2023-08-16 01:08:56.917347: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-08-16 01:08:56.917776: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - - The ``image_batch`` is a tensor of the shape ``(32, 180, 180, 3)``. This is a batch of 32 images of shape ``180x180x3`` (the last dimension refers to color channels RGB). The ``label_batch`` is a tensor of the @@ -359,9 +351,8 @@ shape ``(32,)``, these are corresponding labels to the 32 images. You can call ``.numpy()`` on the ``image_batch`` and ``labels_batch`` tensors to convert them to a ``numpy.ndarray``. -Configure the Dataset for Performance `⇑ <#top>`__ -############################################################################################################################### - +Configure the Dataset for Performance +------------------------------------------------------------------------------- Let’s make sure to use buffered prefetching so you can yield data from disk without having I/O become blocking. These are two important methods @@ -386,9 +377,8 @@ guide `__. 
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE) val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE) -Standardize the Data `⇑ <#top>`__ -############################################################################################################################### - +Standardize the Data +-------------------------------------------------------------- The RGB channel values are in the ``[0, 255]`` range. This is not ideal for a neural network; in general you should seek to make your input @@ -399,10 +389,8 @@ range by using a Rescaling layer. normalization_layer = layers.Rescaling(1./255) - -.. note:: - - The Keras Preprocessing utilities and layers introduced in this section are currently experimental and may change. +Note: The Keras Preprocessing utilities and layers introduced in this +section are currently experimental and may change. There are two ways to use this layer. You can apply it to the dataset by calling map: @@ -418,31 +406,20 @@ calling map: .. parsed-literal:: - 2023-08-16 01:08:57.116807: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - 2023-08-16 01:08:57.117197: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - - -.. parsed-literal:: - - 0.0 0.9891067 + 0.0 1.0 Or, you can include the layer inside your model definition, which can simplify deployment. Let’s use the second approach here. -.. 
note:: - - You previously resized images using the ``image_size`` argument of - ``image_dataset_from_directory``. If you want to include the resizing - logic in your model as well, you can use the - `Resizing `__ - layer. - -Create the Model `⇑ <#top>`__ -############################################################################################################################### +Note: you previously resized images using the ``image_size`` argument of +``image_dataset_from_directory``. If you want to include the resizing +logic in your model as well, you can use the +`Resizing `__ +layer. +Create the Model +---------------------------------------------------------- The model consists of three convolution blocks with a max pool layer in each of them. There’s a fully connected layer with 128 units on top of @@ -467,9 +444,8 @@ standard approach. layers.Dense(num_classes) ]) -Compile the Model `⇑ <#top>`__ -############################################################################################################################### - +Compile the Model +----------------------------------------------------------- For this tutorial, choose the ``optimizers.Adam`` optimizer and ``losses.SparseCategoricalCrossentropy`` loss function. To view training @@ -482,24 +458,20 @@ argument. loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) -Model Summary `⇑ <#top>`__ -############################################################################################################################### - +Model Summary +------------------------------------------------------- View all the layers of the network using the model’s ``summary`` method. -.. note:: - - This section is commented out for performance reasons. + **NOTE:** This section is commented out for performance reasons. Please feel free to uncomment these to compare the results. .. 
code:: ipython3 # model.summary() -Train the Model `⇑ <#top>`__ -############################################################################################################################### - +Train the Model +--------------------------------------------------------- .. code:: ipython3 @@ -510,9 +482,8 @@ Train the Model `⇑ <#top>`__ # epochs=epochs # ) -Visualize Training Results `⇑ <#top>`__ -############################################################################################################################### - +Visualize Training Results +-------------------------------------------------------------------- Create plots of loss and accuracy on the training and validation sets. @@ -547,9 +518,8 @@ accuracy on the validation set. Let’s look at what went wrong and try to increase the overall performance of the model. -Overfitting `⇑ <#top>`__ -############################################################################################################################### - +Overfitting +----------------------------------------------------- In the plots above, the training accuracy is increasing linearly over time, whereas validation accuracy stalls around 60% in the training @@ -567,9 +537,8 @@ There are multiple ways to fight overfitting in the training process. In this tutorial, you’ll use *data augmentation* and add *Dropout* to your model. -Data Augmentation `⇑ <#top>`__ -############################################################################################################################### - +Data Augmentation +----------------------------------------------------------- Overfitting generally occurs when there are a small number of training examples. `Data @@ -610,23 +579,14 @@ augmentation to the same image several times: plt.axis("off") -.. 
parsed-literal:: - - 2023-08-16 01:08:57.956457: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - 2023-08-16 01:08:57.956841: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - - -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_56_1.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_57_0.png You will use data augmentation to train a model in a moment. -Dropout `⇑ <#top>`__ -############################################################################################################################### - +Dropout +------------------------------------------------- Another technique to reduce overfitting is to introduce `Dropout `__ @@ -658,9 +618,8 @@ it using augmented images. layers.Dense(num_classes, name="outputs") ]) -Compile and Train the Model `⇑ <#top>`__ -############################################################################################################################### - +Compile and Train the Model +--------------------------------------------------------------------- .. 
code:: ipython3 @@ -685,18 +644,18 @@ Compile and Train the Model `⇑ <#top>`__ conv2d_3 (Conv2D) (None, 180, 180, 16) 448 - max_pooling2d_3 (MaxPooling (None, 90, 90, 16) 0 - 2D) + max_pooling2d_3 (MaxPoolin (None, 90, 90, 16) 0 + g2D) conv2d_4 (Conv2D) (None, 90, 90, 32) 4640 - max_pooling2d_4 (MaxPooling (None, 45, 45, 32) 0 - 2D) + max_pooling2d_4 (MaxPoolin (None, 45, 45, 32) 0 + g2D) conv2d_5 (Conv2D) (None, 45, 45, 64) 18496 - max_pooling2d_5 (MaxPooling (None, 22, 22, 64) 0 - 2D) + max_pooling2d_5 (MaxPoolin (None, 22, 22, 64) 0 + g2D) dropout (Dropout) (None, 22, 22, 64) 0 @@ -707,9 +666,9 @@ Compile and Train the Model `⇑ <#top>`__ outputs (Dense) (None, 5) 645 ================================================================= - Total params: 3,989,285 - Trainable params: 3,989,285 - Non-trainable params: 0 + Total params: 3989285 (15.22 MB) + Trainable params: 3989285 (15.22 MB) + Non-trainable params: 0 (0.00 Byte) _________________________________________________________________ @@ -726,64 +685,39 @@ Compile and Train the Model `⇑ <#top>`__ .. parsed-literal:: Epoch 1/15 - - -.. parsed-literal:: - - 2023-08-16 01:08:58.847518: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [2936] - [[{{node Placeholder/_0}}]] - 2023-08-16 01:08:58.847798: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2936] - [[{{node Placeholder/_4}}]] - - -.. parsed-literal:: - - 92/92 [==============================] - ETA: 0s - loss: 1.3880 - accuracy: 0.4196 - -.. 
parsed-literal:: - - 2023-08-16 01:09:05.080237: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [734] - [[{{node Placeholder/_0}}]] - 2023-08-16 01:09:05.080525: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [734] - [[{{node Placeholder/_4}}]] - - -.. parsed-literal:: - - 92/92 [==============================] - 7s 65ms/step - loss: 1.3880 - accuracy: 0.4196 - val_loss: 1.1062 - val_accuracy: 0.5313 + 92/92 [==============================] - 6s 60ms/step - loss: 1.2433 - accuracy: 0.4673 - val_loss: 1.1335 - val_accuracy: 0.5627 Epoch 2/15 - 92/92 [==============================] - 6s 63ms/step - loss: 1.0828 - accuracy: 0.5746 - val_loss: 0.9974 - val_accuracy: 0.5981 + 92/92 [==============================] - 5s 57ms/step - loss: 1.0251 - accuracy: 0.5974 - val_loss: 0.9890 - val_accuracy: 0.5995 Epoch 3/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.9947 - accuracy: 0.6015 - val_loss: 0.9455 - val_accuracy: 0.6267 + 92/92 [==============================] - 5s 57ms/step - loss: 0.9141 - accuracy: 0.6451 - val_loss: 0.8673 - val_accuracy: 0.6580 Epoch 4/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.9154 - accuracy: 0.6482 - val_loss: 0.8459 - val_accuracy: 0.6771 + 92/92 [==============================] - 5s 58ms/step - loss: 0.8439 - accuracy: 0.6829 - val_loss: 0.8107 - val_accuracy: 0.6798 Epoch 5/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.8525 - accuracy: 0.6812 - val_loss: 0.8378 - val_accuracy: 0.6717 + 92/92 
[==============================] - 5s 57ms/step - loss: 0.7845 - accuracy: 0.6962 - val_loss: 0.8639 - val_accuracy: 0.6798 Epoch 6/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.8104 - accuracy: 0.6948 - val_loss: 0.8545 - val_accuracy: 0.6567 + 92/92 [==============================] - 5s 58ms/step - loss: 0.7458 - accuracy: 0.7231 - val_loss: 0.7516 - val_accuracy: 0.7125 Epoch 7/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.7598 - accuracy: 0.6999 - val_loss: 0.8096 - val_accuracy: 0.6921 + 92/92 [==============================] - 5s 57ms/step - loss: 0.7045 - accuracy: 0.7299 - val_loss: 0.7731 - val_accuracy: 0.7016 Epoch 8/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.7397 - accuracy: 0.7166 - val_loss: 0.8358 - val_accuracy: 0.6812 + 92/92 [==============================] - 5s 58ms/step - loss: 0.6876 - accuracy: 0.7265 - val_loss: 0.7341 - val_accuracy: 0.7153 Epoch 9/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.7121 - accuracy: 0.7333 - val_loss: 0.7644 - val_accuracy: 0.6880 + 92/92 [==============================] - 5s 57ms/step - loss: 0.6440 - accuracy: 0.7514 - val_loss: 0.7189 - val_accuracy: 0.7289 Epoch 10/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.6739 - accuracy: 0.7449 - val_loss: 0.7528 - val_accuracy: 0.7084 + 92/92 [==============================] - 5s 57ms/step - loss: 0.6063 - accuracy: 0.7660 - val_loss: 0.8212 - val_accuracy: 0.6975 Epoch 11/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.6442 - accuracy: 0.7568 - val_loss: 0.7190 - val_accuracy: 0.7207 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5727 - accuracy: 0.7830 - val_loss: 0.7362 - val_accuracy: 0.7330 Epoch 12/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.6113 - accuracy: 0.7715 - val_loss: 0.7588 - val_accuracy: 0.7057 + 92/92 [==============================] - 5s 57ms/step - loss: 0.5634 - 
accuracy: 0.7888 - val_loss: 0.7458 - val_accuracy: 0.7153 Epoch 13/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.5751 - accuracy: 0.7800 - val_loss: 0.7641 - val_accuracy: 0.7112 + 92/92 [==============================] - 5s 58ms/step - loss: 0.5492 - accuracy: 0.7922 - val_loss: 0.7176 - val_accuracy: 0.7439 Epoch 14/15 - 92/92 [==============================] - 6s 64ms/step - loss: 0.5595 - accuracy: 0.7847 - val_loss: 0.6969 - val_accuracy: 0.7357 + 92/92 [==============================] - 5s 58ms/step - loss: 0.5193 - accuracy: 0.8025 - val_loss: 0.7529 - val_accuracy: 0.7371 Epoch 15/15 - 92/92 [==============================] - 6s 63ms/step - loss: 0.5338 - accuracy: 0.8001 - val_loss: 0.7533 - val_accuracy: 0.7193 - + 92/92 [==============================] - 5s 57ms/step - loss: 0.4890 - accuracy: 0.8123 - val_loss: 0.7434 - val_accuracy: 0.7302 -Visualize Training Results `⇑ <#top>`__ -############################################################################################################################### +Visualize Training Results +-------------------------------------------------------------------- After applying data augmentation and Dropout, there is less overfitting than before, and training and validation accuracy are closer aligned. @@ -814,20 +748,17 @@ than before, and training and validation accuracy are closer aligned. -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_65_0.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_66_0.png -Predict on New Data `⇑ <#top>`__ -############################################################################################################################### - +Predict on New Data +------------------------------------------------------------- Finally, let us use the model to classify an image that was not included in the training or validation sets. -.. 
note:: - - Data augmentation and Dropout layers are inactive at inference time. - + **Note**: Data augmentation and Dropout layers are inactive at + inference time. .. code:: ipython3 @@ -851,13 +782,12 @@ in the training or validation sets. .. parsed-literal:: - 1/1 [==============================] - 0s 71ms/step - This image most likely belongs to sunflowers with a 88.60 percent confidence. + 1/1 [==============================] - 0s 72ms/step + This image most likely belongs to sunflowers with a 99.00 percent confidence. -Save the TensorFlow Model `⇑ <#top>`__ -############################################################################################################################### - +Save the TensorFlow Model +------------------------------------------------------------------- .. code:: ipython3 @@ -868,47 +798,6 @@ Save the TensorFlow Model `⇑ <#top>`__ model.save(saved_model_dir) -.. parsed-literal:: - - 2023-08-16 01:10:28.122100: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_input' with dtype float and shape [?,180,180,3] - [[{{node random_flip_input}}]] - 2023-08-16 01:10:28.230661: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.240529: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_input' with dtype float and shape [?,180,180,3] - [[{{node random_flip_input}}]] - 2023-08-16 
01:10:28.251530: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.258320: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.265208: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.275900: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.314815: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'sequential_1_input' with dtype float and shape [?,180,180,3] - [[{{node sequential_1_input}}]] - 2023-08-16 01:10:28.381415: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape 
[?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.401720: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'sequential_1_input' with dtype float and shape [?,180,180,3] - [[{{node sequential_1_input}}]] - 2023-08-16 01:10:28.440601: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,22,22,64] - [[{{node inputs}}]] - 2023-08-16 01:10:28.464020: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.537546: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.678691: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.815557: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder 
tensor 'inputs' with dtype float and shape [?,22,22,64] - [[{{node inputs}}]] - 2023-08-16 01:10:28.849161: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.877177: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - 2023-08-16 01:10:28.923274: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,180,180,3] - [[{{node inputs}}]] - WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading. - - .. 
parsed-literal:: INFO:tensorflow:Assets written to: model/flower/saved_model/assets @@ -919,24 +808,22 @@ Save the TensorFlow Model `⇑ <#top>`__ INFO:tensorflow:Assets written to: model/flower/saved_model/assets -Convert the TensorFlow model with OpenVINO Model Optimizer `⇑ <#top>`__ -############################################################################################################################### +Convert the TensorFlow model with OpenVINO Model Conversion API +--------------------------------------------------------------------------------------------------------- To convert the model to OpenVINO IR with ``FP16`` precision, use model -conversion Python API. For more information, see this -`page `__. +conversion Python API. .. code:: ipython3 # Convert the model to ir model format and save it. ir_model_path = Path("model/flower") ir_model_path.mkdir(parents=True, exist_ok=True) - ir_model = mo.convert_model(saved_model_dir=saved_model_dir, input_shape=[1,180,180,3], compress_to_fp16=True) - serialize(ir_model, str(ir_model_path / "flower_ir.xml")) - -Preprocessing Image Function `⇑ <#top>`__ -############################################################################################################################### + ir_model = ov.convert_model(saved_model_dir, input=[1,180,180,3]) + ov.save_model(ir_model, ir_model_path / "flower_ir.xml") +Preprocessing Image Function +---------------------------------------------------------------------- .. 
code:: ipython3 @@ -952,21 +839,20 @@ Preprocessing Image Function `⇑ <#top>`__ return input_image -OpenVINO Runtime Setup `⇑ <#top>`__ -############################################################################################################################### - +OpenVINO Runtime Setup +---------------------------------------------------------------- -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. code:: ipython3 import ipywidgets as widgets - core = Core() + # Initialize OpenVINO runtime + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value='AUTO', @@ -989,8 +875,6 @@ Select device from dropdown list for running inference using OpenVINO: class_names=["daisy", "dandelion", "roses", "sunflowers", "tulips"] - # Initialize OpenVINO runtime - core = Core() compiled_model = core.compile_model(model=ir_model, device_name=device.value) del ir_model @@ -998,9 +882,8 @@ Select device from dropdown list for running inference using OpenVINO: input_layer = compiled_model.input(0) output_layer = compiled_model.output(0) -Run the Inference Step `⇑ <#top>`__ -############################################################################################################################### - +Run the Inference Step +---------------------------------------------------------------- .. code:: ipython3 @@ -1038,16 +921,15 @@ Run the Inference Step `⇑ <#top>`__ 'output/A_Close_Up_Photo_of_a_Dandelion.jpg' already exists. (1, 180, 180, 3) [1,180,180,3] - This image most likely belongs to dandelion with a 98.50 percent confidence. 
- + This image most likely belongs to dandelion with a 99.82 percent confidence. -.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_78_1.png +.. image:: 301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_79_1.png -The Next Steps `⇑ <#top>`__ -############################################################################################################################### +The Next Steps +-------------------------------------------------------- This tutorial showed how to train a TensorFlow model, how to convert that model to OpenVINO’s IR format, and how to do inference on the diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.jpg deleted file mode 100644 index 532ff55c1d94fc..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43949a084d6557772310458a3ff6a6921a4752faf0d74ac20fc81204efaf9434 -size 7042 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.png deleted file mode 100644 index 3ea370c52289f6..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_13_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a969ea86bf49ca484394adedc3bfc631e125c1c54472a37089ef3b094651e1cf -size 64525 diff --git 
a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.jpg index 87ae42741c0fc7..532ff55c1d94fc 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.jpg +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5c4ddb54a36fe095f708f3da0093643f629c4c45f80df539a24db47b849def9 -size 20653 +oid sha256:43949a084d6557772310458a3ff6a6921a4752faf0d74ac20fc81204efaf9434 +size 7042 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png index b60e204aeb1f95..3ea370c52289f6 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_14_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70ae783b513ce08e778cf53b0f0daea47c6032a737fa07330e66bcfd6f742943 -size 167334 +oid sha256:a969ea86bf49ca484394adedc3bfc631e125c1c54472a37089ef3b094651e1cf +size 64525 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.jpg new file mode 100644 index 00000000000000..87ae42741c0fc7 --- /dev/null +++ 
b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c4ddb54a36fe095f708f3da0093643f629c4c45f80df539a24db47b849def9 +size 20653 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.png new file mode 100644 index 00000000000000..b60e204aeb1f95 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_15_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ae783b513ce08e778cf53b0f0daea47c6032a737fa07330e66bcfd6f742943 +size 167334 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.jpg deleted file mode 100644 index c398ad4d168401..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:744d21693b7566bd1dfeef72f10f6e208b40fed0778b08c031860d41324e6eb1 -size 15872 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.png deleted file mode 100644 index 7a1e3c16793d57..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_16_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 
-oid sha256:5213720caf7341165b7dc44f2f492d93f14a1542d2cd39ffbb807a8938adfdf8 -size 225545 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.jpg index 4e33a6cc9f0f1c..c398ad4d168401 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.jpg +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9fe90ab463c6396fe1a5cdc3e42168b4be4e3454a7695b93ac624b78fa2c5a8 -size 23154 +oid sha256:744d21693b7566bd1dfeef72f10f6e208b40fed0778b08c031860d41324e6eb1 +size 15872 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png index f43f12f10342a0..7a1e3c16793d57 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_17_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd366a9bac123cbd99f91c71c19ebd2f23816b6a48f93b34e185163f7bd52cb9 -size 154227 +oid sha256:5213720caf7341165b7dc44f2f492d93f14a1542d2cd39ffbb807a8938adfdf8 +size 225545 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.jpg b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.jpg new file mode 100644 index 00000000000000..4e33a6cc9f0f1c --- /dev/null +++ 
b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fe90ab463c6396fe1a5cdc3e42168b4be4e3454a7695b93ac624b78fa2c5a8 +size 23154 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.png new file mode 100644 index 00000000000000..f43f12f10342a0 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_18_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd366a9bac123cbd99f91c71c19ebd2f23816b6a48f93b34e185163f7bd52cb9 +size 154227 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_28_1.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_29_0.png similarity index 100% rename from docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_28_1.png rename to docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_29_0.png diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_56_1.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_56_1.png deleted file mode 100644 index 120bd04601b75c..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_56_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:d3c1e81c182d56bc4a02c606b3599e6e93dce43deb38f42554c0b42d1db83bf6 -size 360658 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_57_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_57_0.png new file mode 100644 index 00000000000000..164d702a3f1841 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_57_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6740b40acf1f7bb69f36d2628bf714cec62f93554bc756f9ffabc321867d179 +size 530251 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_65_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_65_0.png deleted file mode 100644 index 4467d51e6ec7ef..00000000000000 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_65_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54d4971bb60ae07f689a9f0a9c9938c0ba9ae7610cf75a03358e78e8d4afa002 -size 55759 diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_66_0.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_66_0.png new file mode 100644 index 00000000000000..46b74805f68f73 --- /dev/null +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_66_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2105df5b195b2a9257d53c0dac7350af567654ada65c25d535ffced47e9ede16 +size 57480 diff --git 
a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_78_1.png b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_79_1.png similarity index 100% rename from docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_78_1.png rename to docs/notebooks/301-tensorflow-training-openvino-with-output_files/301-tensorflow-training-openvino-with-output_79_1.png diff --git a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/index.html b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/index.html index e260abcfe3cd14..880a6d8aaddb53 100644 --- a/docs/notebooks/301-tensorflow-training-openvino-with-output_files/index.html +++ b/docs/notebooks/301-tensorflow-training-openvino-with-output_files/index.html @@ -1,18 +1,18 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/301-tensorflow-training-openvino-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/301-tensorflow-training-openvino-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/301-tensorflow-training-openvino-with-output_files/


../
-301-tensorflow-training-openvino-with-output_13..> 16-Aug-2023 01:31                7042
-301-tensorflow-training-openvino-with-output_13..> 16-Aug-2023 01:31               64525
-301-tensorflow-training-openvino-with-output_14..> 16-Aug-2023 01:31               20653
-301-tensorflow-training-openvino-with-output_14..> 16-Aug-2023 01:31              167334
-301-tensorflow-training-openvino-with-output_16..> 16-Aug-2023 01:31               15872
-301-tensorflow-training-openvino-with-output_16..> 16-Aug-2023 01:31              225545
-301-tensorflow-training-openvino-with-output_17..> 16-Aug-2023 01:31               23154
-301-tensorflow-training-openvino-with-output_17..> 16-Aug-2023 01:31              154227
-301-tensorflow-training-openvino-with-output_28..> 16-Aug-2023 01:31              941151
-301-tensorflow-training-openvino-with-output_56..> 16-Aug-2023 01:31              360658
-301-tensorflow-training-openvino-with-output_65..> 16-Aug-2023 01:31               55759
-301-tensorflow-training-openvino-with-output_78..> 16-Aug-2023 01:31              143412
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/301-tensorflow-training-openvino-with-output_files/


../
+301-tensorflow-training-openvino-with-output_14..> 31-Oct-2023 00:35                7042
+301-tensorflow-training-openvino-with-output_14..> 31-Oct-2023 00:35               64525
+301-tensorflow-training-openvino-with-output_15..> 31-Oct-2023 00:35               20653
+301-tensorflow-training-openvino-with-output_15..> 31-Oct-2023 00:35              167334
+301-tensorflow-training-openvino-with-output_17..> 31-Oct-2023 00:35               15872
+301-tensorflow-training-openvino-with-output_17..> 31-Oct-2023 00:35              225545
+301-tensorflow-training-openvino-with-output_18..> 31-Oct-2023 00:35               23154
+301-tensorflow-training-openvino-with-output_18..> 31-Oct-2023 00:35              154227
+301-tensorflow-training-openvino-with-output_29..> 31-Oct-2023 00:35              941151
+301-tensorflow-training-openvino-with-output_57..> 31-Oct-2023 00:35              530251
+301-tensorflow-training-openvino-with-output_66..> 31-Oct-2023 00:35               57480
+301-tensorflow-training-openvino-with-output_79..> 31-Oct-2023 00:35              143412
 

diff --git a/docs/notebooks/302-pytorch-quantization-aware-training-with-output.rst b/docs/notebooks/302-pytorch-quantization-aware-training-with-output.rst index 0448bcbd2c1793..1d30ba14ae86e6 100644 --- a/docs/notebooks/302-pytorch-quantization-aware-training-with-output.rst +++ b/docs/notebooks/302-pytorch-quantization-aware-training-with-output.rst @@ -1,8 +1,6 @@ Quantization Aware Training with NNCF, using PyTorch framework ============================================================== - - This notebook is based on `ImageNet training in PyTorch `__. @@ -14,8 +12,7 @@ optimization process contains the following steps: - Transforming the original ``FP32`` model to ``INT8`` - Using fine-tuning to restore the accuracy. -- Exporting optimized and original models to ONNX and then to OpenVINO - IR +- Exporting optimized and original models to OpenVINO IR - Measuring and comparing the performance of models. For more advanced usage, refer to these @@ -29,43 +26,55 @@ notebook. Using the smaller model and dataset will speed up training and download time. To see other ResNet models, visit `PyTorch hub `__. -.. note:: + **NOTE**: This notebook requires a C++ compiler. + +**Table of contents:** - This notebook requires a C++ compiler. +- `Imports and Settings <#imports-and-settings>`__ +- `Pre-train Floating-Point + Model <#pre-train-floating-point-model>`__ -.. _top: + - `Train Function <#train-function>`__ + - `Validate Function <#validate-function>`__ + - `Helpers <#helpers>`__ + - `Get a Pre-trained FP32 + Model <#get-a-pre-trained-fp32-model>`__ + +- `Create and Initialize + Quantization <#create-and-initialize-quantization>`__ +- `Fine-tune the Compressed + Model <#fine-tune-the-compressed-model>`__ +- `Export INT8 Model to OpenVINO + IR <#export-int8-model-to-openvino-ir>`__ +- `Benchmark Model Performance by Computing Inference + Time <#benchmark-model-performance-by-computing-inference-time>`__ + +.. 
code:: ipython3 -**Table of contents**: + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "openvino>=2023.1.0" "torch" "torchvision" + %pip install -q "nncf>=2.6.0" -- `Imports and Settings <#imports-and-settings>`__ -- `Pre-train Floating-Point Model <#pre-train-floating-point-model>`__ - - `Train Function <#train-function>`__ - - `Validate Function <#validate-function>`__ - - `Helpers <#helpers>`__ - - `Get a Pre-trained FP32 Model <#get-a-pre-trained-fp32-model>`__ +.. parsed-literal:: -- `Create and Initialize Quantization <#create-and-initialize-quantization>`__ -- `Fine-tune the Compressed Model <#fine-tune-the-compressed-model>`__ -- `Export INT8 Model to ONNX <#export-int8-model-to-onnx>`__ -- `Convert ONNX models to OpenVINO Intermediate Representation (IR) <#convert-onnx-models-to-openvino-intermediate-representation-ir>`__ -- `Benchmark Model Performance by Computing Inference Time <#benchmark-model-performance-by-computing-inference-time>`__ + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
-Imports and Settings `⇑ <#top>`__ -############################################################################################################################### +Imports and Settings +-------------------------------------------------------------- On Windows, add the required C++ directories to the system PATH. Import NNCF and all auxiliary packages from your Python code. Set a name for the model, and the image width and height that will be used for the -network. Also define paths where PyTorch, ONNX and OpenVINO IR versions -of the models will be stored. - -.. note:: +network. Also define paths where PyTorch and OpenVINO IR versions of the +models will be stored. - All NNCF logging messages below ERROR level (INFO and + **NOTE**: All NNCF logging messages below ERROR level (INFO and WARNING) are disabled to simplify the tutorial. For production use, it is recommended to enable logging by removing ``set_log_level(logging.ERROR)``. @@ -110,7 +119,7 @@ of the models will be stored. import sys import time - import warnings # To disable warnings on export to ONNX. + import warnings # To disable warnings on export model import zipfile from pathlib import Path import logging @@ -131,8 +140,7 @@ of the models will be stored. set_log_level(logging.ERROR) # Disables all NNCF info and warning messages. from nncf import NNCFConfig from nncf.torch import create_compressed_model, register_default_init_args - from openvino.runtime import Core, serialize - from openvino.tools import mo + import openvino as ov from torch.jit import TracerWarning sys.path.append("../utils") @@ -152,12 +160,10 @@ of the models will be stored. MODEL_DIR.mkdir(exist_ok=True) DATA_DIR.mkdir(exist_ok=True) - # Paths where PyTorch, ONNX and OpenVINO IR models will be stored. + # Paths where PyTorch and OpenVINO IR models will be stored. 
fp32_pth_path = Path(MODEL_DIR / (BASE_MODEL_NAME + "_fp32")).with_suffix(".pth") - fp32_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_fp32")).with_suffix(".onnx") - fp32_ir_path = fp32_onnx_path.with_suffix(".xml") - int8_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_int8")).with_suffix(".onnx") - int8_ir_path = int8_onnx_path.with_suffix(".xml") + fp32_ir_path = fp32_pth_path.with_suffix(".xml") + int8_ir_path = Path(MODEL_DIR / (BASE_MODEL_NAME + "_int8")).with_suffix(".xml") # It is possible to train FP32 model from scratch, but it might be slow. Therefore, the pre-trained weights are downloaded by default. pretrained_on_tiny_imagenet = True @@ -165,14 +171,6 @@ of the models will be stored. download_file(fp32_pth_url, directory=MODEL_DIR, filename=fp32_pth_path.name) -.. parsed-literal:: - - 2023-08-16 01:10:37.605341: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-16 01:10:37.639047: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2023-08-16 01:10:38.206632: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -198,7 +196,7 @@ of the models will be stored. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/302-pytorch-quantization-aware-training/model/resnet18_fp32.pth') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/302-pytorch-quantization-aware-training/model/resnet18_fp32.pth') @@ -257,11 +255,11 @@ Download Tiny ImageNet dataset Successfully downloaded and prepared dataset at: data/tiny-imagenet-200 -Pre-train Floating-Point Model `⇑ <#top>`__ -############################################################################################################################### +Pre-train Floating-Point Model +------------------------------------------------------------------------ -Using NNCF for model compression assumes that a pre-trained model and a training pipeline are -already in use. +Using NNCF for model compression assumes that a pre-trained model and a +training pipeline are already in use. This tutorial demonstrates one possible training pipeline: a ResNet-18 model pre-trained on 1000 classes from ImageNet is fine-tuned with 200 @@ -270,9 +268,8 @@ classes from Tiny-ImageNet. Subsequently, the training and validation functions will be reused as is for quantization-aware training. -Train Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Train Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -316,9 +313,8 @@ Train Function `⇑ <#top>`__ if i % print_frequency == 0: progress.display(i) -Validate Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Validate Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -359,9 +355,8 @@ Validate Function `⇑ <#top>`__ print(" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5)) return top1.avg -Helpers `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Helpers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -423,9 +418,8 @@ Helpers `⇑ <#top>`__ res.append(correct_k.mul_(100.0 / batch_size)) return res -Get a Pre-trained FP32 Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Get a Pre-trained FP32 Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ А pre-trained floating-point model is a prerequisite for quantization. It can be obtained by tuning from scratch with the code below. However, @@ -491,9 +485,9 @@ section at the top of this notebook. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`. warnings.warn(msg) @@ -534,25 +528,25 @@ section at the top of this notebook. Accuracy of FP32 model: 55.520 -Export the ``FP32`` model to ONNX, which is supported by OpenVINO™ -Toolkit, to benchmark it in comparison with the ``INT8`` model. +Export the ``FP32`` model to OpenVINO™ Intermediate Representation, to +benchmark it in comparison with the ``INT8`` model. .. code:: ipython3 dummy_input = torch.randn(1, 3, image_size, image_size).to(device) - torch.onnx.export(model, dummy_input, fp32_onnx_path) - print(f"FP32 ONNX model was exported to {fp32_onnx_path}.") + ov_model = ov.convert_model(model, example_input=dummy_input, input=[1, 3, image_size, image_size]) + ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) + print(f"FP32 model was exported to {fp32_ir_path}.") .. parsed-literal:: - FP32 ONNX model was exported to output/resnet18_fp32.onnx. - + FP32 model was exported to model/resnet18_fp32.xml. -Create and Initialize Quantization `⇑ <#top>`__ -############################################################################################################################### +Create and Initialize Quantization +---------------------------------------------------------------------------- NNCF enables compression-aware training by integrating into regular training pipelines. The framework is designed so that modifications to @@ -587,6 +581,15 @@ scenario and requires only 3 modifications. compression_ctrl, model = create_compressed_model(model, nncf_config) + +.. parsed-literal:: + + 2023-10-31 00:15:43.733728: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:15:43.767038: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2023-10-31 00:15:44.314131: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + Evaluate the new model on the validation set after initialization of quantization. The accuracy should be close to the accuracy of the floating-point ``FP32`` model for a simple case like the one being @@ -600,21 +603,20 @@ demonstrated here. .. parsed-literal:: - Test: [ 0/79] Time 0.161 (0.161) Loss 0.981 (0.981) Acc@1 78.91 (78.91) Acc@5 89.84 (89.84) - Test: [10/79] Time 0.145 (0.152) Loss 1.905 (1.623) Acc@1 46.88 (60.51) Acc@5 82.03 (84.09) - Test: [20/79] Time 0.149 (0.150) Loss 1.734 (1.692) Acc@1 63.28 (58.63) Acc@5 79.69 (83.04) - Test: [30/79] Time 0.148 (0.150) Loss 2.282 (1.781) Acc@1 50.00 (57.31) Acc@5 69.53 (81.50) - Test: [40/79] Time 0.148 (0.150) Loss 1.540 (1.825) Acc@1 62.50 (55.83) Acc@5 85.94 (80.96) - Test: [50/79] Time 0.146 (0.150) Loss 1.972 (1.820) Acc@1 57.03 (56.05) Acc@5 75.00 (80.73) - Test: [60/79] Time 0.147 (0.150) Loss 1.731 (1.846) Acc@1 57.81 (55.51) Acc@5 85.16 (80.21) - Test: [70/79] Time 0.151 (0.150) Loss 2.412 (1.872) Acc@1 47.66 (55.15) Acc@5 71.88 (79.61) + Test: [ 0/79] Time 0.168 (0.168) Loss 0.981 (0.981) Acc@1 78.91 (78.91) Acc@5 89.84 (89.84) + Test: [10/79] Time 0.156 (0.165) Loss 1.905 (1.623) Acc@1 46.88 (60.51) Acc@5 82.03 (84.09) + Test: [20/79] Time 0.155 (0.160) Loss 1.734 (1.692) Acc@1 63.28 (58.63) Acc@5 79.69 (83.04) + Test: [30/79] Time 0.154 (0.157) Loss 
2.282 (1.781) Acc@1 50.00 (57.31) Acc@5 69.53 (81.50) + Test: [40/79] Time 0.152 (0.156) Loss 1.540 (1.825) Acc@1 62.50 (55.83) Acc@5 85.94 (80.96) + Test: [50/79] Time 0.152 (0.156) Loss 1.972 (1.820) Acc@1 57.03 (56.05) Acc@5 75.00 (80.73) + Test: [60/79] Time 0.152 (0.156) Loss 1.731 (1.846) Acc@1 57.81 (55.51) Acc@5 85.16 (80.21) + Test: [70/79] Time 0.154 (0.155) Loss 2.412 (1.872) Acc@1 47.66 (55.15) Acc@5 71.88 (79.61) * Acc@1 55.540 Acc@5 80.200 Accuracy of initialized INT8 model: 55.540 -Fine-tune the Compressed Model `⇑ <#top>`__ -############################################################################################################################### - +Fine-tune the Compressed Model +------------------------------------------------------------------------ At this step, a regular fine-tuning process is applied to further improve quantized model accuracy. Normally, several epochs of tuning are @@ -639,109 +641,67 @@ training pipeline are required. Here is a simple example. .. 
parsed-literal:: - Epoch:[0][ 0/782] Time 0.391 (0.391) Loss 0.740 (0.740) Acc@1 84.38 (84.38) Acc@5 96.88 (96.88) - Epoch:[0][ 50/782] Time 0.387 (0.383) Loss 0.911 (0.802) Acc@1 78.91 (80.15) Acc@5 92.97 (94.42) - Epoch:[0][100/782] Time 0.387 (0.384) Loss 0.631 (0.798) Acc@1 84.38 (80.24) Acc@5 95.31 (94.38) - Epoch:[0][150/782] Time 0.377 (0.383) Loss 0.836 (0.792) Acc@1 80.47 (80.48) Acc@5 94.53 (94.43) - Epoch:[0][200/782] Time 0.431 (0.385) Loss 0.873 (0.780) Acc@1 75.00 (80.65) Acc@5 94.53 (94.59) + Epoch:[0][ 0/782] Time 0.412 (0.412) Loss 0.740 (0.740) Acc@1 84.38 (84.38) Acc@5 96.88 (96.88) + Epoch:[0][ 50/782] Time 0.383 (0.387) Loss 0.911 (0.802) Acc@1 78.91 (80.15) Acc@5 92.97 (94.42) + Epoch:[0][100/782] Time 0.387 (0.388) Loss 0.631 (0.798) Acc@1 84.38 (80.24) Acc@5 95.31 (94.38) + Epoch:[0][150/782] Time 0.381 (0.388) Loss 0.836 (0.792) Acc@1 80.47 (80.48) Acc@5 94.53 (94.43) + Epoch:[0][200/782] Time 0.369 (0.386) Loss 0.873 (0.780) Acc@1 75.00 (80.65) Acc@5 94.53 (94.59) Epoch:[0][250/782] Time 0.385 (0.386) Loss 0.735 (0.778) Acc@1 84.38 (80.77) Acc@5 95.31 (94.53) - Epoch:[0][300/782] Time 0.411 (0.386) Loss 0.615 (0.771) Acc@1 85.16 (80.99) Acc@5 97.66 (94.58) - Epoch:[0][350/782] Time 0.386 (0.386) Loss 0.599 (0.767) Acc@1 85.16 (81.14) Acc@5 95.31 (94.58) - Epoch:[0][400/782] Time 0.385 (0.386) Loss 0.798 (0.765) Acc@1 82.03 (81.21) Acc@5 92.97 (94.56) - Epoch:[0][450/782] Time 0.432 (0.386) Loss 0.630 (0.762) Acc@1 85.16 (81.26) Acc@5 96.88 (94.58) - Epoch:[0][500/782] Time 0.397 (0.386) Loss 0.633 (0.757) Acc@1 85.94 (81.45) Acc@5 96.88 (94.63) - Epoch:[0][550/782] Time 0.383 (0.387) Loss 0.749 (0.755) Acc@1 82.03 (81.49) Acc@5 92.97 (94.65) - Epoch:[0][600/782] Time 0.394 (0.387) Loss 0.927 (0.753) Acc@1 78.12 (81.53) Acc@5 88.28 (94.67) - Epoch:[0][650/782] Time 0.384 (0.387) Loss 0.645 (0.749) Acc@1 84.38 (81.60) Acc@5 95.31 (94.71) - Epoch:[0][700/782] Time 0.383 (0.387) Loss 0.816 (0.749) Acc@1 82.03 (81.62) Acc@5 91.41 (94.69) - 
Epoch:[0][750/782] Time 0.385 (0.387) Loss 0.811 (0.746) Acc@1 80.47 (81.69) Acc@5 94.53 (94.72) - Test: [ 0/79] Time 0.189 (0.189) Loss 1.092 (1.092) Acc@1 75.00 (75.00) Acc@5 86.72 (86.72) - Test: [10/79] Time 0.145 (0.154) Loss 1.917 (1.526) Acc@1 48.44 (62.64) Acc@5 78.12 (83.88) - Test: [20/79] Time 0.144 (0.149) Loss 1.631 (1.602) Acc@1 64.06 (60.68) Acc@5 81.25 (83.71) - Test: [30/79] Time 0.145 (0.148) Loss 2.037 (1.691) Acc@1 57.81 (59.25) Acc@5 71.09 (82.23) - Test: [40/79] Time 0.144 (0.147) Loss 1.563 (1.743) Acc@1 64.84 (58.02) Acc@5 82.81 (81.33) - Test: [50/79] Time 0.146 (0.147) Loss 1.926 (1.750) Acc@1 52.34 (57.77) Acc@5 76.56 (81.04) - Test: [60/79] Time 0.144 (0.146) Loss 1.559 (1.781) Acc@1 67.19 (57.24) Acc@5 84.38 (80.58) - Test: [70/79] Time 0.144 (0.146) Loss 2.353 (1.806) Acc@1 46.88 (56.81) Acc@5 72.66 (80.08) + Epoch:[0][300/782] Time 0.368 (0.386) Loss 0.615 (0.771) Acc@1 85.16 (80.99) Acc@5 97.66 (94.58) + Epoch:[0][350/782] Time 0.392 (0.386) Loss 0.599 (0.767) Acc@1 85.16 (81.14) Acc@5 95.31 (94.58) + Epoch:[0][400/782] Time 0.382 (0.386) Loss 0.798 (0.765) Acc@1 82.03 (81.21) Acc@5 92.97 (94.56) + Epoch:[0][450/782] Time 0.377 (0.386) Loss 0.630 (0.762) Acc@1 85.16 (81.26) Acc@5 96.88 (94.58) + Epoch:[0][500/782] Time 0.367 (0.386) Loss 0.633 (0.757) Acc@1 85.94 (81.45) Acc@5 96.88 (94.63) + Epoch:[0][550/782] Time 0.406 (0.386) Loss 0.749 (0.755) Acc@1 82.03 (81.49) Acc@5 92.97 (94.65) + Epoch:[0][600/782] Time 0.397 (0.385) Loss 0.927 (0.753) Acc@1 78.12 (81.53) Acc@5 88.28 (94.67) + Epoch:[0][650/782] Time 0.392 (0.385) Loss 0.645 (0.749) Acc@1 84.38 (81.60) Acc@5 95.31 (94.71) + Epoch:[0][700/782] Time 0.399 (0.386) Loss 0.816 (0.749) Acc@1 82.03 (81.62) Acc@5 91.41 (94.69) + Epoch:[0][750/782] Time 0.404 (0.386) Loss 0.811 (0.746) Acc@1 80.47 (81.69) Acc@5 94.53 (94.72) + Test: [ 0/79] Time 0.166 (0.166) Loss 1.092 (1.092) Acc@1 75.00 (75.00) Acc@5 86.72 (86.72) + Test: [10/79] Time 0.150 (0.140) Loss 1.917 (1.526) Acc@1 48.44 
(62.64) Acc@5 78.12 (83.88) + Test: [20/79] Time 0.137 (0.138) Loss 1.631 (1.602) Acc@1 64.06 (60.68) Acc@5 81.25 (83.71) + Test: [30/79] Time 0.136 (0.138) Loss 2.037 (1.691) Acc@1 57.81 (59.25) Acc@5 71.09 (82.23) + Test: [40/79] Time 0.134 (0.137) Loss 1.563 (1.743) Acc@1 64.84 (58.02) Acc@5 82.81 (81.33) + Test: [50/79] Time 0.137 (0.137) Loss 1.926 (1.750) Acc@1 52.34 (57.77) Acc@5 76.56 (81.04) + Test: [60/79] Time 0.137 (0.137) Loss 1.559 (1.781) Acc@1 67.19 (57.24) Acc@5 84.38 (80.58) + Test: [70/79] Time 0.137 (0.137) Loss 2.353 (1.806) Acc@1 46.88 (56.81) Acc@5 72.66 (80.08) * Acc@1 57.320 Acc@5 80.730 Accuracy of tuned INT8 model: 57.320 Accuracy drop of tuned INT8 model over pre-trained FP32 model: -1.800 -Export INT8 Model to ONNX `⇑ <#top>`__ -############################################################################################################################### - +Export INT8 Model to OpenVINO IR +-------------------------------------------------------------------------- .. code:: ipython3 - if not int8_onnx_path.exists(): + if not int8_ir_path.exists(): warnings.filterwarnings("ignore", category=TracerWarning) warnings.filterwarnings("ignore", category=UserWarning) - # Export INT8 model to ONNX that is supported by OpenVINO™ Toolkit - compression_ctrl.export_model(int8_onnx_path) - print(f"INT8 ONNX model exported to {int8_onnx_path}.") - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/quantize_functions.py:140: FutureWarning: 'torch.onnx._patch_torch._graph_op' is deprecated in version 1.13 and will be removed in version 1.14. Please note 'g.op()' is to be removed from torch.Graph. Please open a GitHub issue if you need this functionality.. 
- output = g.op( + # Export INT8 model to OpenVINO™ IR + ov_model = ov.convert_model(model, example_input=dummy_input, input=[1, 3, image_size, image_size]) + ov.save_model(ov_model, int8_ir_path) + print(f"INT8 Omodel exported to {int8_ir_path}.") .. parsed-literal:: - INT8 ONNX model exported to output/resnet18_int8.onnx. - + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + INT8 Omodel exported to model/resnet18_int8.xml. -Convert ONNX models to OpenVINO Intermediate Representation (IR). `⇑ <#top>`__ -############################################################################################################################### - -Use model conversion Python API to convert the ONNX model to OpenVINO -IR, with ``FP16`` precision. Then, add the mean values to the model and -scale the input with the standard deviation by the ``mean_values`` and -``scale_values`` parameters. It is not necessary to normalize input data -before propagating it through the network with these options. - -For more information about model conversion, see this -`page `__. - -.. code:: ipython3 - - if not fp32_ir_path.exists(): - model = mo.convert_model( - input_model=fp32_onnx_path, - input_shape=[1, 3, image_size, image_size], - mean_values=[123.675, 116.28, 103.53], - scale_values=[58.395, 57.12, 57.375], - compress_to_fp16=True, - ) - serialize(model, str(fp32_ir_path)) - -.. 
code:: ipython3 - - if not int8_ir_path.exists(): - model = mo.convert_model( - input_model=int8_onnx_path, - input_shape=[1, 3, image_size, image_size], - compress_to_fp16=True, - ) - serialize(model, str(int8_ir_path)) - -Benchmark Model Performance by Computing Inference Time `⇑ <#top>`__ -############################################################################################################################### +Benchmark Model Performance by Computing Inference Time +------------------------------------------------------------------------------------------------- Finally, measure the inference performance of the ``FP32`` and ``INT8`` models, using `Benchmark -Tool `__ +Tool `__ - inference performance measurement tool in OpenVINO. By default, Benchmark Tool runs inference for 60 seconds in asynchronous mode on CPU. It returns inference speed as latency (milliseconds per image) and throughput (frames per second) values. -.. note:: - - This notebook runs ``benchmark_app`` for 15 seconds to give + **NOTE**: This notebook runs ``benchmark_app`` for 15 seconds to give a quick indication of performance. For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run @@ -750,7 +710,6 @@ throughput (frames per second) values. ``benchmark_app --help`` to see an overview of all command-line options. - .. code:: ipython3 def parse_benchmark_output(benchmark_output): @@ -770,16 +729,16 @@ throughput (frames per second) values. .. parsed-literal:: Benchmark FP32 model (IR) - [ INFO ] Throughput: 2896.36 FPS + [ INFO ] Throughput: 2952.65 FPS Benchmark INT8 model (IR) - [ INFO ] Throughput: 12326.44 FPS + [ INFO ] Throughput: 11986.34 FPS Show CPU Information for reference. .. 
code:: ipython3 - ie = Core() + ie = ov.Core() ie.get_property("CPU", "FULL_DEVICE_NAME") diff --git a/docs/notebooks/305-tensorflow-quantization-aware-training-with-output.rst b/docs/notebooks/305-tensorflow-quantization-aware-training-with-output.rst index 7d6e7934675d69..05108191ba577b 100644 --- a/docs/notebooks/305-tensorflow-quantization-aware-training-with-output.rst +++ b/docs/notebooks/305-tensorflow-quantization-aware-training-with-output.rst @@ -1,8 +1,6 @@ Quantization Aware Training with NNCF, using TensorFlow Framework ================================================================= - - The goal of this notebook to demonstrate how to use the Neural Network Compression Framework `NNCF `__ 8-bit quantization to optimize a TensorFlow model for inference with @@ -23,47 +21,62 @@ Imagenette is a subset of 10 easily classified classes from the ImageNet dataset. Using the smaller model and dataset will speed up training and download time. -.. _top: - -**Table of contents**: - -- `Imports and Settings <#imports-and-settings>`__ -- `Dataset Preprocessing <#dataset-preprocessing>`__ -- `Define a Floating-Point Model <#define-a-floating-point-model>`__ -- `Pre-train a Floating-Point Model <#pre-train-a-floating-point-model>`__ -- `Create and Initialize Quantization <#create-and-initialize-quantization>`__ -- `Fine-tune the Compressed Model <#fine-tune-the-compressed-model>`__ -- `Export Models to OpenVINO Intermediate Representation (IR) <#export-models-to-openvino-intermediate-representation-ir>`__ -- `Benchmark Model Performance by Computing Inference Time <#benchmark-model-performance-by-computing-inference-time>`__ - -Imports and Settings `⇑ <#top>`__ -############################################################################################################################### - -Import NNCF and all auxiliary packages from your Python code. Set a name for the model, input image -size, used batch size, and the learning rate. 
Also, define paths where -Frozen Graph and OpenVINO IR versions of the models will be stored. - -.. note:: - - All NNCF logging messages below ERROR level (INFO and +**Table of contents:** + + +- `Imports and Settings <#imports-and-settings>`__ +- `Dataset Preprocessing <#dataset-preprocessing>`__ +- `Define a Floating-Point + Model <#define-a-floating-point-model>`__ +- `Pre-train a Floating-Point + Model <#pre-train-a-floating-point-model>`__ +- `Create and Initialize + Quantization <#create-and-initialize-quantization>`__ +- `Fine-tune the Compressed + Model <#fine-tune-the-compressed-model>`__ +- `Export Models to OpenVINO Intermediate Representation + (IR) <#export-models-to-openvino-intermediate-representation-ir>`__ +- `Benchmark Model Performance by Computing Inference + Time <#benchmark-model-performance-by-computing-inference-time>`__ + +Imports and Settings +-------------------------------------------------------------- + +Import NNCF and all auxiliary packages from your Python code. Set a name +for the model, input image size, used batch size, and the learning rate. +Also, define paths where Frozen Graph and OpenVINO IR versions of the +models will be stored. + + **NOTE**: All NNCF logging messages below ERROR level (INFO and WARNING) are disabled to simplify the tutorial. For production use, it is recommended to enable logging by removing ``set_log_level(logging.ERROR)``. - .. code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" "nncf>=2.5.0" - !pip install -q "tensorflow-datasets>=4.8.0" + import sys + import importlib.util + + %pip install -q "openvino>=2023.1.0" "nncf>=2.5.0" + if sys.platform == "win32": + if importlib.util.find_spec("tensorflow_datasets"): + %pip uninstall -q tensorflow-datasets + %pip install -q --upgrade "tfds-nightly" + else: + %pip install -q "tensorflow-datasets>=4.8.0" .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. 
pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + onnxconverter-common 1.14.0 requires protobuf==3.20.2, but you have protobuf 3.20.3 which is incompatible. pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 3.20.3 which is incompatible. 
- + tensorflow 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.8.0 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -79,8 +92,7 @@ Frozen Graph and OpenVINO IR versions of the models will be stored. from nncf.tensorflow.helpers.model_creation import create_compressed_model from nncf.tensorflow.initialization import register_default_init_args from nncf.common.logging.logger import set_log_level - from openvino.runtime import serialize - from openvino.tools import mo + import openvino as ov set_log_level(logging.ERROR) @@ -112,25 +124,24 @@ Frozen Graph and OpenVINO IR versions of the models will be stored. .. parsed-literal:: - 2023-08-16 01:17:34.103410: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2023-08-16 01:17:34.137361: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2023-10-31 00:22:02.092134: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2023-10-31 00:22:02.126560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2023-08-16 01:17:34.726614: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2023-10-31 00:22:02.723114: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - WARNING:nncf:NNCF provides best results with tensorflow==2.11.*, while current tensorflow version is 2.12.0. If you encounter issues, consider switching to tensorflow==2.11.* + WARNING:nncf:NNCF provides best results with tensorflow==2.12.*, while current tensorflow version is 2.13.1. If you encounter issues, consider switching to tensorflow==2.12.* Downloading data from https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/305_resnet18_imagenette_fp32_v1.h5 - 134604992/134604992 [==============================] - 30s 0us/step + 134604992/134604992 [==============================] - 36s 0us/step Absolute path where the model weights are saved: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/305-tensorflow-quantization-aware-training/model/ResNet-18_fp32.h5 - + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/305-tensorflow-quantization-aware-training/model/ResNet-18_fp32.h5 -Dataset Preprocessing `⇑ <#top>`__ -############################################################################################################################### +Dataset Preprocessing +--------------------------------------------------------------- Download and prepare Imagenette 160px dataset. - Number of classes: 10 - Download size: 94.18 MiB @@ -152,13 +163,9 @@ Download size: 94.18 MiB .. parsed-literal:: - 2023-08-16 01:18:08.016585: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. 
Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2023-10-31 00:22:41.251776: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... - 2023-08-16 01:18:08.132762: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1] - [[{{node Placeholder/_1}}]] - 2023-08-16 01:18:08.133087: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1] - [[{{node Placeholder/_0}}]] - 2023-08-16 01:18:08.170026: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2023-10-31 00:22:41.423281: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. 
In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. @@ -183,9 +190,8 @@ Download size: 94.18 MiB .batch(BATCH_SIZE) .prefetch(tf.data.experimental.AUTOTUNE)) -Define a Floating-Point Model `⇑ <#top>`__ -############################################################################################################################### - +Define a Floating-Point Model +----------------------------------------------------------------------- .. code:: ipython3 @@ -259,20 +265,16 @@ Define a Floating-Point Model `⇑ <#top>`__ IMG_SHAPE = IMG_SIZE + (3,) fp32_model = ResNet18(input_shape=IMG_SHAPE) -Pre-train a Floating-Point Model `⇑ <#top>`__ -############################################################################################################################### - +Pre-train a Floating-Point Model +-------------------------------------------------------------------------- Using NNCF for model compression assumes that the user has a pre-trained model and a training pipeline. -.. note:: - - For the sake of simplicity of the tutorial, it is + **NOTE**: For the sake of simplicity of the tutorial, it is recommended to skip ``FP32`` model training and load the weights that are provided. - .. code:: ipython3 # Load the floating-point weights. @@ -294,22 +296,13 @@ model and a training pipeline. ..
parsed-literal:: - 2023-08-16 01:18:09.025847: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [1] - [[{{node Placeholder/_1}}]] - 2023-08-16 01:18:09.026203: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [1] - [[{{node Placeholder/_0}}]] - - -.. parsed-literal:: - - 4/4 [==============================] - 1s 229ms/sample - loss: 0.9807 - acc@1: 0.8220 + 4/4 [==============================] - 1s 161ms/sample - loss: 0.9807 - acc@1: 0.8220 Accuracy of FP32 model: 0.822 -Create and Initialize Quantization `⇑ <#top>`__ -############################################################################################################################### - +Create and Initialize Quantization +---------------------------------------------------------------------------- NNCF enables compression-aware training by integrating into regular training pipelines. The framework is designed so that modifications to @@ -349,13 +342,9 @@ scenario and requires only 3 modifications. .. 
parsed-literal:: - 2023-08-16 01:18:11.729441: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int64 and shape [1] - [[{{node Placeholder/_4}}]] - 2023-08-16 01:18:11.729828: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_3' with dtype int64 and shape [1] - [[{{node Placeholder/_3}}]] - 2023-08-16 01:18:12.738622: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. - 2023-08-16 01:18:13.389616: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. - 2023-08-16 01:18:21.360841: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. 
+ 2023-10-31 00:22:45.577314: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2023-10-31 00:22:46.107962: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + 2023-10-31 00:22:52.452611: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. Evaluate the new model on the validation set after initialization of @@ -381,12 +370,11 @@ demonstrated here. .. parsed-literal:: - 4/4 [==============================] - 1s 301ms/sample - loss: 0.9766 - acc@1: 0.8120 + 4/4 [==============================] - 1s 254ms/sample - loss: 0.9773 - acc@1: 0.8060 -Fine-tune the Compressed Model `⇑ <#top>`__ -############################################################################################################################### - +Fine-tune the Compressed Model +------------------------------------------------------------------------ At this step, a regular fine-tuning process is applied to further improve quantized model accuracy. 
Normally, several epochs of tuning are @@ -412,76 +400,59 @@ training pipeline are required. Here is a simple example. .. parsed-literal:: - Accuracy of INT8 model after initialization: 0.812 + Accuracy of INT8 model after initialization: 0.806 Epoch 1/2 - 101/101 [==============================] - 49s 417ms/step - loss: 0.7134 - acc@1: 0.9299 + 101/101 [==============================] - 41s 341ms/step - loss: 0.7136 - acc@1: 0.9297 Epoch 2/2 - 101/101 [==============================] - 42s 414ms/step - loss: 0.6807 - acc@1: 0.9489 - 4/4 [==============================] - 1s 144ms/sample - loss: 0.9760 - acc@1: 0.8160 + 101/101 [==============================] - 33s 327ms/step - loss: 0.6803 - acc@1: 0.9500 + 4/4 [==============================] - 0s 92ms/sample - loss: 0.9780 - acc@1: 0.8220 - Accuracy of INT8 model after fine-tuning: 0.816 + Accuracy of INT8 model after fine-tuning: 0.822 - Accuracy drop of tuned INT8 model over pre-trained FP32 model: 0.006 - + Accuracy drop of tuned INT8 model over pre-trained FP32 model: 0.000 -Export Models to OpenVINO Intermediate Representation (IR) `⇑ <#top>`__ -############################################################################################################################### +Export Models to OpenVINO Intermediate Representation (IR) +---------------------------------------------------------------------------------------------------- Use model conversion Python API to convert the models to OpenVINO IR. For more information about model conversion, see this -`page `__. +`page `__. Executing this command may take a while. .. code:: ipython3 - model_ir_fp32 = mo.convert_model( - fp32_model, - input_shape=[1, 64, 64, 3], - ) + model_ir_fp32 = ov.convert_model(fp32_model) .. 
parsed-literal:: - 2023-08-16 01:19:54.530759: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2 - 2023-08-16 01:19:54.530838: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session - 2023-08-16 01:19:54.651453: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. - Skipping registering GPU devices... - - -.. code:: ipython3 - - model_ir_int8 = mo.convert_model( - int8_model, - input_shape=[1, 64, 64, 3], - ) + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. .. parsed-literal:: - 2023-08-16 01:19:56.200644: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2 - 2023-08-16 01:19:56.200714: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session - 2023-08-16 01:19:56.202200: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. - Skipping registering GPU devices... + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. 
-Benchmark Model Performance by Computing Inference Time `⇑ <#top>`__ -############################################################################################################################### +.. code:: ipython3 + model_ir_int8 = ov.convert_model(int8_model) + +Benchmark Model Performance by Computing Inference Time +------------------------------------------------------------------------------------------------- Finally, measure the inference performance of the ``FP32`` and ``INT8`` models, using `Benchmark -Tool `__ +Tool `__ - an inference performance measurement tool in OpenVINO. By default, Benchmark Tool runs inference for 60 seconds in asynchronous mode on CPU. It returns inference speed as latency (milliseconds per image) and throughput (frames per second) values. -.. note:: - - This notebook runs ``benchmark_app`` for 15 seconds to give + **NOTE**: This notebook runs ``benchmark_app`` for 15 seconds to give a quick indication of performance. For more accurate performance, it is recommended to run ``benchmark_app`` in a terminal/command prompt after closing other applications. Run @@ -490,11 +461,10 @@ throughput (frames per second) values. ``benchmark_app --help`` to see an overview of all command-line options. - .. code:: ipython3 - serialize(model_ir_fp32, str(fp32_ir_path)) - serialize(model_ir_int8, str(int8_ir_path)) + ov.save_model(model_ir_fp32, fp32_ir_path, compress_to_fp16=False) + ov.save_model(model_ir_int8, int8_ir_path, compress_to_fp16=False) def parse_benchmark_output(benchmark_output): @@ -503,31 +473,29 @@ throughput (frames per second) values. print('Benchmark FP32 model (IR)') - benchmark_output = ! benchmark_app -m $fp32_ir_path -d CPU -api async -t 15 + benchmark_output = ! benchmark_app -m $fp32_ir_path -d CPU -api async -t 15 -shape [1,64,64,3] parse_benchmark_output(benchmark_output) print('\nBenchmark INT8 model (IR)') - benchmark_output = ! benchmark_app -m $int8_ir_path -d CPU -api async -t 15 + benchmark_output = ! 
benchmark_app -m $int8_ir_path -d CPU -api async -t 15 -shape [1,64,64,3] parse_benchmark_output(benchmark_output) .. parsed-literal:: Benchmark FP32 model (IR) - [ INFO ] Throughput: 2831.57 FPS + [ INFO ] Throughput: 2851.18 FPS Benchmark INT8 model (IR) - [ INFO ] Throughput: 11769.65 FPS + [ INFO ] Throughput: 11461.97 FPS Show CPU Information for reference. .. code:: ipython3 - from openvino.runtime import Core - - ie = Core() - ie.get_property('CPU', "FULL_DEVICE_NAME") + core = ov.Core() + core.get_property('CPU', "FULL_DEVICE_NAME") diff --git a/docs/notebooks/305-tensorflow-quantization-aware-training-with-output_files/index.html b/docs/notebooks/305-tensorflow-quantization-aware-training-with-output_files/index.html index 015ff50edb4840..7cc6dbfbfb7b98 100644 --- a/docs/notebooks/305-tensorflow-quantization-aware-training-with-output_files/index.html +++ b/docs/notebooks/305-tensorflow-quantization-aware-training-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/305-tensorflow-quantization-aware-training-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/305-tensorflow-quantization-aware-training-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/305-tensorflow-quantization-aware-training-with-output_files/


../
-305-tensorflow-quantization-aware-training-with..> 16-Aug-2023 01:31              519560
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/305-tensorflow-quantization-aware-training-with-output_files/


../
+305-tensorflow-quantization-aware-training-with..> 31-Oct-2023 00:35              519560
 

diff --git a/docs/notebooks/401-object-detection-with-output.rst b/docs/notebooks/401-object-detection-with-output.rst index da6f2e47f99c40..fa57f7eae4cc92 100644 --- a/docs/notebooks/401-object-detection-with-output.rst +++ b/docs/notebooks/401-object-detection-with-output.rst @@ -1,8 +1,6 @@ Live Object Detection with OpenVINO™ ==================================== - - This notebook demonstrates live object detection with OpenVINO, using the `SSDLite MobileNetV2 `__ @@ -11,52 +9,49 @@ Zoo `__. Final part of this notebook shows live inference results from a webcam. Additionally, you can also upload a video file. -.. note:: - - To use this notebook with a webcam, you need to run the notebook on a computer - with a webcam. If you run the notebook on a server, the webcam will not work. - However, you can still do inference on a video. + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + server, the webcam will not work. However, you can still do inference + on a video. -.. 
_top: +**Table of contents:** -**Table of contents**: -- `Preparation <#preparation>`__ +- `Preparation <#preparation>`__ - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ + - `Install requirements <#install-requirements>`__ + - `Imports <#imports>`__ -- `The Model <#the-model>`__ +- `The Model <#the-model>`__ - - `Download the Model <#download-the-model>`__ - - `Convert the Model <#convert-the-model>`__ - - `Load the Model <#load-the-model>`__ + - `Download the Model <#download-the-model>`__ + - `Convert the Model <#convert-the-model>`__ + - `Load the Model <#load-the-model>`__ -- `Processing <#processing>`__ +- `Processing <#processing>`__ - - `Process Results <#process-results>`__ - - `Main Processing Function <#main-processing-function>`__ + - `Process Results <#process-results>`__ + - `Main Processing Function <#main-processing-function>`__ -- `Run <#run>`__ +- `Run <#run>`__ - - `Run Live Object Detection <#run-live-object-detection>`__ - - `Run Object Detection on a Video File <#run-object-detection-on-a-video-file>`__ + - `Run Live Object Detection <#run-live-object-detection>`__ + - `Run Object Detection on a Video + File <#run-object-detection-on-a-video-file>`__ -- `References <#references>`__ +- `References <#references>`__ -Preparation `⇑ <#top>`__ -############################################################################################################################### - - -Install requirements `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q tensorflow - !pip install -q opencv-python requests tqdm + %pip install -q "openvino-dev>=2023.1.0" + %pip install -q tensorflow + %pip install -q opencv-python requests tqdm # Fetch `notebook_utils` module import urllib.request @@ -68,22 +63,29 @@ Install requirements `⇑ <#top>`__ .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
+ DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + fastapi 0.104.1 requires typing-extensions>=4.8.0, but you have typing-extensions 4.5.0 which is incompatible. + pydantic 2.4.2 requires typing-extensions>=4.6.1, but you have typing-extensions 4.5.0 which is incompatible. + pydantic-core 2.10.1 requires typing-extensions!=4.7.0,>=4.6.0, but you have typing-extensions 4.5.0 which is incompatible. + pytorch-lightning 1.6.5 requires protobuf<=3.20.1, but you have protobuf 3.20.3 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. -.. parsed-literal:: - ('notebook_utils.py', ) +.. parsed-literal:: + ('notebook_utils.py', ) -Imports `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 @@ -95,19 +97,17 @@ Imports `⇑ <#top>`__ import cv2 import numpy as np from IPython import display - from openvino import runtime as ov + import openvino as ov from openvino.tools.mo.front import tf as ov_tf_front from openvino.tools import mo import notebook_utils as utils -The Model `⇑ <#top>`__ -############################################################################################################################### - - -Download the Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The Model +--------------------------------------------------- +Download the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the ``download_file``, a function from the ``notebook_utils`` file. It automatically creates a directory structure and downloads the @@ -116,10 +116,9 @@ downloaded and unpacked. The chosen model comes from the public directory, which means it must be converted into OpenVINO Intermediate Representation (OpenVINO IR). -.. note:: - - Using a model other than ``ssdlite_mobilenet_v2`` may require different - conversion parameters as well as pre- and post-processing. + **NOTE**: Using a model other than ``ssdlite_mobilenet_v2`` may + require different conversion parameters as well as pre- and + post-processing. .. code:: ipython3 @@ -150,13 +149,12 @@ Representation (OpenVINO IR). model/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz: 0%| | 0.00/48.7M [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Convert the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The pre-trained model is in TensorFlow format. To use it with OpenVINO, convert it to OpenVINO IR format, using `model conversion Python -API `__ +API `__ (``mo.convert_model`` function). If the model has been already converted, this step is skipped. 
@@ -176,7 +174,7 @@ converted, this step is skipped. tensorflow_object_detection_api_pipeline_config=tf_model_path.parent / "pipeline.config", reverse_input_channels=True ) - ov.serialize(ov_model, converted_model_path) + ov.save_model(ov_model, converted_model_path) del ov_model @@ -185,9 +183,8 @@ converted, this step is skipped. [ WARNING ] The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if applicable) are kept. -Load the Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Only a few lines of code are required to run the model. First, initialize OpenVINO Runtime. Then, read the network architecture and @@ -224,8 +221,6 @@ best performance. For that purpose, just use ``AUTO``. .. code:: ipython3 - # Initialize OpenVINO Runtime. - core = ov.Core() # Read the network and corresponding weights from a file. model = core.read_model(model=converted_model_path) # Compile the model for CPU (you can choose manually CPU, GPU etc.) @@ -256,13 +251,11 @@ output. -Processing `⇑ <#top>`__ -############################################################################################################################### - - -Process Results `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Processing +---------------------------------------------------- +Process Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First, list all available classes and create colors for them. Then, in the post-process stage, transform boxes with normalized coordinates @@ -351,9 +344,8 @@ threshold (0.5). Finally, draw boxes and labels inside them. 
return frame -Main Processing Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Run object detection on the specified source. Either a webcam or a video file. @@ -463,13 +455,11 @@ file. if use_popup: cv2.destroyAllWindows() -Run `⇑ <#top>`__ -############################################################################################################################### - - -Run Live Object Detection `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run +--------------------------------------------- +Run Live Object Detection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use a webcam as the video input. By default, the primary webcam is set with ``source=0``. If you have multiple webcams, each one will be @@ -478,12 +468,10 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set ``use_popup=True``. -.. note:: - - To use this notebook with a webcam, you need to run the - notebook on a computer with a webcam. If you run the notebook on a - server (for example, Binder), the webcam will not work. Popup mode - may not work if you run this notebook on a remote computer (for + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + server (for example, Binder), the webcam will not work. Popup mode + may not work if you run this notebook on a remote computer (for example, Binder). Run the object detection: @@ -500,13 +488,12 @@ Run the object detection: .. 
parsed-literal:: - [ WARN:0@44.255] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index - [ERROR:0@44.255] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range + [ WARN:0@44.947] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index + [ERROR:0@44.947] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range -Run Object Detection on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Object Detection on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you do not have a webcam, you can still run this demo with a video file. Any `format supported by @@ -529,9 +516,8 @@ will work. Source ended -References `⇑ <#top>`__ -############################################################################################################################### - +References +---------------------------------------------------- 1. 
`SSDLite MobileNetV2 `__ diff --git a/docs/notebooks/401-object-detection-with-output_files/401-object-detection-with-output_21_0.png b/docs/notebooks/401-object-detection-with-output_files/401-object-detection-with-output_21_0.png index 8f1c9d1ae95a58..d8eaf4f81b4009 100644 --- a/docs/notebooks/401-object-detection-with-output_files/401-object-detection-with-output_21_0.png +++ b/docs/notebooks/401-object-detection-with-output_files/401-object-detection-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae3c173441be8e7cfd682e02f750cb4d02dc0b3678ad37c8c7bb8f41d15d4440 -size 174850 +oid sha256:1222910d70679a85c4479c9a9e5ccad9c47cd4bf95ef123f5db32f3f12896e57 +size 174868 diff --git a/docs/notebooks/401-object-detection-with-output_files/index.html b/docs/notebooks/401-object-detection-with-output_files/index.html index 67469b5a0eb894..7c8c50da01c69a 100644 --- a/docs/notebooks/401-object-detection-with-output_files/index.html +++ b/docs/notebooks/401-object-detection-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/401-object-detection-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/401-object-detection-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/401-object-detection-with-output_files/


../
-401-object-detection-with-output_21_0.png          16-Aug-2023 01:31              174850
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/401-object-detection-with-output_files/


../
+401-object-detection-with-output_21_0.png          31-Oct-2023 00:35              174868
 

diff --git a/docs/notebooks/402-pose-estimation-with-output.rst b/docs/notebooks/402-pose-estimation-with-output.rst index efe0ffcdd5564c..05c0c65e80db80 100644 --- a/docs/notebooks/402-pose-estimation-with-output.rst +++ b/docs/notebooks/402-pose-estimation-with-output.rst @@ -1,8 +1,6 @@ Live Human Pose Estimation with OpenVINO™ ========================================= - - This notebook demonstrates live pose estimation with OpenVINO, using the OpenPose `human-pose-estimation-0001 `__ @@ -11,38 +9,41 @@ Zoo `__. Final part of this notebook shows live inference results from a webcam. Additionally, you can also upload a video file. -.. note:: - - To use a webcam, you must run this Jupyter notebook on a + **NOTE**: To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a server, the webcam will not work. However, you can still do inference on a video in the final step. -.. _top: -**Table of contents**: +**Table of contents:** +--- - `Imports <#imports>`__ - `The model <#the-model>`__ +- `Download the model <#download-the-model>`__ +- `Load the model <#load-the-model>`__ +- `Processing <#processing>`__ +- `OpenPose Decoder <#openpose-decoder>`__ +- `Process Results <#process-results>`__ +- `Draw Pose Overlays <#draw-pose-overlays>`__ +- `Main Processing Function <#main-processing-function>`__ +- `Run <#run>`__ +- `Run Live Pose Estimation <#run-live-pose-estimation>`__ +- `Run Pose Estimation on a Video File <#run-pose-estimation-on-a-video-file>`__ - - `Download the model <#download-the-model>`__ - - `Load the model <#load-the-model>`__ +.. code:: ipython3 -- `Processing <#processing>`__ + %pip install -q "openvino>=2023.1.0" - - `OpenPose Decoder <#openpose-decoder>`__ - - `Process Results <#process-results>`__ - - `Draw Pose Overlays <#draw-pose-overlays>`__ - - `Main Processing Function <#main-processing-function>`__ -- `Run <#run>`__ +.. 
parsed-literal:: - - `Run Live Pose Estimation <#run-live-pose-estimation>`__ - - `Run Pose Estimation on a Video File <#run-pose-estimation-on-a-video-file>`__ + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. -Imports `⇑ <#top>`__ -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 @@ -55,20 +56,18 @@ Imports `⇑ <#top>`__ import numpy as np from IPython import display from numpy.lib.stride_tricks import as_strided - from openvino.runtime import Core + import openvino as ov from decoder import OpenPoseDecoder sys.path.append("../utils") import notebook_utils as utils -The model `⇑ <#top>`__ -############################################################################################################################### - - -Download the model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The model +--------------------------------------------------- +Download the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the ``download_file``, a function from the ``notebook_utils`` file. It automatically creates a directory structure and downloads the @@ -77,10 +76,7 @@ selected model. If you want to download another model, replace the name of the model and precision in the code below. -.. note:: - - This may require a different pose decoder. - + **NOTE**: This may require a different pose decoder. .. 
code:: ipython3 @@ -112,9 +108,8 @@ precision in the code below. model/intel/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.bin: 0%| | 0.00/4.03M [… -Load the model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Downloaded models are located in a fixed structure, which indicates a vendor, the name of the model and a precision. @@ -129,7 +124,7 @@ using OpenVINO. import ipywidgets as widgets - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -152,7 +147,7 @@ using OpenVINO. .. code:: ipython3 # Initialize OpenVINO Runtime - core = Core() + core = ov.Core() # Read the network from a file. model = core.read_model(model_path) # Let the AUTO device decide where to load the model (you can use CPU, GPU as well). @@ -182,13 +177,11 @@ there is 1 input and 2 outputs: PAFs and keypoints heatmap. -Processing `⇑ <#top>`__ -############################################################################################################################### - - -OpenPose Decoder `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Processing +---------------------------------------------------- +OpenPose Decoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To transform the raw results from the neural network into pose estimations, you need OpenPose Decoder. It is provided in the `Open @@ -206,9 +199,8 @@ of Open Model Zoo. decoder = OpenPoseDecoder() -Process Results `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Process Results +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A bunch of useful functions to transform results into poses. 
@@ -278,9 +270,8 @@ factor. poses[:, :, :2] *= output_scale return poses, scores -Draw Pose Overlays `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Draw Pose Overlays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Draw pose overlays on the image to visualize estimated poses. Joints are drawn as circles and limbs are drawn as lines. The code is based on the @@ -317,9 +308,8 @@ from Open Model Zoo. cv2.addWeighted(img, 0.4, img_limbs, 0.6, 0, dst=img) return img -Main Processing Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Run pose estimation on the specified source. Either a webcam or a video file. @@ -413,13 +403,11 @@ file. if use_popup: cv2.destroyAllWindows() -Run `⇑ <#top>`__ -############################################################################################################################### - - -Run Live Pose Estimation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run +--------------------------------------------- +Run Live Pose Estimation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use a webcam as the video input. By default, the primary webcam is set with ``source=0``. If you have multiple webcams, each one will be @@ -428,15 +416,12 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set ``use_popup=True``. -.. note:: - - To use this notebook with a webcam, you need to run the + **NOTE**: To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. 
If you run the notebook on a server (for example, Binder), the webcam will not work. Popup mode may not work if you run this notebook on a remote computer (for example, Binder). - Run the pose estimation: .. code:: ipython3 @@ -451,13 +436,12 @@ Run the pose estimation: .. parsed-literal:: - [ WARN:0@2.649] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index - [ERROR:0@2.649] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range - + [ WARN:0@2.988] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index + [ERROR:0@2.988] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range -Run Pose Estimation on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run Pose Estimation on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you do not have a webcam, you can still run this demo with a video file. Any `format supported by @@ -472,7 +456,7 @@ will work. You can skip first ``N`` frames to fast forward video. -.. image:: 402-pose-estimation-with-output_files/402-pose-estimation-with-output_21_0.png +.. image:: 402-pose-estimation-with-output_files/402-pose-estimation-with-output_22_0.png .. 
parsed-literal:: diff --git a/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_21_0.png b/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_21_0.png deleted file mode 100644 index 450f6ed81d6400..00000000000000 --- a/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_21_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e78567346df5ffd2550196f70571f59da85af3d23cb2d916967b1bcd84e9dd8c -size 107992 diff --git a/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_22_0.png b/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_22_0.png new file mode 100644 index 00000000000000..c5d56d0cfbc96a --- /dev/null +++ b/docs/notebooks/402-pose-estimation-with-output_files/402-pose-estimation-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145f30f97a4052b5decd5603293adb80facf4481117a7140003b950dd1e34455 +size 107979 diff --git a/docs/notebooks/402-pose-estimation-with-output_files/index.html b/docs/notebooks/402-pose-estimation-with-output_files/index.html index a8595cfe92a04b..48f328997194ab 100644 --- a/docs/notebooks/402-pose-estimation-with-output_files/index.html +++ b/docs/notebooks/402-pose-estimation-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/402-pose-estimation-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/402-pose-estimation-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/402-pose-estimation-with-output_files/


../
-402-pose-estimation-with-output_21_0.png           16-Aug-2023 01:31              107992
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/402-pose-estimation-with-output_files/


../
+402-pose-estimation-with-output_22_0.png           31-Oct-2023 00:35              107979
 

diff --git a/docs/notebooks/403-action-recognition-webcam-with-output.rst b/docs/notebooks/403-action-recognition-webcam-with-output.rst index d6755518701ca1..255e4a1e8f2d87 100644 --- a/docs/notebooks/403-action-recognition-webcam-with-output.rst +++ b/docs/notebooks/403-action-recognition-webcam-with-output.rst @@ -1,8 +1,6 @@ Human Action Recognition with OpenVINO™ ======================================= - - This notebook demonstrates live human action recognition with OpenVINO, using the `Action Recognition Models `__ from `Open @@ -20,15 +18,13 @@ notebook shows how to create the following pipeline: Final part of this notebook shows live inference results from a webcam. Additionally, you can also upload a video file. -.. note:: - - To use a webcam, you must run this Jupyter notebook on a computer with a webcam. - If you run on a server, the webcam will not work. However, you can still do - inference on a video in the final step. +**NOTE**: To use a webcam, you must run this Jupyter notebook on a +computer with a webcam. If you run on a server, the webcam will not +work. However, you can still do inference on a video in the final step. -------------- -[1] ``seq2seq``: Deep learning models that take a sequence of items to the +[1] seq2seq: Deep learning models that take a sequence of items to the input and output. In this case, input: video frames, output: actions sequence. This ``"seq2seq"`` is composed of an encoder and a decoder. The encoder captures ``"context"`` of the inputs to be analyzed by the @@ -39,54 +35,69 @@ Transformer and `ResNet34 `__. -.. 
_top: - -**Table of contents**: +**Table of contents:** - `Imports <#imports>`__ -- `The models <#the-models>`__ - - `Download the models <#download-the-models>`__ - - `Load your labels <#load-your-labels>`__ - - `Load the models <#load-the-models>`__ +- `The models <#the-models>`__ + + - `Download the models <#download-the-models>`__ + - `Load your labels <#load-your-labels>`__ + - `Load the models <#load-the-models>`__ + + - `Model Initialization + function <#model-initialization-function>`__ + - `Initialization for Encoder and + Decoder <#initialization-for-encoder-and-decoder>`__ + + - `Helper functions <#helper-functions>`__ + - `AI Functions <#ai-functions>`__ + - `Main Processing Function <#main-processing-function>`__ + - `Run Action Recognition on a Video + File <#run-action-recognition-on-a-video-file>`__ + - `Run Action Recognition Using a + Webcam <#run-action-recognition-using-a-webcam>`__ + +.. code:: ipython3 + + %pip install -q "openvino-dev>=2023.1.0" + - - `Model Initialization function <#model-initialization-function>`__ - - `Initialization for Encoder and Decoder <#initialization-for-encoder-and-decoder>`__ +.. parsed-literal:: - - `Helper functions <#helper-functions>`__ - - `AI Functions <#ai-functions>`__ - - `Main Processing Function <#main-processing-function>`__ - - `Run Action Recognition on a Video File <#run-action-recognition-on-a-video-file>`__ - - `Run Action Recognition Using a Webcam <#run-action-recognition-using-a-webcam>`__ + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
-Imports `⇑ <#top>`__ -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 import collections import os - import sys import time from typing import Tuple, List import cv2 import numpy as np from IPython import display - from openvino.runtime import Core + import openvino as ov from openvino.runtime.ie_api import CompiledModel - sys.path.append("../utils") + # Fetch `notebook_utils` module + import urllib.request + urllib.request.urlretrieve( + url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/notebook_utils.py', + filename='notebook_utils.py' + ) import notebook_utils as utils -The models `⇑ <#top>`__ -############################################################################################################################### - - -Download the models `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The models +---------------------------------------------------- +Download the models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use ``omz_downloader``, which is a command-line tool from the ``openvino-dev`` package. It automatically creates a directory structure @@ -97,11 +108,10 @@ and the system automatically downloads the two models ``"action-recognition-0001-encoder"`` and ``"action-recognition-0001-decoder"`` -.. 
note:: - - If you want to download another model, such as - ``"driver-action-recognition-adas-0002"`` (``"driver-action-recognition-adas-0002-encoder"`` - + ``"driver-action-recognition-adas-0002-decoder"``), replace the name + **NOTE**: If you want to download another model, such as + ``"driver-action-recognition-adas-0002"`` + (``"driver-action-recognition-adas-0002-encoder"`` + + ``"driver-action-recognition-adas-0002-decoder"``), replace the name of the model in the code below. Using a model outside the list can require different pre- and post-processing. @@ -147,40 +157,45 @@ and the system automatically downloads the two models -Load your labels `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load your labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tutorial uses `Kinetics-400 dataset `__, and also provides the text file embedded into this notebook. -.. note:: - - If you want to run + **NOTE**: If you want to run ``"driver-action-recognition-adas-0002"`` model, replace the ``kinetics.txt`` file to ``driver_actions.txt``. - .. code:: ipython3 - labels = "../data/text/kinetics.txt" + # Download the text from the openvino_notebooks storage + vocab_file_path = utils.download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/kinetics.txt", + directory="data" + ) - with open(labels) as f: + with vocab_file_path.open(mode='r') as f: labels = [line.strip() for line in f] print(labels[0:9], np.shape(labels)) + .. parsed-literal:: - ['abseiling', 'air drumming', 'answering questions', 'applauding', 'applying cream', 'archery', 'arm wrestling', 'arranging flowers', 'assembling computer'] (400,) + data/kinetics.txt: 0%| | 0.00/5.82k [00:00`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. 
parsed-literal:: + + ['abseiling', 'air drumming', 'answering questions', 'applauding', 'applying cream', 'archery', 'arm wrestling', 'arranging flowers', 'assembling computer'] (400,) +Load the models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Load the two models for this particular architecture, Encoder and Decoder. Downloaded models are located in a fixed structure, indicating a vendor, the name of the model, and a precision. @@ -199,7 +214,7 @@ Select device from dropdown list for running inference using OpenVINO import ipywidgets as widgets - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], value='AUTO', @@ -218,14 +233,13 @@ Select device from dropdown list for running inference using OpenVINO -Model Initialization function `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Model Initialization function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 # Initialize OpenVINO Runtime. - core = Core() + core = ov.Core() def model_init(model_path: str, device: str) -> Tuple: @@ -251,9 +265,8 @@ Model Initialization function `⇑ <#top>`__ output_keys = compiled_model.output(0) return input_keys, output_keys, compiled_model -Initialization for Encoder and Decoder `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Initialization for Encoder and Decoder +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -267,9 +280,8 @@ Initialization for Encoder and Decoder `⇑ <#top>`__ # Get input size - Decoder. 
frames2decode = list(input_key_de.shape)[0:][1] -Helper functions `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Helper functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the following helper functions for preprocessing and postprocessing frames: @@ -389,9 +401,8 @@ frames: cv2.putText(frame, display_text, text_loc2, FONT_STYLE, FONT_SIZE, FONT_COLOR2) cv2.putText(frame, display_text, text_loc, FONT_STYLE, FONT_SIZE, FONT_COLOR) -AI Functions `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +AI Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Following the pipeline above, you will use the next functions to: @@ -480,9 +491,8 @@ Following the pipeline above, you will use the next functions to: exp = np.exp(x) return exp / np.sum(exp, axis=None) -Main Processing Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Running action recognition function will run in different operations, either a webcam or a video file. See the list of procedures below: @@ -632,9 +642,8 @@ either a webcam or a video file. See the list of procedures below: if use_popup: cv2.destroyAllWindows() -Run Action Recognition on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Action Recognition on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Find out how the model works in a video file. `Any format supported `__ @@ -642,11 +651,10 @@ by OpenCV will work. 
You can press the stop button anytime while the video file is running, and it will activate the webcam for the next step. -.. note:: - - Sometimes, the video can be cut off if there are corrupted frames. In that - case, you can convert it. If you experience any problems with your video, - use the `HandBrake `__ and select the MPEG format. + **NOTE**: Sometimes, the video can be cut off if there are corrupted + frames. In that case, you can convert it. If you experience any + problems with your video, use the + `HandBrake `__ and select the MPEG format. .. code:: ipython3 @@ -655,7 +663,7 @@ step. -.. image:: 403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_21_0.png +.. image:: 403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_22_0.png .. parsed-literal:: @@ -663,20 +671,16 @@ step. Source ended -Run Action Recognition Using a Webcam `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Action Recognition Using a Webcam +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, try to see yourself in your webcam. -.. note:: - - To use a webcam, you must run this Jupyter notebook on a + **NOTE**: To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a server, the webcam will not work. However, you can still do inference on a video file in the final step. - .. code:: ipython3 run_action_recognition(source=0, flip=False, use_popup=False, skip_first_frames=0) @@ -689,6 +693,6 @@ Now, try to see yourself in your webcam. .. 
parsed-literal:: - [ WARN:0@319.035] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index - [ERROR:0@319.035] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range + [ WARN:0@320.581] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index + [ERROR:0@320.581] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range diff --git a/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_21_0.png b/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_21_0.png deleted file mode 100644 index c1b7138364d485..00000000000000 --- a/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_21_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:789f37b6428ab4468a32c7ea70838dcc6eeea93650647a75d4202d814e5af43e -size 68035 diff --git a/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_22_0.png b/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_22_0.png new file mode 100644 index 00000000000000..4e728399b2fe2c --- /dev/null +++ b/docs/notebooks/403-action-recognition-webcam-with-output_files/403-action-recognition-webcam-with-output_22_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3abdeeb2ddfb70c070fd4a38ec2492f59824ac180fc9d86ffcb40df36e2dbb +size 68195 diff --git a/docs/notebooks/403-action-recognition-webcam-with-output_files/index.html b/docs/notebooks/403-action-recognition-webcam-with-output_files/index.html index 80637667b61cc3..9ccc73f9bcfe4e 100644 --- a/docs/notebooks/403-action-recognition-webcam-with-output_files/index.html +++ b/docs/notebooks/403-action-recognition-webcam-with-output_files/index.html @@ -1,7 +1,7 @@ 
-Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/403-action-recognition-webcam-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/403-action-recognition-webcam-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/403-action-recognition-webcam-with-output_files/


../
-403-action-recognition-webcam-with-output_21_0.png 16-Aug-2023 01:31               68035
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/403-action-recognition-webcam-with-output_files/


../
+403-action-recognition-webcam-with-output_22_0.png 31-Oct-2023 00:35               68195
 

diff --git a/docs/notebooks/404-style-transfer-with-output.rst b/docs/notebooks/404-style-transfer-with-output.rst index 630aca385b8d84..6c719a0f1b8070 100644 --- a/docs/notebooks/404-style-transfer-with-output.rst +++ b/docs/notebooks/404-style-transfer-with-output.rst @@ -1,8 +1,6 @@ Style Transfer with OpenVINO™ ============================= - - This notebook demonstrates style transfer with OpenVINO, using the Style Transfer Models from `ONNX Model Repository `__. Specifically, `Fast @@ -25,46 +23,37 @@ and Super-Resolution `__ along with part of this notebook shows live inference results from a webcam. Additionally, you can also upload a video file. -.. note:: - - If you have a webcam on your computer, you can see live results streaming in - the notebook. If you run the notebook on a server, the webcam will not work - but you can run inference, using a video file. + **NOTE**: If you have a webcam on your computer, you can see live + results streaming in the notebook. If you run the notebook on a + server, the webcam will not work but you can run inference, using a + video file. - -.. 
_top: - -**Table of contents**: +**Table of contents:** +--- - `Preparation <#preparation>`__ - - - `Install requirements <#install-requirements>`__ - - `Imports <#imports>`__ - +- `Install requirements <#install-requirements>`__ +- `Imports <#imports>`__ - `The Model <#the-model>`__ - - - `Download the Model <#download-the-model>`__ - - `Convert ONNX Model to OpenVINO IR Format <#convert-onnx-model-to-openvino-ir-format>`__ - - `Load the Model <#load-the-model>`__ - - `Preprocess the image <#preprocess-the-image>`__ - - `Helper function to postprocess the stylized image <#helper-function-to-postprocess-the-stylized-image>`__ - - `Main Processing Function <#main-processing-function>`__ - - `Run Style Transfer Using a Webcam <#run-style-transfer-using-a-webcam>`__ - - `Run Style Transfer on a Video File <#run-style-transfer-on-a-video-file>`__ - +- `Download the Model <#download-the-model>`__ +- `Convert ONNX Model to OpenVINO IR Format <#convert-onnx-model-to-openvino-ir-format>`__ +- `Load the Model <#load-the-model>`__ +- `Preprocess the image <#preprocess-the-image>`__ +- `Helper function to postprocess the stylized image <#helper-function-to-postprocess-the-stylized-image>`__ +- `Main Processing Function <#main-processing-function>`__ +- `Run Style Transfer Using a Webcam <#run-style-transfer-using-a-webcam>`__ +- `Run Style Transfer on a Video File <#run-style-transfer-on-a-video-file>`__ - `References <#references>`__ -Preparation `⇑ <#top>`__ -############################################################################################################################### - - -Install requirements `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preparation +----------------------------------------------------- +Install requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" + !pip install -q "openvino>=2023.1.0" !pip install -q opencv-python requests tqdm # Fetch `notebook_utils` module @@ -74,9 +63,8 @@ Install requirements `⇑ <#top>`__ filename='notebook_utils.py' ) -Imports `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Imports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -88,7 +76,7 @@ Imports `⇑ <#top>`__ from pathlib import Path from IPython import display from ipywidgets import interactive, ToggleButtons - from openvino.runtime import Core + import openvino as ov import notebook_utils as utils @@ -106,19 +94,17 @@ Pointilism to do the style transfer. interactive(lambda option: print(option), option=styleButtons) -The Model `⇑ <#top>`__ -############################################################################################################################### - - -Download the Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The Model +--------------------------------------------------- +Download the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The style transfer model, selected in the previous step, will be downloaded to ``model_path`` if you have not already downloaded it. The models are provided by the ONNX Model Zoo in ``.onnx`` format, which means it could be used with OpenVINO directly. However, this notebook -will also show how you can use the Model Optimizer to convert ONNX to +will also show how you can use the Conversion API to convert ONNX to OpenVINO Intermediate Representation (IR) with ``FP16`` precision. .. code:: ipython3 @@ -133,15 +119,14 @@ OpenVINO Intermediate Representation (IR) with ``FP16`` precision. 
style_url = f"{base_url}/{model_path}" utils.download_file(style_url, directory=base_model_dir) -Convert ONNX Model to OpenVINO IR Format `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Convert ONNX Model to OpenVINO IR Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the next step, you will convert the ONNX model to OpenVINO IR format with ``FP16`` precision. While ONNX models are directly supported by OpenVINO runtime, it can be useful to convert them to IR format to take advantage of OpenVINO optimization tools and features. The -``mo.convert_model`` Python function of model conversion API can be +``ov.convert_model`` Python function of model conversion API can be used. The converted model is saved to the model directory. The function returns instance of OpenVINO Model class, which is ready to use in Python interface but can also be serialized to OpenVINO IR format for @@ -151,11 +136,9 @@ this step. .. code:: ipython3 # Construct the command for model conversion API. - from openvino.runtime import serialize - from openvino.tools import mo - ov_model = mo.convert_model(f"model/{styleButtons.value.lower()}-9.onnx", compress_to_fp16=True) - serialize(ov_model, f"model/{styleButtons.value.lower()}-9.xml") + ov_model = ov.convert_model(f"model/{styleButtons.value.lower()}-9.onnx") + ov.save_model(ov_model, f"model/{styleButtons.value.lower()}-9.xml") .. code:: ipython3 @@ -163,9 +146,8 @@ this step. ir_path = Path(f"model/{styleButtons.value.lower()}-9.xml") onnx_path = Path(f"model/{model_path}") -Load the Model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load the Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Both the ONNX model(s) and converted IR model(s) are stored in the ``model`` directory. 
@@ -190,7 +172,7 @@ results. .. code:: ipython3 # Initialize OpenVINO Runtime. - core = Core() + core = ov.Core() # Read the network and corresponding weights from ONNX Model. # model = ie_core.read_model(model=onnx_path) @@ -235,11 +217,12 @@ respectively. For *fast-neural-style-mosaic-onnx*, there is 1 input and # Get the input size. N, C, H, W = list(input_layer.shape) -Preprocess the image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Preprocess the image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Preprocess the input image before running the model. Prepare the dimensions and channel order for the -image to match the original image with the input tensor +Preprocess the input image before running the model. Prepare the +dimensions and channel order for the image to match the original image +with the input tensor 1. Preprocess a frame to convert from ``unit8`` to ``float32``. 2. Transpose the array to match with the network input size @@ -264,11 +247,12 @@ image to match the original image with the input tensor image = np.expand_dims(image, axis=0) return image -Helper function to postprocess the stylized image `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Helper function to postprocess the stylized image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The converted IR model outputs a NumPy ``float32`` array of the -`(1, 3, 224,224) `__ +The converted IR model outputs a NumPy ``float32`` array of the `(1, 3, +224, +224) `__ shape . .. code:: ipython3 @@ -290,9 +274,8 @@ shape . 
stylized_image = cv2.cvtColor(stylized_image, cv2.COLOR_BGR2RGB) return stylized_image -Main Processing Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The style transfer function can be run in different operating modes, either using a webcam or a video file. @@ -388,9 +371,8 @@ either using a webcam or a video file. if use_popup: cv2.destroyAllWindows() -Run Style Transfer Using a Webcam `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Style Transfer Using a Webcam +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now, try to apply the style transfer model using video from your webcam. By default, the primary webcam is set with ``source=0``. If you have @@ -399,34 +381,28 @@ starting at 0. Set ``flip=True`` when using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set ``use_popup=True``. -.. note:: - - To use a webcam, you must run this Jupyter notebook on a + **NOTE**: To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run it on a server, you will not be able to access the webcam. However, you can still perform inference on a video file in the final step. - .. code:: ipython3 run_style_transfer(source=0, flip=True, use_popup=False) -Run Style Transfer on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Style Transfer on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can find out how the model works with a video file. For that, use -any `formats supported by OpenCV `__. 
+any `formats supported by +OpenCV `__. You can press the stop button to terminate anytime while the video file is running. -.. note:: - - Sometimes, the video will be cut off when frames are corrupted. If this - happens, or you experience any other problems with your video, use the - `HandBrake `__ encoder tool to create a video file in - MPEG format. - + **NOTE**: Sometimes, the video will be cut off when frames are + corrupted. If this happens, or you experience any other problems with + your video, use the `HandBrake `__ encoder + tool to create a video file in MPEG format. .. code:: ipython3 @@ -443,9 +419,8 @@ is running. Source ended -References `⇑ <#top>`__ -############################################################################################################################### - +References +---------------------------------------------------- 1. `ONNX Model Zoo `__ 2. `Fast Neural Style diff --git a/docs/notebooks/404-style-transfer-with-output_files/index.html b/docs/notebooks/404-style-transfer-with-output_files/index.html index a8ea8167a1fe42..782177ddeb3a37 100644 --- a/docs/notebooks/404-style-transfer-with-output_files/index.html +++ b/docs/notebooks/404-style-transfer-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/404-style-transfer-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/404-style-transfer-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/404-style-transfer-with-output_files/


../
-404-style-transfer-with-output_27_0.png            16-Aug-2023 01:31               81832
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/404-style-transfer-with-output_files/


../
+404-style-transfer-with-output_27_0.png            31-Oct-2023 00:35               81832
 

diff --git a/docs/notebooks/405-paddle-ocr-webcam-with-output.rst b/docs/notebooks/405-paddle-ocr-webcam-with-output.rst index 8f11e078ae975e..569ce9ed964455 100644 --- a/docs/notebooks/405-paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/405-paddle-ocr-webcam-with-output.rst @@ -1,8 +1,6 @@ PaddleOCR with OpenVINO™ ======================== - - This demo shows how to run PP-OCR model on OpenVINO natively. Instead of exporting the PaddlePaddle model to ONNX and then converting to the OpenVINO Intermediate Representation (OpenVINO IR) format with model @@ -19,49 +17,57 @@ GitHub `__ or `PaddleOCR Gitee `__. Working pipeline of the PaddleOCR is as follows: -.. note:: + **NOTE**: To use this notebook with a webcam, you need to run the + notebook on a computer with a webcam. If you run the notebook on a + server, the webcam will not work. You can still do inference on a + video file. - To use this notebook with a webcam, you need to run the notebook on a computer - with a webcam. If you run the notebook on a server, the webcam will not work. - You can still do inference on a video file. +**Table of contents:** -.. 
_top: -**Table of contents**: +- `Imports <#imports>`__ -- `Imports <#imports>`__ + - `Select inference device <#select-inference-device>`__ + - `Models for PaddleOCR <#models-for-paddleocr>`__ - - `Select inference device <#select-inference-device>`__ - - `Models for PaddleOCR <#models-for-paddleocr>`__ + - `Download the Model for Text + Detection <#download-the-model-for-text-detection>`__ + - `Load the Model for Text + Detection <#load-the-model-for-text-detection>`__ + - `Download the Model for Text + Recognition <#download-the-model-for-text-recognition>`__ + - `Load the Model for Text Recognition with Dynamic + Shape <#load-the-model-for-text-recognition-with-dynamic-shape>`__ - - `Download the Model for Text Detection <#download-the-model-for-text-detection>`__ - - `Load the Model for Text Detection <#load-the-model-for-text-detection>`__ - - `Download the Model for Text Recognition <#download-the-model-for-text-recognition>`__ - - `Load the Model for Text Recognition with Dynamic Shape <#load-the-model-for-text-recognition-with-dynamic-shape>`__ + - `Preprocessing Image Functions for Text Detection and + Recognition <#preprocessing-image-functions-for-text-detection-and-recognition>`__ + - `Postprocessing Image for Text + Detection <#postprocessing-image-for-text-detection>`__ + - `Main Processing Function for + PaddleOCR <#main-processing-function-for-paddleocr>`__ - - `Preprocessing Image Functions for Text Detection and Recognition <#preprocessing-image-functions-for-text-detection-and-recognition>`__ - - `Postprocessing Image for Text Detection <#postprocessing-image-for-text-detection>`__ - - `Main Processing Function for PaddleOCR <#main-processing-function-for-paddleocr>`__ - -- `Run Live PaddleOCR with OpenVINO <#run-live-paddleocr-with-openvino>`__ +- `Run Live PaddleOCR with + OpenVINO <#run-live-paddleocr-with-openvino>`__ .. 
code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q "paddlepaddle==2.5.0" - !pip install -q "pyclipper>=1.2.1" "shapely>=1.7.1" + %pip install -q "openvino>=2023.1.0" + %pip install -q "paddlepaddle==2.5.0" + %pip install -q "pyclipper>=1.2.1" "shapely>=1.7.1" .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. 
+ DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. -Imports `⇑ <#top>`__ -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 @@ -76,7 +82,7 @@ Imports `⇑ <#top>`__ from pathlib import Path import tarfile - from openvino.runtime import Core + import openvino as ov from IPython import display import copy @@ -98,17 +104,16 @@ Imports `⇑ <#top>`__ import notebook_utils as utils import pre_post_processing as processing -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. 
code:: ipython3 import ipywidgets as widgets - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -128,9 +133,8 @@ Select device from dropdown list for running inference using OpenVINO: -Models for PaddleOCR `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Models for PaddleOCR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PaddleOCR includes two parts of deep learning models, text detection and text recognition. Pre-trained models used in the demo are downloaded and @@ -173,9 +177,8 @@ files to load to CPU/GPU. else: print("Error Extracting the model. Please check the network.") -Download the Model for Text **Detection** `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Download the Model for Text **Detection** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -195,7 +198,7 @@ Download the Model for Text **Detection** `⇑ <#top>`__ .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/405-padd… + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/405-padd… .. parsed-literal:: @@ -204,14 +207,13 @@ Download the Model for Text **Detection** `⇑ <#top>`__ Model Extracted to model/ch_PP-OCRv3_det_infer/inference.pdmodel. -Load the Model for Text **Detection** `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Load the Model for Text **Detection** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 # Initialize OpenVINO Runtime for text detection. 
- core = Core() + core = ov.Core() det_model = core.read_model(model=det_model_file_path) det_compiled_model = core.compile_model(model=det_model, device_name=device.value) @@ -219,9 +221,8 @@ Load the Model for Text **Detection** `⇑ <#top>`__ det_input_layer = det_compiled_model.input(0) det_output_layer = det_compiled_model.output(0) -Download the Model for Text **Recognition** `⇑ <#top>`__ -------------------------------------------------------------------------------------------------------------------------------- - +Download the Model for Text **Recognition** +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. code:: ipython3 @@ -239,7 +240,7 @@ Download the Model for Text **Recognition** `⇑ <#top>`__ .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/405-padd… + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/405-padd… .. parsed-literal:: @@ -248,8 +249,8 @@ Download the Model for Text **Recognition** `⇑ <#top>`__ Model Extracted to model/ch_PP-OCRv3_rec_infer/inference.pdmodel. -Load the Model for Text **Recognition** with Dynamic Shape -`⇑ <#top>`__ +Load the Model for Text **Recognition** with Dynamic Shape +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Input to text recognition model refers to detected bounding boxes with different image sizes, for example, dynamic input shapes. Hence: @@ -277,9 +278,8 @@ different image sizes, for example, dynamic input shapes. Hence: rec_input_layer = rec_compiled_model.input(0) rec_output_layer = rec_compiled_model.output(0) -Preprocessing Image Functions for Text Detection and Recognition. 
`⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Preprocessing Image Functions for Text Detection and Recognition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Define preprocessing functions for text detection and recognition: 1. Preprocessing for text detection: resize and normalize input images. 2. @@ -394,9 +394,8 @@ with Chinese text) for easy batching in inference. norm_img_batch = norm_img_batch.copy() return norm_img_batch -Postprocessing Image for Text Detection `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Postprocessing Image for Text Detection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -434,9 +433,8 @@ Postprocessing Image for Text Detection `⇑ <#top>`__ dt_boxes = processing.filter_tag_det_res(dt_boxes, ori_im.shape) return dt_boxes -Main Processing Function for PaddleOCR `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function for PaddleOCR +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Run ``paddleOCR`` function in different operations, either a webcam or a video file. See the list of procedures below: @@ -608,9 +606,8 @@ video file. See the list of procedures below: if use_popup: cv2.destroyAllWindows() -Run Live PaddleOCR with OpenVINO `⇑ <#top>`__ -############################################################################################################################### - +Run Live PaddleOCR with OpenVINO +-------------------------------------------------------------------------- Use a webcam as the video input. By default, the primary webcam is set with ``source=0``. 
If you have multiple webcams, each one will be @@ -619,10 +616,8 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set ``use_popup=True``. -.. note:: - - Popup mode may not work if you run this notebook on a remote computer. - + **NOTE**: Popup mode may not work if you run this notebook on a + remote computer. Run live PaddleOCR: @@ -638,12 +633,13 @@ Run live PaddleOCR: .. parsed-literal:: - [ WARN:0@10.144] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index - [ERROR:0@10.145] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range + [ WARN:0@10.896] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index + [ERROR:0@10.896] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range If you do not have a webcam, you can still run this demo with a video -file. Any `format supported by OpenCV `__ +file. Any `format supported by +OpenCV `__ will work. .. 
code:: ipython3 diff --git a/docs/notebooks/405-paddle-ocr-webcam-with-output_files/405-paddle-ocr-webcam-with-output_32_0.png b/docs/notebooks/405-paddle-ocr-webcam-with-output_files/405-paddle-ocr-webcam-with-output_32_0.png index 972ebfac5c743e..9898a0e1a32db0 100644 --- a/docs/notebooks/405-paddle-ocr-webcam-with-output_files/405-paddle-ocr-webcam-with-output_32_0.png +++ b/docs/notebooks/405-paddle-ocr-webcam-with-output_files/405-paddle-ocr-webcam-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a31272403eaeed49ca89304491a237c433dc80dd497c1b813a385baa0056e09f -size 590865 +oid sha256:7db1344607796de9ad2d984fc8becf0e32b1942a0e120d27476e26afe73d5cb0 +size 597986 diff --git a/docs/notebooks/405-paddle-ocr-webcam-with-output_files/index.html b/docs/notebooks/405-paddle-ocr-webcam-with-output_files/index.html index 103c98d3c8c649..792a5895be29d8 100644 --- a/docs/notebooks/405-paddle-ocr-webcam-with-output_files/index.html +++ b/docs/notebooks/405-paddle-ocr-webcam-with-output_files/index.html @@ -1,7 +1,7 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/405-paddle-ocr-webcam-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/405-paddle-ocr-webcam-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/405-paddle-ocr-webcam-with-output_files/


../
-405-paddle-ocr-webcam-with-output_32_0.png         16-Aug-2023 01:31              590865
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/405-paddle-ocr-webcam-with-output_files/


../
+405-paddle-ocr-webcam-with-output_32_0.png         31-Oct-2023 00:35              597986
 

diff --git a/docs/notebooks/406-3D-pose-estimation-with-output.rst b/docs/notebooks/406-3D-pose-estimation-with-output.rst index 4cef53e5c7fb38..1d99bdf996b7de 100644 --- a/docs/notebooks/406-3D-pose-estimation-with-output.rst +++ b/docs/notebooks/406-3D-pose-estimation-with-output.rst @@ -1,8 +1,6 @@ Live 3D Human Pose Estimation with OpenVINO =========================================== - - This notebook demonstrates live 3D Human Pose Estimation with OpenVINO via a webcam. We utilize the model `human-pose-estimation-3d-0001 `__ @@ -16,101 +14,114 @@ extension `__\ **and been using JupyterLab to run the demo as suggested in the ``README.md``** -.. note:: - - To use a webcam, you must run this Jupyter notebook on a + **NOTE**: *To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a remote server, the webcam will not work. However, you can still do inference on a video file in the final step. This demo utilizes the Python interface in ``Three.js`` integrated with WebGL to process data from the model inference. These results are processed and displayed in the - notebook. - -To ensure that the results are displayed correctly, run the code in a -recommended browser on one of the following operating systems: Ubuntu, -Windows: Chrome, macOS: Safari. + notebook.* -.. 
_top: +*To ensure that the results are displayed correctly, run the code in a +recommended browser on one of the following operating systems:* *Ubuntu, +Windows: Chrome* *macOS: Safari* -**Table of contents**: +**Table of contents:** -- `Prerequisites <#prerequisites>`__ -- `Imports <#imports>`__ -- `The model <#the-model>`__ - - `Download the model <#download-the-model>`__ - - `Convert Model to OpenVINO IR format <#convert-model-to-openvino-ir-format>`__ - - `Select inference device <#select-inference-device>`__ - - `Load the model <#load-the-model>`__ +- `Prerequisites <#prerequisites>`__ +- `Imports <#imports>`__ +- `The model <#the-model>`__ -- `Processing <#processing>`__ + - `Download the model <#download-the-model>`__ + - `Convert Model to OpenVINO IR + format <#convert-model-to-openvino-ir-format>`__ + - `Select inference device <#select-inference-device>`__ + - `Load the model <#load-the-model>`__ - - `Model Inference <#model-inference>`__ - - `Draw 2D Pose Overlays <#draw-2d-pose-overlays>`__ - - `Main Processing Function <#main-processing-function>`__ +- `Processing <#processing>`__ -- `Run <#run>`__ + - `Model Inference <#model-inference>`__ + - `Draw 2D Pose Overlays <#draw-2d-pose-overlays>`__ + - `Main Processing Function <#main-processing-function>`__ - - `Run Live Pose Estimation <#run-live-pose-estimation>`__ - - `Run Pose Estimation on a Video File <#run-pose-estimation-on-a-video-file>`__ +- `Run <#run>`__ -Prerequisites `⇑ <#top>`__ -############################################################################################################################### + - `Run Live Pose Estimation <#run-live-pose-estimation>`__ + - `Run Pose Estimation on a Video + File <#run-pose-estimation-on-a-video-file>`__ +Prerequisites +------------------------------------------------------- .. note:: - - The ``pythreejs`` extension may not display properly when using the latest - Jupyter Notebook release (2.4.1). 
Therefore, it is recommended to use - Jupyter Lab instead. +**The ``pythreejs`` extension may not display properly when using the +latest Jupyter Notebook release (2.4.1). Therefore, it is recommended to +use Jupyter Lab instead.** .. code:: ipython3 - !pip install pythreejs + %pip install pythreejs "openvino-dev==2023.1.0" .. parsed-literal:: Collecting pythreejs Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.0) + Requirement already satisfied: openvino-dev==2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2023.1.0) + Requirement already satisfied: addict>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (2.4.0) + Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (0.7.1) + Requirement already satisfied: jstyleson>=0.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (0.0.2) + Requirement already satisfied: networkx<=3.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (2.8.2) + Requirement already satisfied: numpy>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (1.24.3) + Requirement already satisfied: opencv-python in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (4.8.1.78) + Requirement already satisfied: openvino-telemetry>=2022.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (2023.2.1) + Requirement already satisfied: pillow>=8.1.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (10.0.1) + Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (6.0.1) + Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (2.31.0) + Requirement already satisfied: texttable>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (1.7.0) + Requirement already satisfied: tqdm>=4.54.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (4.66.1) + Requirement already satisfied: openvino==2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (2023.1.0) + Requirement already satisfied: scipy<1.11,>=1.8 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev==2023.1.0) (1.10.1) + Requirement already satisfied: ipywidgets>=7.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.1) Collecting ipydatawidgets>=1.1.1 (from pythreejs) - Obtaining dependency information for ipydatawidgets>=1.1.1 from https://files.pythonhosted.org/packages/f1/5b/e63c877c4c94382b66de5045e08ec8cd960e8a4d22f0d62a4dfb1f9e5ac6/ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (1.23.5) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.9.0) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.13.0) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.1.4) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.2) - Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.8) - Requirement already satisfied: jupyterlab-widgets~=3.0.7 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.8) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.4.2) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.0) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.6) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.39) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.16.1) - Requirement already satisfied: stack-data in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.2) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.7.1) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.8.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.3) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.6) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.2.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.2.1) - Requirement already satisfied: pure-eval in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: six in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.1.4) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.9 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.9) + Requirement already satisfied: jupyterlab-widgets~=3.0.9 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.9) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev==2023.1.0) (3.3.1) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev==2023.1.0) (3.4) + Requirement already satisfied: urllib3<3,>=1.21.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev==2023.1.0) (2.0.7) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev==2023.1.0) (2023.7.22) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.6) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.39) + Requirement already satisfied: pygments>=2.4.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.16.1) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.5.0) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.8.0) + Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.3) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.9) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.0.1) + Requirement already satisfied: asttokens>=2.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 Installing collected packages: traittypes, ipydatawidgets, pythreejs Successfully installed ipydatawidgets-4.3.5 pythreejs-2.4.2 traittypes-0.2.1 + Note: you may need to restart the kernel to use updated packages. -Imports `⇑ <#top>`__ -############################################################################################################################### - +Imports +------------------------------------------------- .. 
code:: ipython3 @@ -123,7 +134,7 @@ Imports `⇑ <#top>`__ import ipywidgets as widgets import numpy as np from IPython.display import clear_output, display - from openvino.runtime import Core + import openvino as ov sys.path.append("../utils") import notebook_utils as utils @@ -132,13 +143,11 @@ Imports `⇑ <#top>`__ import engine.engine3js as engine from engine.parse_poses import parse_poses -The model `⇑ <#top>`__ -############################################################################################################################### - - -Download the model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The model +--------------------------------------------------- +Download the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We use ``omz_downloader``, which is a command line tool from the ``openvino-dev`` package. ``omz_downloader`` automatically creates a @@ -179,14 +188,13 @@ directory structure and downloads the selected model. -Convert Model to OpenVINO IR format `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Convert Model to OpenVINO IR format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The selected model -comes from the public directory, which means it must be converted into -OpenVINO Intermediate Representation (OpenVINO IR). We use -``omz_converter`` to convert the ONNX format model to the OpenVINO IR -format. +The selected model comes from the public directory, which means it must +be converted into OpenVINO Intermediate Representation (OpenVINO IR). We +use ``omz_converter`` to convert the ONNX format model to the OpenVINO +IR format. .. code:: ipython3 @@ -204,30 +212,29 @@ format. .. 
parsed-literal:: ========== Converting human-pose-estimation-3d-0001 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx + Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/model_zoo/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx ONNX check passed successfully. 
========== Converting human-pose-estimation-3d-0001 to IR (FP32) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=/tmp/tmpgwxi10io --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False + Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False [ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11. - Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.1/openvino_2_0_transition_guide.html + Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html [ SUCCESS ] Generated IR version 11 model. 
- [ SUCCESS ] XML file: /tmp/tmpgwxi10io/human-pose-estimation-3d-0001.xml - [ SUCCESS ] BIN file: /tmp/tmpgwxi10io/human-pose-estimation-3d-0001.bin + [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/406-3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml + [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/406-3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Select device from dropdown list for running inference using OpenVINO: +Select device from dropdown list for running inference using OpenVINO: .. code:: ipython3 - core = Core() + core = ov.Core() device = widgets.Dropdown( options=core.available_devices + ["AUTO"], @@ -247,9 +254,8 @@ Select device from dropdown list for running inference using OpenVINO: -Load the model `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Load the model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Converted models are located in a fixed structure, which indicates vendor, model name and precision. @@ -262,11 +268,11 @@ created to infer the compiled model. ..
code:: ipython3 # initialize inference engine - ie_core = Core() + core = ov.Core() # read the network and corresponding weights from file - model = ie_core.read_model(model=ir_model_path, weights=model_weights_path) + model = core.read_model(model=ir_model_path, weights=model_weights_path) # load the model on the specified device - compiled_model = ie_core.compile_model(model=model, device_name=device.value) + compiled_model = core.compile_model(model=model, device_name=device.value) infer_request = compiled_model.create_infer_request() input_tensor_name = model.inputs[0].get_any_name() @@ -290,15 +296,15 @@ heat maps, PAF (part affinity fields) and features. -Processing `⇑ <#top>`__ -############################################################################################################################### +Processing +---------------------------------------------------- -Model Inference `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Model Inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Frames captured from video files or the live webcam are used as the input for the 3D -model. This is how you obtain the output heat maps, PAF (part affinity -fields) and features. +Frames captured from video files or the live webcam are used as the +input for the 3D model. This is how you obtain the output heat maps, PAF +(part affinity fields) and features. .. code:: ipython3 @@ -331,13 +337,14 @@ fields) and features. return results -Draw 2D Pose Overlays `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Draw 2D Pose Overlays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We need to define some connections between the joints in advance, so that we can draw the structure of the -human body in the resulting image after obtaining the inference results. 
-Joints are drawn as circles and limbs are drawn as lines. The code is -based on the `3D Human Pose Estimation +We need to define some connections between the joints in advance, so +that we can draw the structure of the human body in the resulting image +after obtaining the inference results. Joints are drawn as circles and +limbs are drawn as lines. The code is based on the `3D Human Pose +Estimation Demo `__ from Open Model Zoo. @@ -412,9 +419,8 @@ from Open Model Zoo. return frame -Main Processing Function `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Main Processing Function +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Run 3D pose estimation on the specified source. It could be either a webcam feed or a video file. @@ -577,13 +583,11 @@ webcam feed or a video file. if skeleton_set: engine3D.scene_remove(skeleton_set) -Run `⇑ <#top>`__ -############################################################################################################################### - - -Run Live Pose Estimation `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run +--------------------------------------------- +Run Live Pose Estimation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Run, using a webcam as the video input. By default, the primary webcam is set with ``source=0``. If you have multiple webcams, each one will be @@ -592,7 +596,7 @@ using a front-facing camera. Some web browsers, especially Mozilla Firefox, may cause flickering. If you experience flickering, set ``use_popup=True``. -.. note:: + **NOTE**: *1. To use this notebook with a webcam, you need to run the notebook on a computer with a webcam. If you run the notebook on a server @@ -601,7 +605,6 @@ Firefox, may cause flickering. If you experience flickering, set *2. 
Popup mode may not work if you run this notebook on a remote computer (e.g. Binder).* - Using the following method, you can click and move your mouse over the picture on the left to interact. @@ -609,9 +612,8 @@ picture on the left to interact. run_pose_estimation(source=0, flip=True, use_popup=False) -Run Pose Estimation on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Run Pose Estimation on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you do not have a webcam, you can still run this demo with a video file. Any `format supported by diff --git a/docs/notebooks/407-person-tracking-with-output.rst b/docs/notebooks/407-person-tracking-with-output.rst index 9e11051c3f436c..067bc2c26bdd9b 100644 --- a/docs/notebooks/407-person-tracking-with-output.rst +++ b/docs/notebooks/407-person-tracking-with-output.rst @@ -1,8 +1,6 @@ Person Tracking with OpenVINO™ ============================== - - This notebook demonstrates live person tracking with OpenVINO: it reads frames from an input video sequence, detects people in the frames, uniquely identifies each one of them and tracks all of them until they @@ -95,44 +93,46 @@ realtime tracking,” in ICIP, 2016, pp. 3464–3468. .. |deepsort| image:: https://user-images.githubusercontent.com/91237924/221744683-0042eff8-2c41-43b8-b3ad-b5929bafb60b.png -.. 
_top: +**Table of contents:** -**Table of contents**: -- `Imports <#imports>`__ -- `Download the Model <#download-the-model>`__ -- `Load model <#load-model>`__ +- `Imports <#imports>`__ +- `Download the Model <#download-the-model>`__ +- `Load model <#load-model>`__ - - `Select inference device <#select-inference-device>`__ + - `Select inference device <#select-inference-device>`__ -- `Data Processing <#data-processing>`__ -- `Test person reidentification model <#test-person-reidentification-model>`__ +- `Data Processing <#data-processing>`__ +- `Test person reidentification + model <#test-person-reidentification-model>`__ - - `Visualize data <#visualize-data>`__ - - `Compare two persons <#compare-two-persons>`__ + - `Visualize data <#visualize-data>`__ + - `Compare two persons <#compare-two-persons>`__ -- `Main Processing Function <#main-processing-function>`__ -- `Run <#run>`__ +- `Main Processing Function <#main-processing-function>`__ +- `Run <#run>`__ - - `Initialize tracker <#initialize-tracker>`__ - - `Run Live Person Tracking <#run-live-person-tracking>`__ - - `Run Person Tracking on a Video File <#run-person-tracking-on-a-video-file>`__ + - `Initialize tracker <#initialize-tracker>`__ + - `Run Live Person Tracking <#run-live-person-tracking>`__ + - `Run Person Tracking on a Video + File <#run-person-tracking-on-a-video-file>`__ .. code:: ipython3 - !pip install -q "openvino-dev>=2023.0.0" - !pip install -q opencv-python matplotlib requests scipy + %pip install -q "openvino-dev>=2023.1.0" + %pip install -q opencv-python matplotlib requests scipy .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + Note: you may need to restart the kernel to use updated packages. -Imports `⇑ <#top>`__ -############################################################################################################################### +Imports +------------------------------------------------- .. code:: ipython3 @@ -145,7 +145,7 @@ Imports `⇑ <#top>`__ import cv2 from IPython import display import matplotlib.pyplot as plt - from openvino.runtime import Core + import openvino as ov .. 
code:: ipython3 @@ -167,11 +167,12 @@ Imports `⇑ <#top>`__ from deepsort_utils.nn_matching import NearestNeighborDistanceMetric from deepsort_utils.detection import Detection, compute_color_for_labels, xywh_to_xyxy, xywh_to_tlwh, tlwh_to_xyxy -Download the Model `⇑ <#top>`__ -############################################################################################################################### +Download the Model +------------------------------------------------------------ -We will use pre-trained models from OpenVINO’s `Open Model Zoo `__ -to start the test. +We will use pre-trained models from OpenVINO’s `Open Model +Zoo `__ to start the +test. Use ``omz_downloader``, which is a command-line tool from the ``openvino-dev`` package. It automatically creates a directory structure @@ -180,19 +181,22 @@ already downloaded. The selected model comes from the public directory, which means it must be converted into OpenVINO Intermediate Representation (OpenVINO IR). -.. note:: + **NOTE**: Using a model outside the list can require different pre- + and post-processing. - Using a model outside the list can require different pre- and post-processing. - -In this case, `person detection model `__ +In this case, `person detection +model `__ is deployed to detect the person in each frame of the video, and -`reidentification model `__ +`reidentification +model `__ is used to output embedding vector to match a pair of images of a person by the cosine distance. If you want to download another model (``person-detection-xxx`` from -`Object Detection Models list `__, -``person-reidentification-retail-xxx`` from `Reidentification Models list `__), +`Object Detection Models +list `__, +``person-reidentification-retail-xxx`` from `Reidentification Models +list `__), replace the name of the model in the code below. .. code:: ipython3 @@ -248,18 +252,16 @@ replace the name of the model in the code below. 
-Load model `⇑ <#top>`__ -############################################################################################################################### +Load model +---------------------------------------------------- Define a common class for model loading and predicting. There are four main steps for OpenVINO model initialization, and they -are required to run for only once before inference loop. - -1. Initialize OpenVINO Runtime. -2. Read the network from ``*.bin`` and ``*.xml`` files (weights and architecture). -3. Compile the model for device. -4. Get input and output names of nodes. +are required to run for only once before inference loop. 1. Initialize +OpenVINO Runtime. 2. Read the network from ``*.bin`` and ``*.xml`` files +(weights and architecture). 3. Compile the model for device. 4. Get +input and output names of nodes. In this case, we can put them all in a class constructor function. @@ -269,7 +271,7 @@ performance, but slightly longer startup time). .. code:: ipython3 - core = Core() + core = ov.Core() class Model: @@ -311,11 +313,10 @@ performance, but slightly longer startup time). result = self.compiled_model(input)[self.output_layer] return result -Select inference device `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO: +select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 @@ -345,15 +346,14 @@ Select device from dropdown list for running inference using OpenVINO: # since the number of detection object is uncertain, the input batch size of reid model should be dynamic extractor = Model(reidentification_model_path, -1, device.value) -Data Processing `⇑ <#top>`__ -############################################################################################################################### +Data Processing +--------------------------------------------------------- -Data Processing includes data preprocess and postprocess functions. - -- Data preprocess function is used to change the layout and shape of input data, - according to requirement of the network input format. -- Data postprocess function is used to extract the useful information from - network’s original output and visualize it. +Data Processing includes data preprocess and postprocess functions. - +Data preprocess function is used to change the layout and shape of input +data, according to requirement of the network input format. - Data +postprocess function is used to extract the useful information from +network’s original output and visualize it. .. code:: ipython3 @@ -465,16 +465,15 @@ Data Processing includes data preprocess and postprocess functions. """ return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2)) -Test person reidentification model `⇑ <#top>`__ -############################################################################################################################### - -The reidentification network outputs a blob with the ``(1, 256)`` shape named -``reid_embedding``, which can be compared with other descriptors using -the cosine distance. 
+Test person reidentification model +---------------------------------------------------------------------------- -Visualize data `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The reidentification network outputs a blob with the ``(1, 256)`` shape +named ``reid_embedding``, which can be compared with other descriptors +using the cosine distance. +Visualize data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -521,9 +520,8 @@ Visualize data `⇑ <#top>`__ .. image:: 407-person-tracking-with-output_files/407-person-tracking-with-output_17_3.png -Compare two persons `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - +Compare two persons +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 @@ -541,12 +539,11 @@ Compare two persons `⇑ <#top>`__ .. parsed-literal:: - Different person (confidence: 0.02726622298359871) + Different person (confidence: 0.02726624347269535) -Main Processing Function `⇑ <#top>`__ -############################################################################################################################### - +Main Processing Function +------------------------------------------------------------------ Run person tracking on the specified source. Either a webcam feed or a video file. @@ -700,14 +697,14 @@ video file. 
if use_popup: cv2.destroyAllWindows() -Run `⇑ <#top>`__ -############################################################################################################################### - +Run +--------------------------------------------- -Initialize tracker `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Initialize tracker +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Before running a new tracking task, we have to reinitialize a Tracker object +Before running a new tracking task, we have to reinitialize a Tracker +object .. code:: ipython3 @@ -723,14 +720,15 @@ Before running a new tracking task, we have to reinitialize a Tracker object n_init=3 ) -Run Live Person Tracking `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run Live Person Tracking +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Use a webcam as the video input. By default, the primary webcam is set with ``source=0``. If you have -multiple webcams, each one will be assigned a consecutive number -starting at 0. Set ``flip=True`` when using a front-facing camera. Some -web browsers, especially Mozilla Firefox, may cause flickering. If you -experience flickering, set ``use_popup=True``. +Use a webcam as the video input. By default, the primary webcam is set +with ``source=0``. If you have multiple webcams, each one will be +assigned a consecutive number starting at 0. Set ``flip=True`` when +using a front-facing camera. Some web browsers, especially Mozilla +Firefox, may cause flickering. If you experience flickering, set +``use_popup=True``. .. code:: ipython3 @@ -744,16 +742,16 @@ experience flickering, set ``use_popup=True``. .. 
parsed-literal:: - [ WARN:0@10.127] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index - [ERROR:0@10.127] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range - + [ WARN:0@10.524] global cap_v4l.cpp:982 open VIDEOIO(V4L2:/dev/video0): can't open camera by index + [ERROR:0@10.524] global obsensor_uvc_stream_channel.cpp:156 getStreamChannelGroup Camera index out of range -Run Person Tracking on a Video File `⇑ <#top>`__ -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Run Person Tracking on a Video File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you do not have a webcam, you can still run this demo with a video -file. Any `format supported by OpenCV `__ +file. Any `format supported by +OpenCV `__ will work. .. code:: ipython3 diff --git a/docs/notebooks/407-person-tracking-with-output_files/407-person-tracking-with-output_27_0.png b/docs/notebooks/407-person-tracking-with-output_files/407-person-tracking-with-output_27_0.png index 019e835e5d8f44..6b5f72a2b89b7c 100644 --- a/docs/notebooks/407-person-tracking-with-output_files/407-person-tracking-with-output_27_0.png +++ b/docs/notebooks/407-person-tracking-with-output_files/407-person-tracking-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9516ecd60d4a653c7f96eb72800e085c6ba50f45c8f192b41d2e431e38e2678 -size 218848 +oid sha256:6c0eaaa11158bb80f27922fcc4558301f457b8ec45c754c77c3515e67569b509 +size 218751 diff --git a/docs/notebooks/407-person-tracking-with-output_files/index.html b/docs/notebooks/407-person-tracking-with-output_files/index.html index 905ad6438206df..930b6759f65407 100644 --- a/docs/notebooks/407-person-tracking-with-output_files/index.html +++ b/docs/notebooks/407-person-tracking-with-output_files/index.html @@ -1,8 +1,8 @@ -Index of 
/projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/407-person-tracking-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/407-person-tracking-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/407-person-tracking-with-output_files/


../
-407-person-tracking-with-output_17_3.png           16-Aug-2023 01:31              106259
-407-person-tracking-with-output_27_0.png           16-Aug-2023 01:31              218848
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/407-person-tracking-with-output_files/


../
+407-person-tracking-with-output_17_3.png           31-Oct-2023 00:35              106259
+407-person-tracking-with-output_27_0.png           31-Oct-2023 00:35              218751
 

diff --git a/docs/notebooks/index.html b/docs/notebooks/index.html index cd9a21671a42af..9ee8ff8112511e 100644 --- a/docs/notebooks/index.html +++ b/docs/notebooks/index.html @@ -1,166 +1,207 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/


../
-001-hello-world-with-output_files/                 16-Aug-2023 01:31                   -
-003-hello-segmentation-with-output_files/          16-Aug-2023 01:31                   -
-004-hello-detection-with-output_files/             16-Aug-2023 01:31                   -
-101-tensorflow-classification-to-openvino-with-..> 16-Aug-2023 01:31                   -
-102-pytorch-onnx-to-openvino-with-output_files/    16-Aug-2023 01:31                   -
-102-pytorch-to-openvino-with-output_files/         16-Aug-2023 01:31                   -
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31                   -
-106-auto-device-with-output_files/                 16-Aug-2023 01:31                   -
-109-latency-tricks-with-output_files/              16-Aug-2023 01:31                   -
-109-throughput-tricks-with-output_files/           16-Aug-2023 01:31                   -
-110-ct-scan-live-inference-with-output_files/      16-Aug-2023 01:31                   -
-110-ct-segmentation-quantize-nncf-with-output_f..> 16-Aug-2023 01:31                   -
-111-yolov5-quantization-migration-with-output_f..> 16-Aug-2023 01:31                   -
-113-image-classification-quantization-with-outp..> 16-Aug-2023 01:31                   -
-115-async-api-with-output_files/                   16-Aug-2023 01:31                   -
-117-model-server-with-output_files/                16-Aug-2023 01:31                   -
-118-optimize-preprocessing-with-output_files/      16-Aug-2023 01:31                   -
-119-tflite-to-openvino-with-output_files/          16-Aug-2023 01:31                   -
-120-tensorflow-object-detection-to-openvino-wit..> 16-Aug-2023 01:31                   -
-201-vision-monodepth-with-output_files/            16-Aug-2023 01:31                   -
-202-vision-superresolution-image-with-output_files 16-Aug-2023 01:31                   -
-203-meter-reader-with-output_files/                16-Aug-2023 01:31                   -
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31                   -
-205-vision-background-removal-with-output_files/   16-Aug-2023 01:31                   -
-206-vision-paddlegan-anime-with-output_files/      16-Aug-2023 01:31                   -
-207-vision-paddlegan-superresolution-with-outpu..> 16-Aug-2023 01:31                   -
-208-optical-character-recognition-with-output_f..> 16-Aug-2023 01:31                   -
-209-handwritten-ocr-with-output_files/             16-Aug-2023 01:31                   -
-211-speech-to-text-with-output_files/              16-Aug-2023 01:31                   -
-212-pyannote-speaker-diarization-with-output_files 16-Aug-2023 01:31                   -
-215-image-inpainting-with-output_files/            16-Aug-2023 01:31                   -
-216-attention-center-with-output_files/            16-Aug-2023 01:31                   -
-217-vision-deblur-with-output_files/               16-Aug-2023 01:31                   -
-218-vehicle-detection-and-recognition-with-outp..> 16-Aug-2023 01:31                   -
-220-cross-lingual-books-alignment-with-output_f..> 16-Aug-2023 01:31                   -
-222-vision-image-colorization-with-output_files/   16-Aug-2023 01:31                   -
-224-3D-segmentation-point-clouds-with-output_files 16-Aug-2023 01:31                   -
-225-stable-diffusion-text-to-image-with-output_..> 16-Aug-2023 01:31                   -
-226-yolov7-optimization-with-output_files/         16-Aug-2023 01:31                   -
-228-clip-zero-shot-convert-with-output_files/      16-Aug-2023 01:31                   -
-228-clip-zero-shot-quantize-with-output_files/     16-Aug-2023 01:31                   -
-230-yolov8-optimization-with-output_files/         16-Aug-2023 01:31                   -
-231-instruct-pix2pix-image-editing-with-output_..> 16-Aug-2023 01:31                   -
-233-blip-visual-language-processing-with-output..> 16-Aug-2023 01:31                   -
-234-encodec-audio-compression-with-output_files/   16-Aug-2023 01:31                   -
-235-controlnet-stable-diffusion-with-output_files/ 16-Aug-2023 01:31                   -
-236-stable-diffusion-v2-optimum-demo-comparison..> 16-Aug-2023 01:31                   -
-236-stable-diffusion-v2-optimum-demo-with-outpu..> 16-Aug-2023 01:31                   -
-236-stable-diffusion-v2-text-to-image-demo-with..> 16-Aug-2023 01:31                   -
-237-segment-anything-with-output_files/            16-Aug-2023 01:31                   -
-238-deep-floyd-if-with-output_files/               16-Aug-2023 01:31                   -
-239-image-bind-convert-with-output_files/          16-Aug-2023 01:31                   -
-241-riffusion-text-to-music-with-output_files/     16-Aug-2023 01:31                   -
-243-tflite-selfie-segmentation-with-output_files/  16-Aug-2023 01:31                   -
-246-depth-estimation-videpth-with-output_files/    16-Aug-2023 01:31                   -
-248-stable-diffusion-xl-with-output_files/         16-Aug-2023 01:31                   -
-249-oneformer-segmentation-with-output_files/      16-Aug-2023 01:31                   -
-301-tensorflow-training-openvino-with-output_files 16-Aug-2023 01:31                   -
-305-tensorflow-quantization-aware-training-with..> 16-Aug-2023 01:31                   -
-401-object-detection-with-output_files/            16-Aug-2023 01:31                   -
-402-pose-estimation-with-output_files/             16-Aug-2023 01:31                   -
-403-action-recognition-webcam-with-output_files/   16-Aug-2023 01:31                   -
-404-style-transfer-with-output_files/              16-Aug-2023 01:31                   -
-405-paddle-ocr-webcam-with-output_files/           16-Aug-2023 01:31                   -
-407-person-tracking-with-output_files/             16-Aug-2023 01:31                   -
-notebook_utils-with-output_files/                  16-Aug-2023 01:31                   -
-001-hello-world-with-output.rst                    16-Aug-2023 01:31                3886
-002-openvino-api-with-output.rst                   16-Aug-2023 01:31               32160
-003-hello-segmentation-with-output.rst             16-Aug-2023 01:31                5603
-004-hello-detection-with-output.rst                16-Aug-2023 01:31                6847
-101-tensorflow-classification-to-openvino-with-..> 16-Aug-2023 01:31                8858
-102-pytorch-onnx-to-openvino-with-output.rst       16-Aug-2023 01:31               17662
-102-pytorch-to-openvino-with-output.rst            16-Aug-2023 01:31               22399
-103-paddle-to-openvino-classification-with-outp..> 16-Aug-2023 01:31               15022
-104-model-tools-with-output.rst                    16-Aug-2023 01:31               20907
-105-language-quantize-bert-with-output.rst         16-Aug-2023 01:31               27178
-106-auto-device-with-output.rst                    16-Aug-2023 01:31               21069
-107-speech-recognition-quantization-data2vec-wi..> 16-Aug-2023 01:31              537722
-108-gpu-device-with-output.rst                     16-Aug-2023 01:31               53377
-109-latency-tricks-with-output.rst                 16-Aug-2023 01:31               22521
-109-throughput-tricks-with-output.rst              16-Aug-2023 01:31               25678
-110-ct-scan-live-inference-with-output.rst         16-Aug-2023 01:31               16024
-110-ct-segmentation-quantize-nncf-with-output.rst  16-Aug-2023 01:31               35892
-111-yolov5-quantization-migration-with-output.rst  16-Aug-2023 01:31               46306
-112-pytorch-post-training-quantization-nncf-wit..> 16-Aug-2023 01:31               28796
-113-image-classification-quantization-with-outp..> 16-Aug-2023 01:31               20160
-115-async-api-with-output.rst                      16-Aug-2023 01:31               18664
-116-sparsity-optimization-with-output.rst          16-Aug-2023 01:31               16901
-117-model-server-with-output.rst                   16-Aug-2023 01:31               20692
-118-optimize-preprocessing-with-output.rst         16-Aug-2023 01:31               22961
-119-tflite-to-openvino-with-output.rst             16-Aug-2023 01:31               10579
-120-tensorflow-object-detection-to-openvino-wit..> 16-Aug-2023 01:31               26589
-121-convert-to-openvino-with-output.rst            16-Aug-2023 01:31               83068
-201-vision-monodepth-with-output.rst               16-Aug-2023 01:31              967562
-202-vision-superresolution-image-with-output.rst   16-Aug-2023 01:31               25109
-203-meter-reader-with-output.rst                   16-Aug-2023 01:31               25544
-204-segmenter-semantic-segmentation-with-output..> 16-Aug-2023 01:31               28535
-205-vision-background-removal-with-output.rst      16-Aug-2023 01:31               14792
-206-vision-paddlegan-anime-with-output.rst         16-Aug-2023 01:31               20211
-207-vision-paddlegan-superresolution-with-outpu..> 16-Aug-2023 01:31               14872
-208-optical-character-recognition-with-output.rst  16-Aug-2023 01:31               25888
-209-handwritten-ocr-with-output.rst                16-Aug-2023 01:31               10851
-210-slowfast-video-recognition-with-output.rst     16-Aug-2023 01:31              782597
-211-speech-to-text-with-output.rst                 16-Aug-2023 01:31               87059
-212-pyannote-speaker-diarization-with-output.rst   16-Aug-2023 01:31             1293516
-213-question-answering-with-output.rst             16-Aug-2023 01:31               20417
-214-grammar-correction-with-output.rst             16-Aug-2023 01:31               19552
-215-image-inpainting-with-output.rst               16-Aug-2023 01:31                8755
-216-attention-center-with-output.rst               16-Aug-2023 01:31               11247
-217-vision-deblur-with-output.rst                  16-Aug-2023 01:31               10336
-218-vehicle-detection-and-recognition-with-outp..> 16-Aug-2023 01:31               16655
-219-knowledge-graphs-conve-with-output.rst         16-Aug-2023 01:31               20390
-220-cross-lingual-books-alignment-with-output.rst  16-Aug-2023 01:31               50713
-221-machine-translation-with-output.rst            16-Aug-2023 01:31                9632
-222-vision-image-colorization-with-output.rst      16-Aug-2023 01:31               13399
-223-text-prediction-with-output.rst                16-Aug-2023 01:31               25940
-224-3D-segmentation-point-clouds-with-output.rst   16-Aug-2023 01:31                8271
-225-stable-diffusion-text-to-image-with-output.rst 16-Aug-2023 01:31               42288
-226-yolov7-optimization-with-output.rst            16-Aug-2023 01:31               41509
-227-whisper-subtitles-generation-with-output.rst   16-Aug-2023 01:31               38576
-228-clip-zero-shot-convert-with-output.rst         16-Aug-2023 01:31               15301
-228-clip-zero-shot-quantize-with-output.rst        16-Aug-2023 01:31               11960
-229-distilbert-sequence-classification-with-out..> 16-Aug-2023 01:31                6947
-230-yolov8-optimization-with-output.rst            16-Aug-2023 01:31               75132
-231-instruct-pix2pix-image-editing-with-output.rst 16-Aug-2023 01:31               47035
-233-blip-visual-language-processing-with-output..> 16-Aug-2023 01:31               46332
-234-encodec-audio-compression-with-output.rst      16-Aug-2023 01:31             3862060
-235-controlnet-stable-diffusion-with-output.rst    16-Aug-2023 01:31               53511
-236-stable-diffusion-v2-infinite-zoom-with-outp..> 16-Aug-2023 01:31               64824
-236-stable-diffusion-v2-optimum-demo-comparison..> 16-Aug-2023 01:31                7142
-236-stable-diffusion-v2-optimum-demo-with-outpu..> 16-Aug-2023 01:31                7605
-236-stable-diffusion-v2-text-to-image-demo-with..> 16-Aug-2023 01:31               11300
-236-stable-diffusion-v2-text-to-image-with-outp..> 16-Aug-2023 01:31               44768
-237-segment-anything-with-output.rst               16-Aug-2023 01:31               97904
-238-deep-floyd-if-with-output.rst                  16-Aug-2023 01:31               29347
-239-image-bind-convert-with-output.rst             16-Aug-2023 01:31             2400458
-240-dolly-2-instruction-following-with-output.rst  16-Aug-2023 01:31               34200
-241-riffusion-text-to-music-with-output.rst        16-Aug-2023 01:31              633040
-242-freevc-voice-conversion-with-output.rst        16-Aug-2023 01:31              651968
-243-tflite-selfie-segmentation-with-output.rst     16-Aug-2023 01:31               21290
-244-named-entity-recognition-with-output.rst       16-Aug-2023 01:31               29820
-245-typo-detector-with-output.rst                  16-Aug-2023 01:31               28464
-246-depth-estimation-videpth-with-output.rst       16-Aug-2023 01:31               50251
-247-code-language-id-with-output.rst               16-Aug-2023 01:31               35934
-248-stable-diffusion-xl-with-output.rst            16-Aug-2023 01:31               21901
-249-oneformer-segmentation-with-output.rst         16-Aug-2023 01:31               14678
-301-tensorflow-training-openvino-with-output.rst   16-Aug-2023 01:31               41424
-302-pytorch-quantization-aware-training-with-ou..> 16-Aug-2023 01:31               29746
-305-tensorflow-quantization-aware-training-with..> 16-Aug-2023 01:31               23795
-401-object-detection-with-output.rst               16-Aug-2023 01:31               18234
-402-pose-estimation-with-output.rst                16-Aug-2023 01:31               15731
-403-action-recognition-webcam-with-output.rst      16-Aug-2023 01:31               25273
-404-style-transfer-with-output.rst                 16-Aug-2023 01:31               16087
-405-paddle-ocr-webcam-with-output.rst              16-Aug-2023 01:31               24119
-406-3D-pose-estimation-with-output.rst             16-Aug-2023 01:31               28035
-407-person-tracking-with-output.rst                16-Aug-2023 01:31               27202
-notebook_utils-with-output.rst                     16-Aug-2023 01:31               12520
-notebooks_tags.json                                16-Aug-2023 01:31                8554
-notebooks_with_binder_buttons.txt                  16-Aug-2023 01:31                1000
-notebooks_with_colab_buttons.txt                   16-Aug-2023 01:31                 782
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/


../
+001-hello-world-with-output_files/                 31-Oct-2023 00:35                   -
+003-hello-segmentation-with-output_files/          31-Oct-2023 00:35                   -
+004-hello-detection-with-output_files/             31-Oct-2023 00:35                   -
+101-tensorflow-classification-to-openvino-with-..> 31-Oct-2023 00:35                   -
+102-pytorch-onnx-to-openvino-with-output_files/    31-Oct-2023 00:35                   -
+102-pytorch-to-openvino-with-output_files/         31-Oct-2023 00:35                   -
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35                   -
+106-auto-device-with-output_files/                 31-Oct-2023 00:35                   -
+109-latency-tricks-with-output_files/              31-Oct-2023 00:35                   -
+109-throughput-tricks-with-output_files/           31-Oct-2023 00:35                   -
+110-ct-scan-live-inference-with-output_files/      31-Oct-2023 00:35                   -
+110-ct-segmentation-quantize-nncf-with-output_f..> 31-Oct-2023 00:35                   -
+111-yolov5-quantization-migration-with-output_f..> 31-Oct-2023 00:35                   -
+113-image-classification-quantization-with-outp..> 31-Oct-2023 00:35                   -
+115-async-api-with-output_files/                   31-Oct-2023 00:35                   -
+117-model-server-with-output_files/                31-Oct-2023 00:35                   -
+118-optimize-preprocessing-with-output_files/      31-Oct-2023 00:35                   -
+119-tflite-to-openvino-with-output_files/          31-Oct-2023 00:35                   -
+120-tensorflow-instance-segmentation-to-openvin..> 31-Oct-2023 00:35                   -
+120-tensorflow-object-detection-to-openvino-wit..> 31-Oct-2023 00:35                   -
+123-detectron2-to-openvino-with-output_files/      31-Oct-2023 00:35                   -
+125-convnext-classification-with-output_files/     31-Oct-2023 00:35                   -
+126-tensorflow-hub-with-output_files/              31-Oct-2023 00:35                   -
+201-vision-monodepth-with-output_files/            31-Oct-2023 00:35                   -
+202-vision-superresolution-image-with-output_files 31-Oct-2023 00:35                   -
+203-meter-reader-with-output_files/                31-Oct-2023 00:35                   -
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35                   -
+205-vision-background-removal-with-output_files/   31-Oct-2023 00:35                   -
+206-vision-paddlegan-anime-with-output_files/      31-Oct-2023 00:35                   -
+207-vision-paddlegan-superresolution-with-outpu..> 31-Oct-2023 00:35                   -
+208-optical-character-recognition-with-output_f..> 31-Oct-2023 00:35                   -
+209-handwritten-ocr-with-output_files/             31-Oct-2023 00:35                   -
+211-speech-to-text-with-output_files/              31-Oct-2023 00:35                   -
+212-pyannote-speaker-diarization-with-output_files 31-Oct-2023 00:35                   -
+215-image-inpainting-with-output_files/            31-Oct-2023 00:35                   -
+217-vision-deblur-with-output_files/               31-Oct-2023 00:35                   -
+218-vehicle-detection-and-recognition-with-outp..> 31-Oct-2023 00:35                   -
+220-cross-lingual-books-alignment-with-output_f..> 31-Oct-2023 00:35                   -
+222-vision-image-colorization-with-output_files/   31-Oct-2023 00:35                   -
+224-3D-segmentation-point-clouds-with-output_files 31-Oct-2023 00:35                   -
+225-stable-diffusion-text-to-image-with-output_..> 31-Oct-2023 00:35                   -
+226-yolov7-optimization-with-output_files/         31-Oct-2023 00:35                   -
+228-clip-zero-shot-convert-with-output_files/      31-Oct-2023 00:35                   -
+228-clip-zero-shot-quantize-with-output_files/     31-Oct-2023 00:35                   -
+230-yolov8-instance-segmentation-with-output_files 31-Oct-2023 00:35                   -
+230-yolov8-keypoint-detection-with-output_files/   31-Oct-2023 00:35                   -
+230-yolov8-object-detection-with-output_files/     31-Oct-2023 00:35                   -
+231-instruct-pix2pix-image-editing-with-output_..> 31-Oct-2023 00:35                   -
+232-clip-language-saliency-map-with-output_files/  31-Oct-2023 00:35                   -
+233-blip-convert-with-output_files/                31-Oct-2023 00:35                   -
+233-blip-optimize-with-output_files/               31-Oct-2023 00:35                   -
+234-encodec-audio-compression-with-output_files/   31-Oct-2023 00:35                   -
+235-controlnet-stable-diffusion-with-output_files/ 31-Oct-2023 00:35                   -
+236-stable-diffusion-v2-optimum-demo-comparison..> 31-Oct-2023 00:35                   -
+236-stable-diffusion-v2-optimum-demo-with-outpu..> 31-Oct-2023 00:35                   -
+236-stable-diffusion-v2-text-to-image-demo-with..> 31-Oct-2023 00:35                   -
+237-segment-anything-with-output_files/            31-Oct-2023 00:35                   -
+238-deep-floyd-if-optimize-with-output_files/      31-Oct-2023 00:35                   -
+239-image-bind-convert-with-output_files/          31-Oct-2023 00:35                   -
+241-riffusion-text-to-music-with-output_files/     31-Oct-2023 00:35                   -
+243-tflite-selfie-segmentation-with-output_files/  31-Oct-2023 00:35                   -
+246-depth-estimation-videpth-with-output_files/    31-Oct-2023 00:35                   -
+248-stable-diffusion-xl-with-output_files/         31-Oct-2023 00:35                   -
+249-oneformer-segmentation-with-output_files/      31-Oct-2023 00:35                   -
+251-tiny-sd-image-generation-with-output_files/    31-Oct-2023 00:35                   -
+257-llava-multimodal-chatbot-with-output_files/    31-Oct-2023 00:35                   -
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35                   -
+259-decidiffusion-image-generation-with-output_..> 31-Oct-2023 00:35                   -
+260-pix2struct-docvqa-with-output_files/           31-Oct-2023 00:35                   -
+261-fast-segment-anything-with-output_files/       31-Oct-2023 00:35                   -
+263-latent-consistency-models-image-generation-..> 31-Oct-2023 00:35                   -
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35                   -
+301-tensorflow-training-openvino-with-output_files 31-Oct-2023 00:35                   -
+305-tensorflow-quantization-aware-training-with..> 31-Oct-2023 00:35                   -
+401-object-detection-with-output_files/            31-Oct-2023 00:35                   -
+402-pose-estimation-with-output_files/             31-Oct-2023 00:35                   -
+403-action-recognition-webcam-with-output_files/   31-Oct-2023 00:35                   -
+404-style-transfer-with-output_files/              31-Oct-2023 00:35                   -
+405-paddle-ocr-webcam-with-output_files/           31-Oct-2023 00:35                   -
+407-person-tracking-with-output_files/             31-Oct-2023 00:35                   -
+notebook_utils-with-output_files/                  31-Oct-2023 00:35                   -
+001-hello-world-with-output.rst                    31-Oct-2023 00:35                5528
+002-openvino-api-with-output.rst                   31-Oct-2023 00:35               31911
+003-hello-segmentation-with-output.rst             31-Oct-2023 00:35                7250
+004-hello-detection-with-output.rst                31-Oct-2023 00:35                8354
+101-tensorflow-classification-to-openvino-with-..> 31-Oct-2023 00:35               10299
+102-pytorch-onnx-to-openvino-with-output.rst       31-Oct-2023 00:35               20461
+102-pytorch-to-openvino-with-output.rst            31-Oct-2023 00:35               25916
+103-paddle-to-openvino-classification-with-outp..> 31-Oct-2023 00:35               17486
+104-model-tools-with-output.rst                    31-Oct-2023 00:35               21502
+105-language-quantize-bert-with-output.rst         31-Oct-2023 00:35               20417
+106-auto-device-with-output.rst                    31-Oct-2023 00:35               24736
+107-speech-recognition-quantization-data2vec-wi..> 31-Oct-2023 00:35              970503
+108-gpu-device-with-output.rst                     31-Oct-2023 00:35               56803
+109-latency-tricks-with-output.rst                 31-Oct-2023 00:35               24713
+109-throughput-tricks-with-output.rst              31-Oct-2023 00:35               26977
+110-ct-scan-live-inference-with-output.rst         31-Oct-2023 00:35               18376
+110-ct-segmentation-quantize-nncf-with-output.rst  31-Oct-2023 00:35               38113
+111-yolov5-quantization-migration-with-output.rst  31-Oct-2023 00:35               50833
+112-pytorch-post-training-quantization-nncf-wit..> 31-Oct-2023 00:35               29881
+113-image-classification-quantization-with-outp..> 31-Oct-2023 00:35               22860
+115-async-api-with-output.rst                      31-Oct-2023 00:35               20748
+116-sparsity-optimization-with-output.rst          31-Oct-2023 00:35               18408
+117-model-server-with-output.rst                   31-Oct-2023 00:35               22391
+118-optimize-preprocessing-with-output.rst         31-Oct-2023 00:35               21598
+119-tflite-to-openvino-with-output.rst             31-Oct-2023 00:35               11796
+120-tensorflow-instance-segmentation-to-openvin..> 31-Oct-2023 00:35               25913
+120-tensorflow-object-detection-to-openvino-wit..> 31-Oct-2023 00:35               28132
+121-convert-to-openvino-with-output.rst            31-Oct-2023 00:35               90175
+122-speech-recognition-quantization-wav2vec2-wi..> 31-Oct-2023 00:35              482510
+122-yolov8-quantization-with-accuracy-control-w..> 31-Oct-2023 00:35               21676
+123-detectron2-to-openvino-with-output.rst         31-Oct-2023 00:35               16703
+124-hugging-face-hub-with-output.rst               31-Oct-2023 00:35               16482
+125-convnext-classification-with-output.rst        31-Oct-2023 00:35               10593
+126-tensorflow-hub-with-output.rst                 31-Oct-2023 00:35               15512
+201-vision-monodepth-with-output.rst               31-Oct-2023 00:35              969651
+202-vision-superresolution-image-with-output.rst   31-Oct-2023 00:35               28083
+202-vision-superresolution-video-with-output.rst   31-Oct-2023 00:35             9642402
+203-meter-reader-with-output.rst                   31-Oct-2023 00:35               27185
+204-segmenter-semantic-segmentation-with-output..> 31-Oct-2023 00:35               28012
+205-vision-background-removal-with-output.rst      31-Oct-2023 00:35               14676
+206-vision-paddlegan-anime-with-output.rst         31-Oct-2023 00:35               22317
+207-vision-paddlegan-superresolution-with-outpu..> 31-Oct-2023 00:35               18189
+208-optical-character-recognition-with-output.rst  31-Oct-2023 00:35               28750
+209-handwritten-ocr-with-output.rst                31-Oct-2023 00:35               13321
+210-slowfast-video-recognition-with-output.rst     31-Oct-2023 00:35              769241
+211-speech-to-text-with-output.rst                 31-Oct-2023 00:35               89249
+212-pyannote-speaker-diarization-with-output.rst   31-Oct-2023 00:35             1294876
+213-question-answering-with-output.rst             31-Oct-2023 00:35               23057
+214-grammar-correction-with-output.rst             31-Oct-2023 00:35               32428
+215-image-inpainting-with-output.rst               31-Oct-2023 00:35               10044
+217-vision-deblur-with-output.rst                  31-Oct-2023 00:35               12552
+218-vehicle-detection-and-recognition-with-outp..> 31-Oct-2023 00:35               18290
+219-knowledge-graphs-conve-with-output.rst         31-Oct-2023 00:35               26242
+220-cross-lingual-books-alignment-with-output.rst  31-Oct-2023 00:35               53274
+221-machine-translation-with-output.rst            31-Oct-2023 00:35               10182
+222-vision-image-colorization-with-output.rst      31-Oct-2023 00:35               19493
+224-3D-segmentation-point-clouds-with-output.rst   31-Oct-2023 00:35                9960
+225-stable-diffusion-text-to-image-with-output.rst 31-Oct-2023 00:35               56071
+226-yolov7-optimization-with-output.rst            31-Oct-2023 00:35               45974
+227-whisper-convert-with-output.rst                31-Oct-2023 00:35               18077
+227-whisper-nncf-quantize-with-output.rst          31-Oct-2023 00:35               21850
+228-clip-zero-shot-convert-with-output.rst         31-Oct-2023 00:35               15239
+228-clip-zero-shot-quantize-with-output.rst        31-Oct-2023 00:35               15739
+229-distilbert-sequence-classification-with-out..> 31-Oct-2023 00:35               13454
+230-yolov8-instance-segmentation-with-output.rst   31-Oct-2023 00:35               58539
+230-yolov8-keypoint-detection-with-output.rst      31-Oct-2023 00:35               55065
+230-yolov8-object-detection-with-output.rst        31-Oct-2023 00:35               58903
+231-instruct-pix2pix-image-editing-with-output.rst 31-Oct-2023 00:35               52684
+232-clip-language-saliency-map-with-output.rst     31-Oct-2023 00:35               34908
+233-blip-convert-with-output.rst                   31-Oct-2023 00:35               29348
+233-blip-optimize-with-output.rst                  31-Oct-2023 00:35               17131
+234-encodec-audio-compression-with-output.rst      31-Oct-2023 00:35             3864226
+235-controlnet-stable-diffusion-with-output.rst    31-Oct-2023 00:35               58531
+236-stable-diffusion-v2-infinite-zoom-with-outp..> 31-Oct-2023 00:35               52546
+236-stable-diffusion-v2-optimum-demo-comparison..> 31-Oct-2023 00:35                8059
+236-stable-diffusion-v2-optimum-demo-with-outpu..> 31-Oct-2023 00:35                8407
+236-stable-diffusion-v2-text-to-image-demo-with..> 31-Oct-2023 00:35               13860
+236-stable-diffusion-v2-text-to-image-with-outp..> 31-Oct-2023 00:35               44177
+237-segment-anything-with-output.rst               31-Oct-2023 00:35               64142
+238-deep-floyd-if-optimize-with-output.rst         31-Oct-2023 00:35               31042
+239-image-bind-convert-with-output.rst             31-Oct-2023 00:35             2397351
+240-dolly-2-instruction-following-with-output.rst  31-Oct-2023 00:35               31788
+241-riffusion-text-to-music-with-output.rst        31-Oct-2023 00:35              623769
+242-freevc-voice-conversion-with-output.rst        31-Oct-2023 00:35              653238
+243-tflite-selfie-segmentation-with-output.rst     31-Oct-2023 00:35               22673
+244-named-entity-recognition-with-output.rst       31-Oct-2023 00:35               23869
+245-typo-detector-with-output.rst                  31-Oct-2023 00:35               30340
+246-depth-estimation-videpth-with-output.rst       31-Oct-2023 00:35               53677
+247-code-language-id-with-output.rst               31-Oct-2023 00:35               38729
+248-stable-diffusion-xl-with-output.rst            31-Oct-2023 00:35               23133
+249-oneformer-segmentation-with-output.rst         31-Oct-2023 00:35               28917
+250-music-generation-with-output.rst               31-Oct-2023 00:35             1390751
+251-tiny-sd-image-generation-with-output.rst       31-Oct-2023 00:35               47330
+252-fastcomposer-image-generation-with-output.rst  31-Oct-2023 00:35               46698
+253-zeroscope-text2video-with-output.rst           31-Oct-2023 00:35             1875395
+254-llm-chatbot-with-output.rst                    31-Oct-2023 00:35               43239
+255-mms-massively-multilingual-speech-with-outp..> 31-Oct-2023 00:35             1388124
+256-bark-text-to-audio-with-output.rst             31-Oct-2023 00:35              698945
+257-llava-multimodal-chatbot-with-output.rst       31-Oct-2023 00:35               52293
+258-blip-diffusion-subject-generation-with-outp..> 31-Oct-2023 00:35               52630
+259-decidiffusion-image-generation-with-output.rst 31-Oct-2023 00:35               51032
+260-pix2struct-docvqa-with-output.rst              31-Oct-2023 00:35               14557
+261-fast-segment-anything-with-output.rst          31-Oct-2023 00:35               22553
+262-softvc-voice-conversion-with-output.rst        31-Oct-2023 00:35               10075
+263-latent-consistency-models-image-generation-..> 31-Oct-2023 00:35               34965
+301-tensorflow-training-openvino-nncf-with-outp..> 31-Oct-2023 00:35               32073
+301-tensorflow-training-openvino-with-output.rst   31-Oct-2023 00:35               34000
+302-pytorch-quantization-aware-training-with-ou..> 31-Oct-2023 00:35               30565
+305-tensorflow-quantization-aware-training-with..> 31-Oct-2023 00:35               22473
+401-object-detection-with-output.rst               31-Oct-2023 00:35               20565
+402-pose-estimation-with-output.rst                31-Oct-2023 00:35               17581
+403-action-recognition-webcam-with-output.rst      31-Oct-2023 00:35               27886
+404-style-transfer-with-output.rst                 31-Oct-2023 00:35               17562
+405-paddle-ocr-webcam-with-output.rst              31-Oct-2023 00:35               26070
+406-3D-pose-estimation-with-output.rst             31-Oct-2023 00:35               33937
+407-person-tracking-with-output.rst                31-Oct-2023 00:35               28861
+notebook_utils-with-output.rst                     31-Oct-2023 00:35               12520
+notebooks_tags.json                                31-Oct-2023 00:35               10406
+notebooks_with_binder_buttons.txt                  31-Oct-2023 00:35                1166
+notebooks_with_colab_buttons.txt                   31-Oct-2023 00:35                1569
 

diff --git a/docs/notebooks/notebook_utils-with-output.rst b/docs/notebooks/notebook_utils-with-output.rst index ea2ec2dffa9637..7d23c0ccae3320 100644 --- a/docs/notebooks/notebook_utils-with-output.rst +++ b/docs/notebooks/notebook_utils-with-output.rst @@ -7,12 +7,12 @@ OpenVINO™ Notebooks. The code is synchronized with the There are five categories: -- `Files <#Files>`__ -- `Images <#Images>`__ -- `Videos <#Videos>`__ -- `Visualization <#Visualization>`__ -- `OpenVINO Tools <#OpenVINO-Tools>`__ -- `Checks and Alerts <#Checks-and-Alerts>`__ +- `Files <#files>`__ +- `Images <#images>`__ +- `Videos <#videos>`__ +- `Visualization <#visualization>`__ +- `OpenVINO Tools <#openvino-tools>`__ +- `Checks and Alerts <#checks-and-alerts>`__ Each category contains a test cell that also shows how to use the functions in the section. @@ -26,8 +26,8 @@ functions in the section. .. parsed-literal:: - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 - DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063 + DEPRECATION: pytorch-lightning 1.6.5 has a non-standard dependency specifier torch>=1.8.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063 Files @@ -91,7 +91,7 @@ Test File Functions .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-475/.workspace/scm/ov-notebook/notebooks/utils/Safety_Full_Hat_and_Vest.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-534/.workspace/scm/ov-notebook/notebooks/utils/Safety_Full_Hat_and_Vest.mp4 .. code:: ipython3 @@ -108,12 +108,12 @@ Test File Functions .. parsed-literal:: - openvino_notebooks_readme.md: 0%| | 0.00/10.9k [00:00This notebook requires OpenVINO 2022.1. The version on your system is: 2023.0.1-11005-fa1c41994f3-releases/2023/0.
Please run pip install --upgrade -r requirements.txt in the openvino_env environment to install this version. See the OpenVINO Notebooks README for detailed instructions +
This notebook requires OpenVINO 2022.1. The version on your system is: 2023.1.0-12185-9e6b00e51cd-releases/2023/1.
Please run pip install --upgrade -r requirements.txt in the openvino_env environment to install this version. See the OpenVINO Notebooks README for detailed instructions diff --git a/docs/notebooks/notebook_utils-with-output_files/index.html b/docs/notebooks/notebook_utils-with-output_files/index.html index 02c4762765eea9..bde25b66758af5 100644 --- a/docs/notebooks/notebook_utils-with-output_files/index.html +++ b/docs/notebooks/notebook_utils-with-output_files/index.html @@ -1,13 +1,13 @@ -Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/notebook_utils-with-output_files/ +Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/notebook_utils-with-output_files/ -

Index of /projects/ov-notebook/0.1.0-latest/20230815220807/dist/rst_files/notebook_utils-with-output_files/


../
-notebook_utils-with-output_12_0.jpg                16-Aug-2023 01:31              121563
-notebook_utils-with-output_12_0.png                16-Aug-2023 01:31              869307
-notebook_utils-with-output_26_0.png                16-Aug-2023 01:31               46994
-notebook_utils-with-output_41_0.png                16-Aug-2023 01:31               10059
-notebook_utils-with-output_41_1.png                16-Aug-2023 01:31               37584
-notebook_utils-with-output_41_2.png                16-Aug-2023 01:31               16690
-notebook_utils-with-output_41_3.png                16-Aug-2023 01:31               38992
+

Index of /projects/ov-notebook/0.1.0-latest/20231030220807/dist/rst_files/notebook_utils-with-output_files/


../
+notebook_utils-with-output_12_0.jpg                31-Oct-2023 00:35              121563
+notebook_utils-with-output_12_0.png                31-Oct-2023 00:35              869307
+notebook_utils-with-output_26_0.png                31-Oct-2023 00:35               45498
+notebook_utils-with-output_41_0.png                31-Oct-2023 00:35               10059
+notebook_utils-with-output_41_1.png                31-Oct-2023 00:35               37584
+notebook_utils-with-output_41_2.png                31-Oct-2023 00:35               16690
+notebook_utils-with-output_41_3.png                31-Oct-2023 00:35               38992
 

diff --git a/docs/notebooks/notebook_utils-with-output_files/notebook_utils-with-output_26_0.png b/docs/notebooks/notebook_utils-with-output_files/notebook_utils-with-output_26_0.png index 970378fa67d2f7..bfbb21457a5712 100644 --- a/docs/notebooks/notebook_utils-with-output_files/notebook_utils-with-output_26_0.png +++ b/docs/notebooks/notebook_utils-with-output_files/notebook_utils-with-output_26_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fe1fec7d917f0af31e6bee93a056bf53355c8bbff57d519a093c313d67ba1d6 -size 46994 +oid sha256:a447faf6463b3efa6f6d4c8bcedd6731e29c3807e9aa8dff94973e5e6c0263f5 +size 45498 diff --git a/docs/notebooks/notebooks_tags.json b/docs/notebooks/notebooks_tags.json index 090e04d579a9a7..1a102761264322 100644 --- a/docs/notebooks/notebooks_tags.json +++ b/docs/notebooks/notebooks_tags.json @@ -39,14 +39,12 @@ "107-speech-recognition-quantization-data2vec": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Transformers" ], "107-speech-recognition-quantization-wav2vec2": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Transformers" ], @@ -84,7 +82,6 @@ "112-pytorch-post-training-quantization-nncf": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Torchvision" ], @@ -127,18 +124,34 @@ "NNCF", "Pytorch" ], + "123-detectron2-to-openvino": [ + "Pytorch" + ], + "124-hugging-face-hub": [ + "Pytorch", + "Transformers" + ], + "125-convnext-classification": [ + "Pytorch", + "Torchvision" + ], + "125-lraspp-segmentation": [ + "Pytorch", + "Torchvision" + ], + "126-tensorflow-hub": [ + "Tensorflow" + ], "203-meter-reader": [ "Dynamic Shape", "Reshape Model" ], "204-segmenter-semantic-segmentation": [ "Benchmark Model", - "ONNX", "Pytorch", "Torchvision" ], "205-vision-background-removal": [ - "ONNX", "Pytorch" ], "206-vision-paddlegan-anime": [ @@ -154,9 +167,6 @@ "Download Model", "ONNX" ], - "209-handwritten-ocr": [ - "Download Model" - ], "210-slowfast-video-recognition": [ "ONNX", "Pytorch" @@ -165,7 +175,6 @@ 
"Convert Model", "Download Model", "Dynamic Shape", - "ONNX", "Pytorch", "Reshape Model" ], @@ -173,11 +182,8 @@ "ONNX", "Pytorch" ], - "213-question-answering": [ - "Convert Model", - "Download Model" - ], "214-grammar-correction": [ + "Async Inference", "Transformers" ], "216-attention-center": [ @@ -187,30 +193,22 @@ "Download Model", "Pytorch" ], - "218-vehicle-detection-and-recognition": [ - "Download Model" - ], "219-knowledge-graphs-conve": [ "Benchmark Model", - "ONNX", "Pytorch" ], "220-cross-lingual-books-alignment": [ "Async Inference", - "Optimize Model", "Pytorch", "Transformers" ], - "221-machine-translation": [ - "Download Model" - ], "222-vision-image-colorization": [ "Convert Model", "Download Model" ], "223-text-prediction": [ "Dynamic Shape", - "ONNX", + "Pytorch", "Transformers" ], "224-3D-segmentation-point-clouds": [ @@ -237,8 +235,6 @@ "Transformers" ], "228-clip-zero-shot-convert": [ - "ONNX", - "Pytorch", "Transformers" ], "228-clip-zero-shot-quantize": [ @@ -254,37 +250,39 @@ "230-yolov8-instance-segmentation": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Reshape Model" ], "230-yolov8-keypoint-detection": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Reshape Model" ], "230-yolov8-object-detection": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Reshape Model" ], "231-instruct-pix2pix-image-editing": [ - "ONNX", + "Benchmark Model", + "NNCF", "Pytorch", "Transformers" ], "232-clip-language-saliency-map": [ "Async Inference", - "ONNX", "Pytorch", "Transformers" ], - "233-blip-visual-language-processing": [ - "Dynamic Shape", + "233-blip-convert": [ + "Pytorch", + "Transformers" + ], + "233-blip-optimize": [ + "Benchmark Model", + "NNCF", "Pytorch", "Transformers" ], @@ -314,16 +312,20 @@ "237-segment-anything": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Torchvision" ], - "238-deep-floyd-if": [ + "238-deep-floyd-if-convert": [ + "Pytorch", + "Reshape Model" + ], + "238-deep-floyd-if-optimize": [ + "Benchmark Model", + 
"NNCF", "Pytorch", "Reshape Model" ], "239-image-bind-convert": [ - "ONNX", "Pytorch" ], "239-image-bind-quantize": [ @@ -335,21 +337,17 @@ "Transformers" ], "241-riffusion-text-to-music": [ - "Pytorch" + "Pytorch", + "Transformers" ], "242-freevc-voice-conversion": [ "ONNX", "Pytorch" ], "244-named-entity-recognition": [ - "ONNX", - "Pytorch", "Transformers" ], "245-typo-detector": [ - "ONNX", - "Optimize Model", - "Pytorch", "Transformers" ], "246-depth-estimation-videpth": [ @@ -361,7 +359,11 @@ "ONNX", "Transformers" ], + "248-stable-diffusion-xl": [ + "Transformers" + ], "249-oneformer-segmentation": [ + "NNCF", "Pytorch", "Transformers" ], @@ -390,6 +392,44 @@ "Reshape Model", "Transformers" ], + "255-mms-massively-multilingual-speech": [ + "NNCF", + "Pytorch", + "Transformers" + ], + "256-bark-text-to-audio": [ + "Dynamic Shape", + "Pytorch" + ], + "257-llava-multimodal-chatbot": [ + "Async Inference", + "NNCF", + "Pytorch", + "Transformers" + ], + "258-blip-diffusion-subject-generation": [ + "Dynamic Shape", + "Pytorch" + ], + "259-decidiffusion-image-generation": [ + "Pytorch", + "Transformers" + ], + "260-pix2struct-docvqa": [ + "Async Inference", + "Transformers" + ], + "261-fast-segment-anything": [ + "ONNX", + "Pytorch" + ], + "262-softvc-voice-conversion": [ + "Pytorch" + ], + "263-latent-consistency-models-image-generation": [ + "Pytorch", + "Transformers" + ], "301-tensorflow-training-openvino-nncf": [ "Benchmark Model", "NNCF", @@ -402,7 +442,6 @@ "302-pytorch-quantization-aware-training": [ "Benchmark Model", "NNCF", - "ONNX", "Pytorch", "Torchvision", "Train Model" diff --git a/docs/notebooks/notebooks_with_binder_buttons.txt b/docs/notebooks/notebooks_with_binder_buttons.txt index 1ea3afbe1bc7c1..ee9d747c1a629c 100644 --- a/docs/notebooks/notebooks_with_binder_buttons.txt +++ b/docs/notebooks/notebooks_with_binder_buttons.txt @@ -11,6 +11,11 @@ 115-async-api 120-tensorflow-object-detection-to-openvino 121-convert-to-openvino 
+123-detectron2-to-openvino +124-hugging-face-hub +125-convnext-classification +125-lraspp-segmentation +126-tensorflow-hub 201-vision-monodepth 202-vision-superresolution-image 202-vision-superresolution-video @@ -31,6 +36,7 @@ 243-tflite-selfie-segmentation 247-code-language-id 250-music-generation +261-fast-segment-anything 401-object-detection 402-pose-estimation 403-action-recognition-webcam diff --git a/docs/notebooks/notebooks_with_colab_buttons.txt b/docs/notebooks/notebooks_with_colab_buttons.txt index 082087e6ac4a32..5f9967df05dd15 100644 --- a/docs/notebooks/notebooks_with_colab_buttons.txt +++ b/docs/notebooks/notebooks_with_colab_buttons.txt @@ -1,31 +1,56 @@ +001-hello-world 002-openvino-api +003-hello-segmentation +004-hello-detection +101-tensorflow-classification-to-openvino 102-pytorch-to-openvino +103-paddle-to-openvino-classification +104-model-tools +105-language-quantize-bert +106-auto-device 107-speech-recognition-quantization-data2vec 111-yolov5-quantization-migration +113-image-classification-quantization 115-async-api 116-sparsity-optimization +118-optimize-preprocessing 119-tflite-to-openvino 120-tensorflow-object-detection-to-openvino 121-convert-to-openvino +123-detectron2-to-openvino +124-hugging-face-hub +125-convnext-classification +125-lraspp-segmentation +126-tensorflow-hub 201-vision-monodepth 202-vision-superresolution-image 202-vision-superresolution-video 204-segmenter-semantic-segmentation 205-vision-background-removal 206-vision-paddlegan-anime +207-vision-paddlegan-superresolution +208-optical-character-recognition +209-handwritten-ocr +211-speech-to-text +213-question-answering +216-attention-center +219-knowledge-graphs-conve 220-cross-lingual-books-alignment 221-machine-translation 223-text-prediction -227-whisper-subtitles-generation +224-3D-segmentation-point-clouds +227-whisper-convert +229-distilbert-sequence-classification 230-yolov8-instance-segmentation 230-yolov8-keypoint-detection 230-yolov8-object-detection 
-230-yolov8-optimization 232-clip-language-saliency-map 243-tflite-selfie-segmentation 244-named-entity-recognition 250-music-generation 251-tiny-sd-image-generation +260-pix2struct-docvqa +261-fast-segment-anything 305-tensorflow-quantization-aware-training 401-object-detection 404-style-transfer From 28279013af08c5d45852ab9c780efc431f7bbd60 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Mon, 6 Nov 2023 11:07:24 +0100 Subject: [PATCH 199/275] aligned lin build timeouts (#20885) --- .github/workflows/android_arm64.yml | 2 +- .github/workflows/linux_riscv.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 7f9f892cc82263..5a8076279b0f37 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -27,7 +27,7 @@ concurrency: jobs: Build: - timeout-minutes: 40 + timeout-minutes: 150 defaults: run: shell: bash diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index cdeccfa643678a..2f79ea22c062b5 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -31,7 +31,7 @@ concurrency: jobs: Build: - timeout-minutes: 30 + timeout-minutes: 150 defaults: run: shell: bash From 1083b3b58c234cb1c7c16727c585946da98b969a Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 6 Nov 2023 13:46:03 +0100 Subject: [PATCH 200/275] [core]Migrate Erf operator to new API (#20867) * Migrate Erf operator to new API * Remove `visit_attributes` is same as base class * Optimize reference implementation for size --- src/core/include/openvino/op/erf.hpp | 5 +- .../include/openvino/reference/erf.hpp | 34 ++++--- src/core/src/op/erf.cpp | 94 ++++++++----------- 3 files changed, 65 insertions(+), 68 deletions(-) diff --git a/src/core/include/openvino/op/erf.hpp b/src/core/include/openvino/op/erf.hpp index 63f20687f65b9b..29d6b46545cea0 100644 --- a/src/core/include/openvino/op/erf.hpp +++ 
b/src/core/include/openvino/op/erf.hpp @@ -21,11 +21,8 @@ class OPENVINO_API Erf : public util::UnaryElementwiseArithmetic { /// \param arg Node that produces the input tensor. Erf(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/erf.hpp b/src/core/reference/include/openvino/reference/erf.hpp index ea69fe98bd6ca3..d928ab9087fa49 100644 --- a/src/core/reference/include/openvino/reference/erf.hpp +++ b/src/core/reference/include/openvino/reference/erf.hpp @@ -6,22 +6,34 @@ #include #include -#include + +#include "openvino/reference/utils/type_util.hpp" namespace ov { namespace reference { -template ::value, bool>::type = true> -void erf(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(std::erf(arg[i])); - } +namespace func { + +template ::value>::type* = nullptr> +T erf(const T v) { + return static_cast(std::round(std::erf(v))); +} + +template ()>::type* = nullptr> +T erf(const T v) { + return std::erf(v); } +} // namespace func -template ::value, bool>::type = true> -void erf(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(std::round(std::erf(arg[i]))); - } +/** + * @brief Reference implementation of Erf operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ +template +void erf(const T* arg, T* out, const size_t count) { + std::transform(arg, arg + count, out, func::erf); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/erf.cpp b/src/core/src/op/erf.cpp index e471065a94279b..1315453b2d626a 100644 --- a/src/core/src/op/erf.cpp +++ b/src/core/src/op/erf.cpp @@ -2,79 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/erf.hpp" +#include "openvino/op/erf.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/log.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/util.hpp" #include "openvino/reference/erf.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { -bool ngraph::op::v0::Erf::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Erf_visit_attributes); - return true; -} +namespace erf { +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::erf(arg.data(), out.data(), count); + return true; + } +}; +} // namespace erf +namespace v0 { -shared_ptr op::Erf::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Erf::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Erf_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -op::Erf::Erf(const Output& arg) : UnaryElementwiseArithmetic(arg) { +Erf::Erf(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace erfop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::erf(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} - -bool evaluate_erf(const 
HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool Erf::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Erf_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_erf, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_erf, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_erf, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_erf, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_erf, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_erf, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace erfop + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); -bool op::Erf::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Erf_evaluate); - return erfop::evaluate_erf(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Erf::has_evaluate() const { +bool Erf::has_evaluate() const { OV_OP_SCOPE(v0_Erf_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From d0eb27bd3b3876049a65c4a76c37c5dfbda5a000 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 6 Nov 2023 16:57:05 +0400 Subject: [PATCH 201/275] [TF FE] Support Complex Tensors (#20860) * [TF FE] 
Support complex tensors Signed-off-by: Kazantsev, Roman * Align output type for Real and Imag operations Signed-off-by: Kazantsev, Roman * Update decoding complex types * Add support for ComplexAbs, FFT and IFFT operations Signed-off-by: Kazantsev, Roman * Correct axes based on a number of inner-most dimensions * Add layer tests Signed-off-by: Kazantsev, Roman * Update supported ops documentation Signed-off-by: Kazantsev, Roman * Add a comment for ComplexTypeMark Signed-off-by: Kazantsev, Roman --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow/docs/supported_ops.md | 32 +-- .../tensorflow/src/decoder_proto.cpp | 10 +- src/frontends/tensorflow/src/op_table.cpp | 16 ++ .../include/common_op_table.hpp | 7 + .../include/helper_ops/complex_type_mark.hpp | 51 +++++ .../tensorflow_common/include/utils.hpp | 7 +- .../tensorflow_common/src/op/complex.cpp | 61 ++++++ .../tensorflow_common/src/op/complex_abs.cpp | 61 ++++++ .../tensorflow_common/src/op/fft.cpp | 64 ++++++ .../tensorflow_common/src/op/ifft.cpp | 64 ++++++ .../tensorflow_common/src/op/irfft.cpp | 64 ++++++ .../tensorflow_common/src/op/real_imag.cpp | 55 +++++ .../tensorflow_common/src/op/rfft.cpp | 59 ++++++ .../tensorflow_common/src/op/roll.cpp | 45 +++- src/frontends/tensorflow_common/src/utils.cpp | 39 +++- .../tensorflow_tests/test_tf_ComplexFFT.py | 196 ++++++++++++++++++ tools/mo/openvino/tools/mo/convert_impl.py | 4 - 17 files changed, 804 insertions(+), 31 deletions(-) create mode 100644 src/frontends/tensorflow_common/include/helper_ops/complex_type_mark.hpp create mode 100644 src/frontends/tensorflow_common/src/op/complex.cpp create mode 100644 src/frontends/tensorflow_common/src/op/complex_abs.cpp create mode 100644 src/frontends/tensorflow_common/src/op/fft.cpp create mode 100644 src/frontends/tensorflow_common/src/op/ifft.cpp create mode 100644 src/frontends/tensorflow_common/src/op/irfft.cpp create mode 100644 src/frontends/tensorflow_common/src/op/real_imag.cpp create mode 100644 
src/frontends/tensorflow_common/src/op/rfft.cpp create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py diff --git a/src/frontends/tensorflow/docs/supported_ops.md b/src/frontends/tensorflow/docs/supported_ops.md index e9b9a499f55a76..32bd4caef5bc37 100644 --- a/src/frontends/tensorflow/docs/supported_ops.md +++ b/src/frontends/tensorflow/docs/supported_ops.md @@ -218,8 +218,8 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | CollectiveReduceV2 | NO | | | CollectiveReduceV3 | NO | | | CombinedNonMaxSuppression | NO | | -| Complex | NO | | -| ComplexAbs | NO | | +| Complex | YES | | +| ComplexAbs | YES | | | CompositeTensorVariantFromComponents | NO | | | CompositeTensorVariantToComponents | NO | | | CompressElement | NO | | @@ -425,9 +425,9 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ExtractImagePatches | YES | | | ExtractJpegShape | NO | | | ExtractVolumePatches | NO | | -| FFT | NO | | -| FFT2D | NO | | -| FFT3D | NO | | +| FFT | YES | | +| FFT2D | YES | | +| FFT3D | YES | | | FIFOQueue | YES | | | FIFOQueueV2 | YES | | | Fact | NO | | @@ -492,12 +492,12 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | HashTableV2 | YES | | | HistogramFixedWidth | NO | | | HistogramSummary | NO | | -| IFFT | NO | | -| IFFT2D | NO | | -| IFFT3D | NO | | -| IRFFT | NO | | -| IRFFT2D | NO | | -| IRFFT3D | NO | | +| IFFT | YES | | +| IFFT2D | YES | | +| IFFT3D | YES | | +| IRFFT | YES | | +| IRFFT2D | YES | | +| IRFFT3D | YES | | | Identity | YES | | | IdentityN | YES | | | IdentityReader | NO | | @@ -507,7 +507,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | IgammaGradA | NO | | | Igammac | NO | | | IgnoreErrorsDataset | NO | | -| Imag | NO | | +| Imag | YES | | | ImageProjectiveTransformV2 | NO | | | ImageProjectiveTransformV3 | NO | | | ImageSummary | NO | | @@ -826,9 +826,9 @@ A "supported operation" is one 
that TensorFlow Frontend can convert to the OpenV | QueueIsClosedV2 | NO | | | QueueSize | NO | | | QueueSizeV2 | NO | | -| RFFT | NO | | -| RFFT2D | NO | | -| RFFT3D | NO | | +| RFFT | YES | | +| RFFT2D | YES | | +| RFFT3D | YES | | | RGBToHSV | NO | | | RaggedBincount | NO | | | RaggedCountSparseOutput | NO | | @@ -876,7 +876,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenV | ReaderRestoreStateV2 | NO | | | ReaderSerializeState | NO | | | ReaderSerializeStateV2 | NO | | -| Real | NO | | +| Real | YES | | | RealDiv | YES | | | RebatchDataset | NO | | | RebatchDatasetV2 | NO | | diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp index 9e0a53efb6d09f..573e850850ce16 100644 --- a/src/frontends/tensorflow/src/decoder_proto.cpp +++ b/src/frontends/tensorflow/src/decoder_proto.cpp @@ -113,10 +113,14 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const { case ::tensorflow::AttrValue::ValueCase::kType: { auto atype = attrs[0].type(); - if (atype != ::tensorflow::DT_STRING) { - return get_ov_type(attrs[0].type()); - } else { + if (atype == ::tensorflow::DT_STRING) { return ov::Any("DT_STRING"); + } else if (atype == ::tensorflow::DT_COMPLEX64) { + return ov::Any("DT_COMPLEX64"); + } else if (atype == ::tensorflow::DT_COMPLEX128) { + return ov::Any("DT_COMPLEX128"); + } else { + return get_ov_type(atype); } } diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index e5f25dad31270a..176525fa7ef643 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -154,6 +154,8 @@ const std::map get_supported_ops() { {"CheckNumerics", CreatorFunction(translate_identity_op)}, {"CheckNumericsV2", CreatorFunction(translate_identity_op)}, {"ClipByValue", CreatorFunction(translate_clip_by_value_op)}, + {"Complex", CreatorFunction(translate_complex_op)}, + {"ComplexAbs", 
CreatorFunction(translate_complex_abs_op)}, {"Concat", CreatorFunction(translate_concat_op)}, {"ConcatV2", CreatorFunction(translate_concat_op)}, {"Const", CreatorFunction(translate_const_op)}, @@ -178,6 +180,9 @@ const std::map get_supported_ops() { {"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)}, {"FakeQuantWithMinMaxVarsPerChannel", CreatorFunction(translate_fake_quant_op)}, {"FakeQuantWithMinMaxArgs", CreatorFunction(translate_fake_quant_with_min_max_args)}, + {"FFT", CreatorFunction(translate_fft_op)}, + {"FFT2D", CreatorFunction(translate_fft_op)}, + {"FFT3D", CreatorFunction(translate_fft_op)}, {"FIFOQueue", CreatorFunction(translate_fifo_queue_op)}, {"FIFOQueueV2", CreatorFunction(translate_fifo_queue_op)}, {"Fill", CreatorFunction(translate_fill_op)}, @@ -196,7 +201,14 @@ const std::map get_supported_ops() { {"IdentityN", CreatorFunction(translate_identity_n_op)}, {"Inv", CreatorFunction(translate_inv_op)}, {"If", CreatorFunction(translate_if_op)}, + {"IFFT", CreatorFunction(translate_ifft_op)}, + {"IFFT2D", CreatorFunction(translate_ifft_op)}, + {"IFFT3D", CreatorFunction(translate_ifft_op)}, + {"Imag", CreatorFunction(translate_real_imag_op)}, {"input_arg", CreatorFunction(translate_input_arg_op)}, + {"IRFFT", CreatorFunction(translate_irfft_op)}, + {"IRFFT2D", CreatorFunction(translate_irfft_op)}, + {"IRFFT3D", CreatorFunction(translate_irfft_op)}, {"Iterator", CreatorFunction(translate_iterator_op)}, {"IteratorGetNext", CreatorFunction(translate_iterator_get_next_op)}, {"IteratorV2", CreatorFunction(translate_iterator_op)}, @@ -248,6 +260,7 @@ const std::map get_supported_ops() { {"Rank", CreatorFunction(translate_rank_op)}, {"RandomUniform", CreatorFunction(translate_random_uniform_op)}, {"RandomUniformInt", CreatorFunction(translate_random_uniform_int_op)}, + {"Real", CreatorFunction(translate_real_imag_op)}, {"Reciprocal", CreatorFunction(translate_reciprocal_op)}, {"Relu6", CreatorFunction(translate_relu_6_op)}, {"Reshape", 
CreatorFunction(translate_reshape_op)}, @@ -257,6 +270,9 @@ const std::map get_supported_ops() { {"ResizeBilinear", CreatorFunction(translate_interpolate_op)}, {"ResizeNearestNeighbor", CreatorFunction(translate_interpolate_op)}, {"ResourceGather", CreatorFunction(translate_resource_gather_op)}, + {"RFFT", CreatorFunction(translate_rfft_op)}, + {"RFFT2D", CreatorFunction(translate_rfft_op)}, + {"RFFT3D", CreatorFunction(translate_rfft_op)}, {"Roll", CreatorFunction(translate_roll_op)}, {"Round", CreatorFunction(translate_round_op)}, {"Rsqrt", CreatorFunction(translate_rsqrt_op)}, diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index 6befa470761a45..6b5d83d4c2bb84 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -46,6 +46,8 @@ OP_CONVERTER(translate_broadcast_to_op); OP_CONVERTER(translate_bucketize_op); OP_CONVERTER(translate_cast_op); OP_CONVERTER(translate_clip_by_value_op); +OP_CONVERTER(translate_complex_op); +OP_CONVERTER(translate_complex_abs_op); OP_CONVERTER(translate_concat_op); OP_CONVERTER(translate_const_op); OP_CONVERTER(translate_conv_2d_op); @@ -66,6 +68,7 @@ OP_CONVERTER(translate_expand_dims_op); OP_CONVERTER(translate_extract_image_patches_op); OP_CONVERTER(translate_fake_quant_op); OP_CONVERTER(translate_fake_quant_with_min_max_args); +OP_CONVERTER(translate_fft_op); OP_CONVERTER(translate_fill_op); OP_CONVERTER(translate_floor_div_op); OP_CONVERTER_NAMED(translate_fused_batch_norm_op); @@ -75,11 +78,13 @@ OP_CONVERTER(translate_gather_nd_op); OP_CONVERTER(translate_gather_tree_op); OP_CONVERTER(translate_identity_op); OP_CONVERTER(translate_identity_n_op); +OP_CONVERTER(translate_ifft_op); OP_CONVERTER(translate_input_arg_op); OP_CONVERTER(translate_inv_op); OP_CONVERTER(translate_invert_permutation_op); OP_CONVERTER(translate_output_arg_op); 
OP_CONVERTER(translate_interpolate_op); +OP_CONVERTER(translate_irfft_op); OP_CONVERTER(translate_is_finite_op); OP_CONVERTER(translate_is_inf_op); OP_CONVERTER(translate_is_nan_op); @@ -109,6 +114,7 @@ OP_CONVERTER(translate_range_op); OP_CONVERTER(translate_rank_op); OP_CONVERTER(translate_random_uniform_op); OP_CONVERTER(translate_random_uniform_int_op); +OP_CONVERTER(translate_real_imag_op); OP_CONVERTER(translate_relu_6_op); OP_CONVERTER(translate_reciprocal_op); OP_CONVERTER(translate_reshape_op); @@ -116,6 +122,7 @@ OP_CONVERTER(translate_resource_gather_op); OP_CONVERTER(translate_reverse_op); OP_CONVERTER(translate_reverse_v2_op); OP_CONVERTER(translate_reverse_sequence_op); +OP_CONVERTER(translate_rfft_op); OP_CONVERTER(translate_roll_op); OP_CONVERTER(translate_round_op); OP_CONVERTER(translate_rsqrt_op); diff --git a/src/frontends/tensorflow_common/include/helper_ops/complex_type_mark.hpp b/src/frontends/tensorflow_common/include/helper_ops/complex_type_mark.hpp new file mode 100644 index 00000000000000..5d0e2d5eb2d140 --- /dev/null +++ b/src/frontends/tensorflow_common/include/helper_ops/complex_type_mark.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/util/framework_node.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { + +// ComplexTypeMark serves to mark places that require complex type propagation +// that means to represent native complex type with simulating floating-point tensor +// that has one extra dimension to concatenate real and imaginary parts of complex tensor. +// For example, a tensor of complex type with shape [N1, N2, ..., Nk] will be transformed +// into a floating-point tensor [N1, N2, ..., Nk, 2] +// where a slice with index [..., 0] represents a real part and +// a slice with index [..., 1] represents an imaginary part.
+class ComplexTypeMark : public ov::op::util::FrameworkNode { +public: + OPENVINO_OP("ComplexTypeMark", "util", ov::op::util::FrameworkNode); + + ComplexTypeMark(const ov::Output& input, const ov::element::Type& complex_part_type) + : ov::op::util::FrameworkNode(ov::OutputVector{input}, 1), + m_complex_part_type(complex_part_type) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + set_output_type(0, ov::element::dynamic, PartialShape::dynamic()); + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { + auto complex_type_mark = std::make_shared(inputs[0], m_complex_part_type); + complex_type_mark->set_attrs(get_attrs()); + return complex_type_mark; + } + + ov::element::Type get_complex_part_type() const { + return m_complex_part_type; + } + +private: + ov::element::Type m_complex_part_type; +}; + +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/include/utils.hpp b/src/frontends/tensorflow_common/include/utils.hpp index acca76aaab8dcc..1fa5d0083fde55 100644 --- a/src/frontends/tensorflow_common/include/utils.hpp +++ b/src/frontends/tensorflow_common/include/utils.hpp @@ -88,7 +88,10 @@ void fill_explicit_pads_vectors(const NodeContext& node, ov::CoordinateDiff& pads_begin, ov::CoordinateDiff& pads_end); -void default_op_checks(const NodeContext& node, size_t min_input_size, const std::vector& supported_ops); +void default_op_checks(const NodeContext& node, + size_t min_input_size, + const std::vector& supported_ops, + bool supported_complex = false); ov::Output get_elements_number_1d(const Output& output, ov::element::Type output_type, @@ -155,6 +158,8 @@ ov::Output get_data_slice(const ov::Output& data, const int64_t& stop, const int64_t& step); +ov::Output compute_broadcast_args(const ov::Output& shape1, const ov::Output& shape2); + } // namespace tensorflow } // namespace frontend } // namespace ov diff --git 
a/src/frontends/tensorflow_common/src/op/complex.cpp b/src/frontends/tensorflow_common/src/op/complex.cpp new file mode 100644 index 00000000000000..db686e928d7785 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/complex.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_complex_op(const NodeContext& node) { + default_op_checks(node, 2, {"Complex"}, true); + auto real = node.get_input(0); + auto imag = node.get_input(1); + auto tout = node.get_attribute("Tout", "DT_COMPLEX64"); + element::Type complex_part_type = (tout == "DT_COMPLEX64" ? 
element::f32 : element::f64); + + // compute target shape to which real and imag parts must be broadcasted + // and broadcast them + auto real_shape = make_shared(real, element::i32); + auto imag_shape = make_shared(imag, element::i32); + auto target_shape = compute_broadcast_args(real_shape, imag_shape); + real = make_shared(real, target_shape); + imag = make_shared(imag, target_shape); + + // expand real and imaginary parts with one dimension in the end for further concatenation + // this way, complex tensor with real and imag of shapes [N1, N2, ..., Nk] will be represented as floating-point + // tensor of shape [N1, N2, ..., Nk, 2] + auto real_rank = compute_subgraph_scalar_rank(real, element::i32, false); + real = make_shared(real, real_rank); + imag = make_shared(imag, real_rank); + + // concatenate real and imaginary parts to have a complex tensor represented as a floating-point tensor of shape + // [N1, N2, ..., Nk, 2] + auto complex_tensor = make_shared(OutputVector{real, imag}, -1)->output(0); + complex_tensor = make_shared(complex_tensor, complex_part_type); + + // set node name and tensor + set_node_name(node.get_name(), complex_tensor.get_node_shared_ptr()); + + // create complex type mark operation for upcoming operations in a graph + auto complex_type_mark = make_shared(complex_tensor, complex_part_type); + return complex_type_mark->outputs(); +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/complex_abs.cpp b/src/frontends/tensorflow_common/src/op/complex_abs.cpp new file mode 100644 index 00000000000000..008bc369320ebe --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/complex_abs.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" 
+#include "openvino/op/power.hpp" +#include "openvino/op/reduce_sum.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_complex_abs_op(const NodeContext& node) { + default_op_checks(node, 1, {"ComplexAbs"}, true); + auto op_type = node.get_op_type(); + auto x = node.get_input(0); + auto tout = node.get_attribute("Tout", element::f32); + + // check that complex type mark is set to the input + auto complex_type_mark = as_type_ptr(x.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION(node, + complex_type_mark, + "[TensorFlow Frontend] internal error: ComplexTypeMark is not set to input of " + op_type); + auto complex_part_type = complex_type_mark->get_complex_part_type(); + // data is complex tensor representation in a form [N1, N2, ..., Nk, 2] + // where slice [N1, N2, ..., Nk, 0] contains real part of the complex tensor and + // slice [N1, N2, ..., Nk, 1] contains imaginary part of the complex tensor + auto data = complex_type_mark->input_value(0); + + // compute element-wise square for complex representation + auto const_two = make_shared(complex_part_type, Shape{}, 2); + auto squared_data = make_shared(data, const_two); + + // compute sum of squared real and imaginary parts + auto const_minus_one = make_shared(element::i32, Shape{}, -1); + auto complex_abs = make_shared(squared_data, const_minus_one, false)->output(0); + + // compute ComplexAbs by root-squared operation + auto const_half = make_shared(complex_part_type, Shape{}, 0.5f); + complex_abs = make_shared(complex_abs, const_half); + + // align output type required by tout attribute + complex_abs = make_shared(complex_abs, tout); + + set_node_name(node.get_name(), complex_abs.get_node_shared_ptr()); + return {complex_abs}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git
a/src/frontends/tensorflow_common/src/op/fft.cpp b/src/frontends/tensorflow_common/src/op/fft.cpp new file mode 100644 index 00000000000000..b46a7ce91757cf --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/fft.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/dft.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_fft_op(const NodeContext& node) { + default_op_checks(node, 1, {"FFT", "FFT2D", "FFT3D"}, true); + auto op_type = node.get_op_type(); + auto input = node.get_input(0); + + // check that ComplexTypeMark is set + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + complex_type_mark, + "[TensorFlow Frontend] internal error: ComplexTypeMark is not set to input for " + op_type); + auto data = complex_type_mark->input_value(0); + auto complex_part_type = complex_type_mark->get_complex_part_type(); + + // compute a number of inner-most dimensions + int32_t num_axes = 1; + if (op_type == "FFT2D") { + num_axes = 2; + } else if (op_type == "FFT3D") { + num_axes = 3; + } + + // compute axes along which to compute FFT + auto const_num_axes = make_shared(element::i32, Shape{}, num_axes); + auto const_one = make_shared(element::i32, Shape{}, 1); + auto data_rank = compute_subgraph_scalar_rank(data, element::i32, true); + // exclude the last dimension since it concatenated real and imaginary parts + auto data_rank_minus_one = make_shared(data_rank, const_one); + auto start = make_shared(data_rank_minus_one, const_num_axes); + auto axes = make_shared(start, data_rank_minus_one, const_one, element::i32); + + // compute FFT and align its output type + auto fft = 
make_shared(data, axes); + set_node_name(node.get_name(), fft); + + // insert ComplexTypeMark since FFT generates output of complex type + complex_type_mark = make_shared(fft, complex_part_type); + + return {complex_type_mark}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/ifft.cpp b/src/frontends/tensorflow_common/src/op/ifft.cpp new file mode 100644 index 00000000000000..927d7934f549ee --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/ifft.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/idft.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_ifft_op(const NodeContext& node) { + default_op_checks(node, 1, {"IFFT", "IFFT2D", "IFFT3D"}, true); + auto op_type = node.get_op_type(); + auto input = node.get_input(0); + + // check that ComplexTypeMark is set + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + complex_type_mark, + "[TensorFlow Frontend] internal error: ComplexTypeMark is not set to input for " + op_type); + auto data = complex_type_mark->input_value(0); + auto complex_part_type = complex_type_mark->get_complex_part_type(); + + // compute a number of inner-most dimensions + int32_t num_axes = 1; + if (op_type == "IFFT2D") { + num_axes = 2; + } else if (op_type == "IFFT3D") { + num_axes = 3; + } + + // compute axes along which to compute inverse FFT + auto const_num_axes = make_shared(element::i32, Shape{}, num_axes); + auto const_one = make_shared(element::i32, Shape{}, 1); + auto data_rank = compute_subgraph_scalar_rank(data, element::i32, true); 
+ // exclude the last dimension since it concatenated real and imaginary parts + auto data_rank_minus_one = make_shared(data_rank, const_one); + auto start = make_shared(data_rank_minus_one, const_num_axes); + auto axes = make_shared(start, data_rank_minus_one, const_one, element::i32); + + // compute inverse FFT and align its output type + auto ifft = make_shared(data, axes); + set_node_name(node.get_name(), ifft); + + // insert ComplexTypeMark since IFFT generates output of complex type + complex_type_mark = make_shared(ifft, complex_part_type); + + return {complex_type_mark}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/irfft.cpp b/src/frontends/tensorflow_common/src/op/irfft.cpp new file mode 100644 index 00000000000000..5fc1f08d35c158 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/irfft.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/irdft.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_irfft_op(const NodeContext& node) { + default_op_checks(node, 2, {"IRFFT", "IRFFT2D", "IRFFT3D"}, true); + auto op_type = node.get_op_type(); + auto input = node.get_input(0); + auto fft_length = node.get_input(1); + auto treal = node.get_attribute("Treal", element::f32); + + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + complex_type_mark, + "[TensorFlow Frontend] internal error: ComplexTypeMark is not created before " + op_type + " operation."); + + // 
compute a number of inner-most dimensions + int32_t num_axes = 1; + if (op_type == "IRFFT2D") { + num_axes = 2; + } else if (op_type == "IRFFT3D") { + num_axes = 3; + } + + // compute axes along which to compute inverse RFFT + auto const_num_axes = make_shared(element::i32, Shape{}, num_axes); + auto data = complex_type_mark->input_value(0); + auto data_rank = compute_subgraph_scalar_rank(data, element::i32, true); + auto const_one = make_shared(element::i32, Shape{}, 1); + auto data_rank_minus_one = make_shared(data_rank, const_one); + auto start = make_shared(data_rank_minus_one, const_num_axes); + auto axes = make_shared(start, data_rank_minus_one, const_one, element::i32); + auto irdft = make_shared(complex_type_mark->input_value(0), axes, fft_length)->output(0); + + // no need to insert ComplexTypeMark because operation generates a floating-point tensor + irdft = make_shared(irdft, treal); + set_node_name(node.get_name(), irdft.get_node_shared_ptr()); + + return {irdft}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/real_imag.cpp b/src/frontends/tensorflow_common/src/op/real_imag.cpp new file mode 100644 index 00000000000000..7b237ac4db443b --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/real_imag.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/gather.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_real_imag_op(const NodeContext& node) { + default_op_checks(node, 1, {"Real", "Imag"}, true); + auto op_type = node.get_op_type(); + auto input = node.get_input(0); + 
auto tout = node.get_attribute("Tout", element::f32); + // Complex tensor is represented as a floating-point tensor of shape [N1, N2, ..., Nk, 2] + // where real part is placed in the slice by last dimension [..., 0] and + // imaginary part is placed by index [..., 1] + int32_t axis_value = (op_type == "Real") ? 0 : 1; + + // check that complex type mark is set at the input + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + TENSORFLOW_OP_VALIDATION( + node, + complex_type_mark, + "[TensorFlow Frontend] internal error: ComplexTypeMark is not set at the input of " + op_type); + auto data = complex_type_mark->input_value(0); + + // gather the required slice corresponding to Real or Imaginary part + auto gather_index = make_shared(element::i32, Shape{}, axis_value); + auto gather_axis = make_shared(element::i32, Shape{1}, -1); + auto complex_part = make_shared(data, gather_index, gather_axis)->output(0); + + // align output type required by tout attribute + complex_part = make_shared(complex_part, tout); + + set_node_name(node.get_name(), complex_part.get_node_shared_ptr()); + + return {complex_part}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/rfft.cpp b/src/frontends/tensorflow_common/src/op/rfft.cpp new file mode 100644 index 00000000000000..7e38c8651a9058 --- /dev/null +++ b/src/frontends/tensorflow_common/src/op/rfft.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/core/any.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/rdft.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" + +using namespace std; +using namespace ov; +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector 
translate_rfft_op(const NodeContext& node) { + default_op_checks(node, 2, {"RFFT", "RFFT2D", "RFFT3D"}); + auto op_type = node.get_op_type(); + auto input = node.get_input(0); + auto fft_length = node.get_input(1); + auto tcomplex = node.get_attribute("Tcomplex", "DT_COMPLEX64"); + element::Type complex_part_type = (tcomplex == "DT_COMPLEX64" ? element::f32 : element::f64); + + // compute a number of inner-most dimension of the input signal + int32_t num_axes = 1; + if (op_type == "RFFT2D") { + num_axes = 2; + } else if (op_type == "RFFT3D") { + num_axes = 3; + } + + // compute axes along which to compute inverse RFFT + auto const_num_axes = make_shared(element::i32, Shape{}, num_axes); + auto input_rank = compute_subgraph_scalar_rank(input, element::i32, true); + auto start = make_shared(input_rank, const_num_axes); + auto const_one = make_shared(element::i32, Shape{}, 1); + auto axes = make_shared(start, input_rank, const_one, element::i32); + + // compute real FFT and align its output type + auto rfft = make_shared(input, axes, fft_length)->output(0); + rfft = make_shared(rfft, complex_part_type); + set_node_name(node.get_name(), rfft.get_node_shared_ptr()); + + // insert ComplexTypeMark since RFFT generates output of complex type + auto complex_type_mark = make_shared(rfft, complex_part_type); + + return {complex_type_mark}; +} + +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/roll.cpp b/src/frontends/tensorflow_common/src/op/roll.cpp index 3f53e178a38572..6ed227c0eef52a 100644 --- a/src/frontends/tensorflow_common/src/op/roll.cpp +++ b/src/frontends/tensorflow_common/src/op/roll.cpp @@ -2,12 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/roll.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "helper_ops/complex_type_mark.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" 
+#include "openvino/op/floor_mod.hpp" +#include "openvino/op/subtract.hpp" +#include "utils.hpp" using namespace std; using namespace ov; -using namespace ov::opset8; +using namespace ov::op; using namespace ov::frontend::tensorflow; namespace ov { @@ -15,12 +22,38 @@ namespace frontend { namespace tensorflow { namespace op { ov::OutputVector translate_roll_op(const NodeContext& node) { - auto data = node.get_input(0); + default_op_checks(node, 3, {"Roll"}, true); + auto input = node.get_input(0); auto shift = node.get_input(1); auto axis = node.get_input(2); - auto res = std::make_shared(data, shift, axis); - set_node_name(node.get_name(), res); - return res->outputs(); + + // check if complex type mark is set + // if yes, sinking it through Roll operation further + auto complex_type_mark = as_type_ptr(input.get_node_shared_ptr()); + element::Type complex_part_type = element::dynamic; + if (complex_type_mark) { + input = complex_type_mark->input_value(0); + complex_part_type = complex_type_mark->get_complex_part_type(); + + // axes can be negative so we need to adjust them + // since the last dimension for complex type case is auxiliary (not real) + axis = make_shared(axis, element::i64); + auto input_rank = compute_subgraph_scalar_rank(input, element::i64, true); + auto const_one = make_shared(element::i64, Shape{}, 1); + auto input_rank_minus_one = make_shared(input_rank, const_one)->output(0); + + // adjust axis to make them non-negative + axis = make_shared(axis, input_rank_minus_one); + } + + auto roll = std::make_shared(input, shift, axis)->output(0); + set_node_name(node.get_name(), roll.get_node_shared_ptr()); + + if (complex_type_mark) { + roll = make_shared(roll, complex_part_type)->output(0); + } + + return {roll}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/utils.cpp b/src/frontends/tensorflow_common/src/utils.cpp index 5e65bb7dae2e1b..adf736d3b2cf84 100644 --- 
a/src/frontends/tensorflow_common/src/utils.cpp +++ b/src/frontends/tensorflow_common/src/utils.cpp @@ -7,6 +7,7 @@ #include #include "common_op_table.hpp" +#include "helper_ops/complex_type_mark.hpp" #include "openvino/opsets/opset10.hpp" using namespace ov; @@ -242,7 +243,10 @@ OutputVector translate_convolution_op(const frontend::NodeContext& node, size_t return {conv}; } -void default_op_checks(const frontend::NodeContext& node, size_t min_input_size, const vector& supported_ops) { +void default_op_checks(const frontend::NodeContext& node, + size_t min_input_size, + const vector& supported_ops, + bool supported_complex) { auto op_type = node.get_op_type(); TENSORFLOW_OP_VALIDATION(node, find(supported_ops.begin(), supported_ops.end(), op_type) != supported_ops.end(), @@ -250,6 +254,21 @@ void default_op_checks(const frontend::NodeContext& node, size_t min_input_size, TENSORFLOW_OP_VALIDATION(node, node.get_input_size() >= min_input_size, op_type + " must have at least " + to_string(min_input_size) + " inputs."); + + // check if it supports complex type in case complex type input + bool has_input_complex_type = false; + auto input_size = static_cast(node.get_input_size()); + for (int input_ind = 0; input_ind < input_size; ++input_ind) { + auto node_input = node.get_input(input_ind); + if (as_type_ptr(node_input.get_node_shared_ptr())) { + has_input_complex_type = true; + break; + } + } + TENSORFLOW_OP_VALIDATION( + node, + !has_input_complex_type || supported_complex, + "[TensorFlow Frontend] internal error: translator for " + op_type + " does not support input complex type"); } bool is_conditional_edge(const string& input_tensor_name) { @@ -356,6 +375,24 @@ Output get_data_slice(const Output& data, const int64_t& start, cons return make_shared(data, start_const, stop_const, step_const)->output(0); } +Output compute_broadcast_args(const Output& shape1, const Output& shape2) { + // compute a number of shape elements to append for broadcasting + auto size0 = 
make_shared(shape1); + auto size1 = make_shared(shape2); + auto max_size = make_shared(size0, size1); + auto diff1 = make_shared(max_size, size0); + auto diff2 = make_shared(max_size, size1); + + // pad the shortest shape value with minus ones + // to take dynamic shapes into account + auto const_zero = create_same_type_const(diff1, std::vector{0}, Shape{1}); + auto const_one = create_same_type_const_scalar(shape1, 1); + auto padded_s0 = make_shared(shape1, diff1, const_zero, const_one, ov::op::PadMode::CONSTANT); + auto padded_s1 = make_shared(shape2, diff2, const_zero, const_one, ov::op::PadMode::CONSTANT); + + auto broadcasted_shape = make_shared(padded_s0, padded_s1); + return broadcasted_shape->output(0); +} } // namespace tensorflow } // namespace frontend } // namespace ov diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py b/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py new file mode 100644 index 00000000000000..14586b3bba3805 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py @@ -0,0 +1,196 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestComplexFFT(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'param_real' in inputs_info + assert 'param_imag' in inputs_info + param_real_shape = inputs_info['param_real'] + param_imag_shape = inputs_info['param_imag'] + inputs_data = {} + inputs_data['param_real'] = 4 * rng.random(param_real_shape).astype(np.float32) - 2 + inputs_data['param_imag'] = 4 * rng.random(param_imag_shape).astype(np.float32) - 2 + return inputs_data + + def create_complex_fft_net(self, input_shape, shift_roll, axis_roll, fft_op): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + param_real = 
tf.compat.v1.placeholder(np.float32, input_shape, 'param_real') + param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag') + shift = tf.constant(shift_roll, dtype=tf.int32) + axis = tf.constant(axis_roll, dtype=tf.int32) + complex = tf.raw_ops.Complex(real=param_real, imag=param_imag) + roll = tf.raw_ops.Roll(input=complex, shift=shift, axis=axis) + fft = fft_op(input=roll) + real = tf.raw_ops.Real(input=fft) + imag = tf.raw_ops.Imag(input=fft) + tf.raw_ops.Pack(values=[real, imag], axis=-1) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[1, 50, 2], shift_roll=[10, 1], axis_roll=[-2, -1]), + dict(input_shape=[4, 20, 3], shift_roll=[2, 10], axis_roll=[0, 1]), + dict(input_shape=[1, 50, 50, 2], shift_roll=[10, 20], axis_roll=[-2, -1]), + dict(input_shape=[4, 20, 30, 3], shift_roll=[2, 10], axis_roll=[0, 1]), + dict(input_shape=[1, 50, 50, 30, 2], shift_roll=[10, 20, 4], axis_roll=[-3, -2, -1]), + dict(input_shape=[4, 20, 30, 10, 3], shift_roll=[2, 10], axis_roll=[1, 2]), + ] + + @pytest.mark.parametrize("fft_op", [ + tf.raw_ops.FFT, tf.raw_ops.FFT2D, tf.raw_ops.FFT3D, + tf.raw_ops.IFFT, tf.raw_ops.IFFT2D, tf.raw_ops.IFFT3D + ]) + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_fft_basic(self, params, fft_op, + ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test( + *self.create_complex_fft_net(**params, fft_op=fft_op), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api, custom_eps=1e-2) + + +class TestComplexAbs(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'param_real' in inputs_info + assert 'param_imag' in inputs_info + param_real_shape = inputs_info['param_real'] + param_imag_shape = inputs_info['param_imag'] + 
inputs_data = {} + inputs_data['param_real'] = 4 * rng.random(param_real_shape).astype(np.float32) - 2 + inputs_data['param_imag'] = 4 * rng.random(param_imag_shape).astype(np.float32) - 2 + return inputs_data + + def create_complex_abs_net(self, input_shape): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real') + param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag') + complex = tf.raw_ops.Complex(real=param_real, imag=param_imag) + tf.raw_ops.ComplexAbs(x=complex) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[]), + dict(input_shape=[2]), + dict(input_shape=[1, 3]), + dict(input_shape=[2, 3, 4]), + dict(input_shape=[3, 4, 5, 6]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_abs_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test( + *self.create_complex_abs_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestComplexRFFT(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'param' in inputs_info + param_shape = inputs_info['param'] + inputs_data = {} + inputs_data['param'] = 4 * rng.random(param_shape).astype(np.float32) - 2 + return inputs_data + + def create_complex_rfft_net(self, input_shape, fft_length, rfft_op): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + param = tf.compat.v1.placeholder(np.float32, input_shape, 'param') + fft_length_const = tf.constant(fft_length, dtype=tf.int32) + rfft = rfft_op(input=param, fft_length=fft_length_const) + real = 
tf.raw_ops.Real(input=rfft) + imag = tf.raw_ops.Imag(input=rfft) + tf.raw_ops.Pack(values=[real, imag], axis=-1) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[1, 3, 20], fft_length=[10], rfft_op=tf.raw_ops.RFFT), + dict(input_shape=[1, 3, 20], fft_length=[20], rfft_op=tf.raw_ops.RFFT), + dict(input_shape=[1, 3, 20, 40], fft_length=[20, 10], rfft_op=tf.raw_ops.RFFT2D), + dict(input_shape=[1, 3, 20, 40], fft_length=[10, 40], rfft_op=tf.raw_ops.RFFT2D), + dict(input_shape=[1, 2, 10, 20, 5], fft_length=[2, 5, 3], rfft_op=tf.raw_ops.RFFT3D), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_rfft_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test( + *self.create_complex_rfft_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class TestComplexIRFFT(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'param_real' in inputs_info + assert 'param_imag' in inputs_info + param_real_shape = inputs_info['param_real'] + param_imag_shape = inputs_info['param_imag'] + inputs_data = {} + inputs_data['param_real'] = 4 * rng.random(param_real_shape).astype(np.float32) - 2 + inputs_data['param_imag'] = 4 * rng.random(param_imag_shape).astype(np.float32) - 2 + return inputs_data + + def create_complex_irfft_net(self, input_shape, fft_length, irfft_op): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real') + param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag') + fft_length_const = tf.constant(fft_length, dtype=tf.int32) + complex = tf.raw_ops.Complex(real=param_real, 
imag=param_imag) + irfft_op(input=complex, fft_length=fft_length_const) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[1, 3, 20], fft_length=[10], irfft_op=tf.raw_ops.IRFFT), + dict(input_shape=[1, 3, 20], fft_length=[20], irfft_op=tf.raw_ops.IRFFT), + dict(input_shape=[1, 3, 20, 40], fft_length=[20, 10], irfft_op=tf.raw_ops.IRFFT2D), + dict(input_shape=[1, 3, 20, 40], fft_length=[10, 40], irfft_op=tf.raw_ops.IRFFT2D), + pytest.param(dict(input_shape=[1, 10, 20, 30, 5], fft_length=[2, 3, 4], irfft_op=tf.raw_ops.IRFFT3D), + marks=pytest.mark.xfail(reason="accuracy-issue-TBD")) + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_complex_irfft_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test( + *self.create_complex_irfft_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) diff --git a/tools/mo/openvino/tools/mo/convert_impl.py b/tools/mo/openvino/tools/mo/convert_impl.py index 9d683f4b6ac977..3a6df79daa69ab 100644 --- a/tools/mo/openvino/tools/mo/convert_impl.py +++ b/tools/mo/openvino/tools/mo/convert_impl.py @@ -314,10 +314,6 @@ def update_fallback_with_conversion_error(use_new_frontend: bool, is_tf: bool, e all_fallback_operations = [ # corresponds to TF1 While operation "LoopCond", "Enter", "NextIteration", "Exit", "Switch", "Merge", - # corresponds to operations with complex tensors - "FFT", "FFT2D", "FFT3D", "IFFT", "IFFT2D", "IFFT3D", - "RFFT", "RFFT2D", "RFFT3D", "IRFFT", "IRFFT2D", "IRFFT3D", - "Complex", "ComplexAbs", "Real", "Imag", ] if len(conversion_error_match) < 1 or len(conversion_error_match[0]) != 4: # no match for the fallback by unsupported operation From 3036a3d2498e8f6d744f253db506536a589f6210 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak 
Date: Mon, 6 Nov 2023 18:17:44 +0100 Subject: [PATCH 202/275] [DOCS] improving the "conversion" section v2 (#20887) adjustments to conversion and workflow --- .../ov_workflow_diagram_convenience.svg | 3 ++ .../ov_workflow_diagram_performance.svg | 3 ++ docs/articles_en/openvino_workflow.md | 43 +++++++++++------ .../openvino_workflow/model_preparation.md | 46 ++++++++++--------- 4 files changed, 61 insertions(+), 34 deletions(-) create mode 100644 docs/_static/images/ov_workflow_diagram_convenience.svg create mode 100644 docs/_static/images/ov_workflow_diagram_performance.svg diff --git a/docs/_static/images/ov_workflow_diagram_convenience.svg b/docs/_static/images/ov_workflow_diagram_convenience.svg new file mode 100644 index 00000000000000..50867b24996215 --- /dev/null +++ b/docs/_static/images/ov_workflow_diagram_convenience.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc2855ac007644a2562362bc7a8786c93b3d1d3e96ba733eec9a6c03f63a8c9 +size 160830 diff --git a/docs/_static/images/ov_workflow_diagram_performance.svg b/docs/_static/images/ov_workflow_diagram_performance.svg new file mode 100644 index 00000000000000..1af57d3d1ab642 --- /dev/null +++ b/docs/_static/images/ov_workflow_diagram_performance.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a013860e4b2f942c5632bae8e3dfade266cfdcad2e34f6371ea8b1873e18f75b +size 178797 diff --git a/docs/articles_en/openvino_workflow.md b/docs/articles_en/openvino_workflow.md index 5453c03426cd3f..9b2f75991eec51 100644 --- a/docs/articles_en/openvino_workflow.md +++ b/docs/articles_en/openvino_workflow.md @@ -20,36 +20,53 @@ pytorch_2_0_torch_compile -.. image:: ./_static/images/model_conversion_diagram.svg - :alt: model conversion diagram - OpenVINO offers multiple workflows, depending on the use case and personal or project preferences. 
-The diagram above is only a rough representation of the available options, but this section will -give you a detailed view of how you can go from preparing your model, through optimizing it, -to executing inference, and deploying your solution. +This section will give you a detailed view of how you can go from preparing your model, +through optimizing it, to executing inference, and deploying your solution. +Once you obtain a model in one of the :doc:`supported model formats `, +you can decide how to proceed: -| :doc:`Model Preparation ` -| Learn how to convert pre-trained models to OpenVINO IR, using different approaches for more convenience or higher performance. +.. tab-set:: + .. tab-item:: Workflow for convenience -| :doc:`Model Optimization and Compression ` -| Find out how to optimize a model to achieve better inference performance, utilizing multiple optimization methods for both in-training compression and post-training quantization. + This approach assumes you run your model directly. + + .. image:: _static/images/ov_workflow_diagram_convenience.svg + :align: center + :alt: OpenVINO workflow diagram for convenience + + .. tab-item:: Workflow for performance (recommended for production) + + This approach assumes you convert your model to OpenVINO IR explicitly, which means the + conversion stage is not part of the final application. + + .. image:: _static/images/ov_workflow_diagram_performance.svg + :align: center + :alt: OpenVINO workflow diagram for performance + +| :doc:`Model Preparation ` +| Learn how to convert pre-trained models to OpenVINO IR. + +| :doc:`Model Optimization and Compression ` +| Find out how to optimize a model to achieve better inference performance, utilizing + multiple optimization methods for both in-training compression and post-training quantization. + | :doc:`Running Inference ` -| See how to run inference with OpenVINO, which is the most basic form of deployment, and the quickest way of running a deep learning model. 
+| See how to run inference with OpenVINO, which is the most basic form of deployment, + and the quickest way of running a deep learning model. | :doc:`Deployment Option 1. Using OpenVINO Runtime ` | Deploy a model locally, reading the file directly from your application and utilizing resources available to the system. | Deployment on a local system uses the steps described in the section on running inference. - | :doc:`Deployment Option 2. Using Model Server ` | Deploy a model remotely, connecting your application to an inference server and utilizing external resources, with no impact on the app's performance. | Deployment on OpenVINO Model Server is quick and does not require any additional steps described in the section on running inference. - | :doc:`Deployment Option 3. Using torch.compile for PyTorch 2.0 ` | Deploy a PyTorch model using OpenVINO in a PyTorch-native application. diff --git a/docs/articles_en/openvino_workflow/model_preparation.md b/docs/articles_en/openvino_workflow/model_preparation.md index 0dabb7c5cbd2bf..d30121850e94b0 100644 --- a/docs/articles_en/openvino_workflow/model_preparation.md +++ b/docs/articles_en/openvino_workflow/model_preparation.md @@ -10,7 +10,6 @@ :maxdepth: 1 :hidden: - Conversion Parameters Setting Input Shapes Convert from PyTorch @@ -21,26 +20,29 @@ Supported_Model_Formats -To start working with OpenVINO, you need to obtain a model in one of the -:doc:`supported model formats `. The easiest way -to do so is to download a pre-trained network from an online database, such as -`TensorFlow Hub `__, -`Hugging Face `__, or -`Torchvision models `__. +You can obtain a model in one of :doc:`supported formats ` +in many ways. The easiest one is to download it from an online database, +such as `TensorFlow Hub `__, `Hugging Face `__, +and `Torchvision models `__. 
+Now you have two options: -The OpenVINO workflow starts with converting the selected model to its -proprietary format, :doc:`OpenVINO IR ` -(`openvino.Model `__ - -`ov.Model `__). -Although in most cases it can be done automatically, under the hood, explicit -conversion may enable more optimization options and better performance. -It is done in one of two ways: +* Skip model conversion and run inference directly from the source format. Conversion + will still be performed but it will happen automatically and "under the hood." + This option, while convenient, offers lower performance and stability, as well as + fewer optimization options. -* the Python API functions (``openvino.convert_model`` and ``openvino.save_model``) -* the ``ovc`` command line tool. +* Explicitly convert the model to :doc:`OpenVINO IR `. + This approach offers the best possible results and is the recommended one, + especially for production-ready solutions. Explicit conversion can be done in two ways: -.. note:: + * the Python API functions (``openvino.convert_model`` and ``openvino.save_model``) + * the ``ovc`` command line tool. + + Once saved as :doc:`OpenVINO IR ` (a set of ``.xml`` and ``.bin`` files), + the model may be deployed with maximum performance. Because it is already optimized + for OpenVINO inference, it can be read, compiled, and inferred with no additional delay. +.. note:: + Model conversion API prior to OpenVINO 2023.1 is considered deprecated. Existing and new projects are recommended to transition to the new solutions, keeping in mind that they are not fully backwards compatible @@ -48,13 +50,15 @@ It is done in one of two ways: For more details, see the :doc:`Model Conversion API Transition Guide `. 
-Convert a Model in Python: ``convert_model`` +Convert a Model with Python: ``convert_model`` ############################################## The Model conversion API in Python uses the ``openvino.convert_model`` function, -turning a given model to the ``openvino.Model`` object. The object may be used -further, compiled and inferred, or saved to a drive as :doc:`OpenVINO IR ` -(``openvino.save_model`` produces a set of ``.xml`` and ``.bin`` files). +turning a given model into the `openvino.Model `__ +object and loading it to memory. Now it can be: saved to a drive with ``openvino.save_model`` +or further :doc:`optimized with NNCF ` +prior to saving. + See how to use ``openvino.convert_model`` with models from some of the most popular public repositories: From 64c21fd6f94b05cb59696db9bfc061758f14dde1 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 6 Nov 2023 19:13:53 +0100 Subject: [PATCH 203/275] [MO] Fixed MO fallback unit test. (#20868) * Fixed MO unit test to import paddle conditionally. * Replace generate with pytest.mark.parametrize. 
--- .../mo/utils/mo_fallback_test_actual.py | 138 +++++++++--------- 1 file changed, 70 insertions(+), 68 deletions(-) diff --git a/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py b/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py index 04b9791297f97b..d0b6a26c5a818b 100644 --- a/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py +++ b/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py @@ -1,22 +1,19 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import unittest -from unittest.mock import patch, Mock - -import openvino -from openvino.tools.mo.convert_impl import prepare_ir -from openvino.tools.mo.utils.error import Error -from openvino.frontend import FrontEndManager, FrontEnd # pylint: disable=no-name-in-module,import-error -from onnx.helper import make_graph, make_model, make_tensor_value_info import argparse import os -import onnx -import paddle -import numpy as np import shutil +from unittest.mock import patch, Mock + +import numpy as np +import onnx import pytest -from generator import generator, generate +from onnx.helper import make_graph, make_model, make_tensor_value_info +from openvino.frontend import FrontEndManager, FrontEnd # pylint: disable=no-name-in-module,import-error + +from openvino.tools.mo.convert_impl import prepare_ir +from openvino.tools.mo.utils.error import Error try: import openvino_telemetry as tm @@ -24,6 +21,12 @@ except ImportError: import openvino.tools.mo.utils.telemetry_stub as tm +try: + import paddle + paddle_imported = True +except ImportError: + paddle_imported = False + def base_args_config(use_legacy_fe:bool=None, use_new_fe:bool=None): args = argparse.Namespace() @@ -71,9 +74,8 @@ def save_paddle_model(name, exe, feedkeys:list, fetchlist:list, target_dir:str): paddle.fluid.io.save_inference_model(model_dir, feedkeys, fetchlist, exe, model_filename=name+".pdmodel", params_filename=name+".pdiparams") -@generator -class TestMoFallback(unittest.TestCase): - def 
setUp(self): +class TestMoFallback(): + def setup_method(self): tm.Telemetry.__init__ = Mock(return_value=None) tm.Telemetry.send_event = Mock() FrontEnd.add_extension = Mock() @@ -121,35 +123,36 @@ def setUp(self): with open(file, 'w') as f: f.write(content) - self.paddle_dir = "paddle_dir" - paddle.enable_static() - if not os.path.exists(self.paddle_dir): - os.mkdir(self.paddle_dir) - x = np.array([-2, 0, 1]).astype('float32') - node_x = paddle.static.data(name='x', shape=x.shape, dtype='float32') - out = paddle.nn.functional.relu(node_x) + if paddle_imported: + self.paddle_dir = "paddle_dir" + paddle.enable_static() + if not os.path.exists(self.paddle_dir): + os.mkdir(self.paddle_dir) + x = np.array([-2, 0, 1]).astype('float32') + node_x = paddle.static.data(name='x', shape=x.shape, dtype='float32') + out = paddle.nn.functional.relu(node_x) - cpu = paddle.static.cpu_places(1) - exe = paddle.static.Executor(cpu[0]) - exe.run(paddle.static.default_startup_program()) + cpu = paddle.static.cpu_places(1) + exe = paddle.static.Executor(cpu[0]) + exe.run(paddle.static.default_startup_program()) - save_paddle_model("relu", exe, feedkeys=['x'], fetchlist=[out], target_dir=self.paddle_dir) + save_paddle_model("relu", exe, feedkeys=['x'], fetchlist=[out], target_dir=self.paddle_dir) - - def tearDown(self): + def teardown_method(self): for name in self.models.keys(): os.remove(name) for name in self.test_config_files: os.remove(name) - shutil.rmtree(self.paddle_dir) - - - @generate(*[(['dir_to_extension'], None, None, 'mo_legacy', 'extensions'), # fallback - (['dir_to_extension'], None, True, None, None), # exception - (['dir_to_extension'], True, None, 'mo_legacy', None), - ([''], True, None, 'mo_legacy', None), - ([''], None, True, 'onnx_frontend', None), - (None, None, None, 'onnx_frontend', None), + if paddle_imported: + shutil.rmtree(self.paddle_dir) + + @pytest.mark.parametrize("extension, use_legacy, use_new_fe, conversion_method, fallback_reason", [ + 
(['dir_to_extension'], None, None, 'mo_legacy', 'extensions'), # fallback + (['dir_to_extension'], None, True, None, None), # exception + (['dir_to_extension'], True, None, 'mo_legacy', None), + ([''], True, None, 'mo_legacy', None), + ([''], None, True, 'onnx_frontend', None), + (None, None, None, 'onnx_frontend', None) ]) def test_fallback_if_extension_specified(self, extension, use_legacy, use_new_fe, conversion_method, fallback_reason): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: @@ -170,10 +173,10 @@ def test_fallback_if_extension_specified(self, extension, use_legacy, use_new_fe with pytest.raises(Error): # not supported extensions on new path prepare_ir(args) - - @generate(*[(None, None, 'onnx_frontend'), - (True, None, None), # exception - (None, True, 'onnx_frontend'), + @pytest.mark.parametrize("use_legacy, use_new_fe, conversion_method", [ + (None, None, 'onnx_frontend'), + (True, None, None), # exception + (None, True, 'onnx_frontend'), ]) def test_fallback_if_new_extension_specified(self, use_legacy, use_new_fe, conversion_method): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: @@ -189,10 +192,10 @@ def test_fallback_if_new_extension_specified(self, use_legacy, use_new_fe, conve with pytest.raises(Error): prepare_ir(args) - - @generate(*[(None, None, 'onnx_frontend'), - (True, None, None), # exception - (None, True, 'onnx_frontend'), + @pytest.mark.parametrize("use_legacy, use_new_fe, conversion_method", [ + (None, None, 'onnx_frontend'), + (True, None, None), # exception + (None, True, 'onnx_frontend') ]) def test_fallback_if_two_new_extension_specified(self, use_legacy, use_new_fe, conversion_method): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: @@ -208,14 +211,14 @@ def test_fallback_if_two_new_extension_specified(self, use_legacy, use_new_fe, c with pytest.raises(Error): prepare_ir(args) - - @generate(*[('fake_config.json' , None, None, 
'mo_legacy', 'transformations_config'), # fallback - ('test_config.json' , None, None, 'mo_legacy', 'transformations_config'), # fallback - ('fake_config.json' , True, None, 'mo_legacy', None), - (None, None, True, 'onnx_frontend', None), - (None, None, None, 'onnx_frontend', None), - ]) - def test_fallback_if_tranformations_config_specified(self, trans_config, use_legacy, use_new_fe, expected_path, fallback_reason): + @pytest.mark.parametrize("trans_config, use_legacy, use_new_fe, expected_path, fallback_reason", [ + ('fake_config.json', None, None, 'mo_legacy', 'transformations_config'), # fallback + ('test_config.json', None, None, 'mo_legacy', 'transformations_config'), # fallback + ('fake_config.json', True, None, 'mo_legacy', None), + (None, None, True, 'onnx_frontend', None), + (None, None, None, 'onnx_frontend', None)]) + def test_fallback_if_tranformations_config_specified(self, trans_config, use_legacy, use_new_fe, expected_path, + fallback_reason): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: default_fe.return_value = get_test_default_frontends() args = base_args_config(use_legacy, use_new_fe) @@ -232,11 +235,11 @@ def test_fallback_if_tranformations_config_specified(self, trans_config, use_leg with pytest.raises(AssertionError): # not called tm.Telemetry.send_event.assert_any_call('mo', 'fallback_reason', fallback_reason) - - @generate(*[(['dir_to_extension'], 'fake_config.json', None, 'mo_legacy', 'extensions, transformations_config'), # fallback - (None, 'fake_config.json', None, 'mo_legacy', 'transformations_config'), # fallback - (['dir_to_extension'], None, None, 'mo_legacy', 'extensions'), # fallback - (None, None, True, 'onnx_frontend', None), + @pytest.mark.parametrize("extension, trans_config, use_new_fe, expected_path, fallback_reason", [ + (['dir_to_extension'], 'fake_config.json', None, 'mo_legacy', 'extensions, transformations_config'), # fallback + (None, 'fake_config.json', None, 'mo_legacy', 
'transformations_config'), # fallback + (['dir_to_extension'], None, None, 'mo_legacy', 'extensions'), # fallback + (None, None, True, 'onnx_frontend', None) ]) def test_fallback_if_both_extension_and_trans_config_specified(self, extension, trans_config, use_new_fe, expected_path, fallback_reason): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: @@ -255,11 +258,10 @@ def test_fallback_if_both_extension_and_trans_config_specified(self, extension, with pytest.raises(AssertionError): # not called tm.Telemetry.send_event.assert_any_call('mo', 'fallback_reason', fallback_reason) - - @generate(*[('fake_config.json', None, None, 'mo_legacy'), - ('fake_config.json', True, None, 'mo_legacy'), - (None, None, True, 'onnx_frontend'), - ]) + @pytest.mark.parametrize("trans_config, use_legacy, use_new_fe, expected_path", + [('fake_config.json', None, None, 'mo_legacy'), + ('fake_config.json', True, None, 'mo_legacy'), + (None, None, True, 'onnx_frontend')]) def test_fallback_if_legacy_set_as_default(self, trans_config, use_legacy, use_new_fe, expected_path): with patch('openvino.tools.mo.convert_impl.get_default_frontends') as default_fe: default_fe.return_value = {'onnx': 'legacy', 'tf': 'legacy'} @@ -273,10 +275,10 @@ def test_fallback_if_legacy_set_as_default(self, trans_config, use_legacy, use_n with pytest.raises(AssertionError): # not called tm.Telemetry.send_event.assert_any_call('mo', 'fallback_reason') - - @generate(*[(True, None, None, 'paddle_frontend'), - (None, None, None, 'paddle_frontend'), - ]) + @pytest.mark.skipif(not paddle_imported, reason="PaddlePaddle is not installed") + @pytest.mark.parametrize("use_new_fe, use_legacy, extension, expected_path", + [(True, None, None, 'paddle_frontend'), + (None, None, None, 'paddle_frontend')]) def test_no_fallback_if_pdpd(self, use_new_fe, use_legacy, extension, expected_path): args = base_args_config(use_legacy, use_new_fe) args.framework = 'paddle' @@ -289,7 +291,7 @@ def 
test_no_fallback_if_pdpd(self, use_new_fe, use_legacy, extension, expected_p with pytest.raises(AssertionError): # not called tm.Telemetry.send_event.assert_any_call('mo', 'fallback_reason') - + @pytest.mark.skipif(not paddle_imported, reason="PaddlePaddle is not installed") def test_exception_if_old_extensions_used_for_pdpd(self): args = base_args_config() args.framework = 'paddle' From 5cd9659033ef37d9f61203eb4630ee3add54ed01 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Mon, 6 Nov 2023 20:56:52 +0100 Subject: [PATCH 204/275] [core] Migrate HSwish operator to new API (#20854) * Drop ngraph remains * Use ov::Tensor instaed of ngraph::HostTensor --- src/core/include/openvino/op/hswish.hpp | 6 +- src/core/src/op/hswish.cpp | 84 +++++++++++-------------- 2 files changed, 36 insertions(+), 54 deletions(-) diff --git a/src/core/include/openvino/op/hswish.hpp b/src/core/include/openvino/op/hswish.hpp index 34cff2955b5ab6..fc2130b56d9655 100644 --- a/src/core/include/openvino/op/hswish.hpp +++ b/src/core/include/openvino/op/hswish.hpp @@ -25,12 +25,8 @@ class OPENVINO_API HSwish : public util::UnaryElementwiseArithmetic { /// \param data Input tensor HSwish(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; - std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v4 diff --git a/src/core/src/op/hswish.cpp b/src/core/src/op/hswish.cpp index b509ecb95aabd1..fd2d89896c0460 100644 --- a/src/core/src/op/hswish.cpp +++ b/src/core/src/op/hswish.cpp @@ -2,78 +2,64 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/hswish.hpp" - -#include +#include "openvino/op/hswish.hpp" +#include 
"element_visitor.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/hswish.hpp" -using namespace std; -using namespace ngraph; - -op::v4::HSwish::HSwish(const Output& arg) : UnaryElementwiseArithmetic(arg) { +namespace ov { +namespace op { +namespace v4 { +HSwish::HSwish(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -bool op::v4::HSwish::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v4_HSwish_visit_attributes); - return true; -} - -shared_ptr op::v4::HSwish::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr HSwish::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v4_HSwish_clone_with_new_inputs); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START namespace hswish { namespace { -template -inline bool evaluate(const HostTensorPtr& arg, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - - ov::reference::hswish(arg->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_hswish(const HostTensorPtr& arg, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg->get_shape()); - out->set_unary(arg); - - switch (arg->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_hswish, bf16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_hswish, f16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_hswish, f32, arg, out, count); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + ov::reference::hswish(in.data(), out.data(), count); + return true; } - return rc; -} +}; } // namespace } // namespace hswish -bool op::v4::HSwish::evaluate(const HostTensorVector& outputs, const 
HostTensorVector& inputs) const { +bool HSwish::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v4_HSwish_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return hswish::evaluate_hswish(inputs[0], outputs[0]); + OPENVINO_ASSERT(inputs.size() == 1); + OPENVINO_ASSERT(outputs.size() == 1); + + const auto& input_shape = inputs[0].get_shape(); + const auto count = shape_size(input_shape); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + count); } -bool op::v4::HSwish::has_evaluate() const { +bool HSwish::has_evaluate() const { OV_OP_SCOPE(v4_HSwish_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v4 +} // namespace op +} // namespace ov From 494a9cf9a9fc6d953df27d04dc0dcedb79cc7b93 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 7 Nov 2023 10:29:45 +0400 Subject: [PATCH 205/275] [TF FE] Refine tests for complex tensors support (#20905) Signed-off-by: Kazantsev, Roman --- tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py | 2 +- tests/model_hub_tests/tf_hub_tests/precommit_models | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py b/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py index 14586b3bba3805..18d23b593abba1 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ComplexFFT.py @@ -182,7 +182,7 @@ def create_complex_irfft_net(self, input_shape, fft_length, irfft_op): dict(input_shape=[1, 3, 20, 40], 
fft_length=[20, 10], irfft_op=tf.raw_ops.IRFFT2D), dict(input_shape=[1, 3, 20, 40], fft_length=[10, 40], irfft_op=tf.raw_ops.IRFFT2D), pytest.param(dict(input_shape=[1, 10, 20, 30, 5], fft_length=[2, 3, 4], irfft_op=tf.raw_ops.IRFFT3D), - marks=pytest.mark.xfail(reason="accuracy-issue-TBD")) + marks=pytest.mark.xfail(reason="accuracy-issue-124452")) ] @pytest.mark.parametrize("params", test_data_basic) diff --git a/tests/model_hub_tests/tf_hub_tests/precommit_models b/tests/model_hub_tests/tf_hub_tests/precommit_models index 3be4384dd1c576..c4d62c7b30af09 100644 --- a/tests/model_hub_tests/tf_hub_tests/precommit_models +++ b/tests/model_hub_tests/tf_hub_tests/precommit_models @@ -10,6 +10,8 @@ imagenet/mobilenet_v1_100_224/classification,https://tfhub.dev/google/imagenet/m magenta/arbitrary-image-stylization-v1-256,https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2?tf-hub-format=compressed small_bert/bert_en_uncased_L-4_H-256_A-4,https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/2?tf-hub-format=compressed movinet/a5/base/kinetics-600/classification,https://tfhub.dev/tensorflow/movinet/a5/base/kinetics-600/classification/3?tf-hub-format=compressed +# model with complex tensors and FFT ops +yamnet,https://tfhub.dev/google/yamnet/1?tf-hub-format=compressed,skip,124451 # secure notebook models unet/industrial/class_1,https://tfhub.dev/nvidia/unet/industrial/class_1/1?tf-hub-format=compressed movenet/singlepose/thunder,https://tfhub.dev/google/movenet/singlepose/thunder/4?tf-hub-format=compressed From a304f03852d923d8ff38675f721aa0626096f6c0 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Tue, 7 Nov 2023 08:26:58 +0100 Subject: [PATCH 206/275] [core] Migrate Softmax operator to new API (#20894) * Drop ngraph remains * Use ov::Tensor instaed of ngraph::HostTensor --- src/core/include/openvino/op/softmax.hpp | 8 +- src/core/src/op/softmax.cpp | 144 +++++++++++------------ 2 files changed, 74 insertions(+), 78 deletions(-) diff 
--git a/src/core/include/openvino/op/softmax.hpp b/src/core/include/openvino/op/softmax.hpp index 8a43c6dae7bdef..4537c2ed26485a 100644 --- a/src/core/include/openvino/op/softmax.hpp +++ b/src/core/include/openvino/op/softmax.hpp @@ -38,9 +38,7 @@ class OPENVINO_API Softmax : public Op { void set_axis(const size_t axis) { m_axis = axis; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; private: @@ -77,9 +75,7 @@ class OPENVINO_API Softmax : public Op { void set_axis(const int64_t& axis) { m_axis = axis; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; private: diff --git a/src/core/src/op/softmax.cpp b/src/core/src/op/softmax.cpp index 775f88419c4fe9..d2e1326d76c58b 100644 --- a/src/core/src/op/softmax.cpp +++ b/src/core/src/op/softmax.cpp @@ -2,59 +2,46 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/softmax.hpp" +#include "openvino/op/softmax.hpp" #include -#include +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/op/util/op_types.hpp" +#include "openvino/core/attribute_visitor.hpp" #include "openvino/reference/softmax.hpp" +#include "validation_util.hpp" -using namespace std; -using namespace ngraph; - -OPENVINO_SUPPRESS_DEPRECATED_START +namespace ov { +namespace op { +namespace softmax { namespace { -template -inline bool evaluate(const HostTensorPtr& arg, const HostTensorPtr& out, const ov::Shape& shape, const AxisSet& axes) { - ov::reference::softmax(arg->get_data_ptr(), out->get_data_ptr(), shape, axes); - 
return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_softmax(const HostTensorPtr& arg, const HostTensorPtr& out, const AxisSet& axes) { - auto shape = out->get_shape(); - bool rc = true; - - switch (arg->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_softmax, bf16, arg, out, shape, axes); - OPENVINO_TYPE_CASE(evaluate_softmax, f16, arg, out, shape, axes); - OPENVINO_TYPE_CASE(evaluate_softmax, f32, arg, out, shape, axes); - OPENVINO_TYPE_CASE(evaluate_softmax, f64, arg, out, shape, axes); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const Shape& shape, const AxisSet& axes) { + ov::reference::softmax(in.data(), out.data(), shape, axes); + return true; } - return rc; -} +}; } // namespace +} // namespace softmax -// *** SOFTMAX OP SET V1 *** - -op::v1::Softmax::Softmax(const Output& arg, const size_t axis) : Op({arg}), m_axis(axis) { +namespace v1 { +Softmax::Softmax(const Output& arg, const size_t axis) : Op({arg}), m_axis(axis) { constructor_validate_and_infer_types(); } -bool ngraph::op::v1::Softmax::visit_attributes(AttributeVisitor& visitor) { +bool Softmax::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_Softmax_visit_attributes); visitor.on_attribute("axis", m_axis); return true; } -void op::v1::Softmax::validate_and_infer_types() { +void Softmax::validate_and_infer_types() { OV_OP_SCOPE(v1_Softmax_validate_and_infer_types); - const ov::PartialShape& input_shape = get_input_partial_shape(0); + const auto& input_shape = get_input_partial_shape(0); if (input_shape.rank().is_static()) NODE_VALIDATION_CHECK(this, m_axis < static_cast(input_shape.rank().get_length()), @@ -67,47 +54,53 @@ void op::v1::Softmax::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), input_shape); } -shared_ptr op::v1::Softmax::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Softmax::clone_with_new_inputs(const 
OutputVector& new_args) const { OV_OP_SCOPE(v1_Softmax_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), m_axis); + return std::make_shared(new_args.at(0), m_axis); } -bool op::v1::Softmax::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Softmax::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Softmax_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - outputs[0]->set_unary(inputs[0]); - return evaluate_softmax(inputs[0], outputs[0], AxisSet{m_axis}); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& input_shape = inputs[0].get_shape(); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + input_shape, + AxisSet{m_axis}); } -bool op::v1::Softmax::has_evaluate() const { +bool Softmax::has_evaluate() const { OV_OP_SCOPE(v1_Softmax_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: - case ngraph::element::f64: + case element::bf16: + case element::f16: + case element::f32: + case element::f64: return true; default: - break; + return false; } - return false; } +} // namespace v1 -// *** SOFTMAX OP SET V8 *** -op::v8::Softmax::Softmax(const Output& arg, const int64_t axis) : Op({arg}), m_axis(axis) { +namespace v8 { +Softmax::Softmax(const Output& arg, const int64_t axis) : Op({arg}), m_axis(axis) { constructor_validate_and_infer_types(); } -bool op::v8::Softmax::visit_attributes(AttributeVisitor& visitor) { +bool Softmax::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v8_Softmax_visit_attributes); visitor.on_attribute("axis", m_axis); return true; } -void 
op::v8::Softmax::validate_and_infer_types() { +void Softmax::validate_and_infer_types() { OV_OP_SCOPE(v8_Softmax_validate_and_infer_types); const auto& input_shape = get_input_partial_shape(0); if (input_shape.rank().is_static()) { @@ -124,41 +117,48 @@ void op::v8::Softmax::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), input_shape); } -shared_ptr op::v8::Softmax::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Softmax::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v8_Softmax_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), m_axis); + return std::make_shared(new_args.at(0), m_axis); } -bool op::v8::Softmax::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool Softmax::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v8_Softmax_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - outputs[0]->set_unary(inputs[0]); - auto rank = static_cast(inputs[0]->get_shape().size()); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& input_shape = inputs[0].get_shape(); + const auto rank = static_cast(input_shape.size()); OPENVINO_ASSERT(-rank <= m_axis && m_axis < rank, "Reduction axis (", m_axis, ") is out of bounds (argument shape: ", - inputs[0]->get_shape(), + input_shape, ")."); - OPENVINO_SUPPRESS_DEPRECATED_START - size_t axis = static_cast(ov::normalize_axis(this->description(), m_axis, rank)); - OPENVINO_SUPPRESS_DEPRECATED_END - return evaluate_softmax(inputs[0], outputs[0], AxisSet{axis}); + const auto axis = static_cast(ov::util::normalize(m_axis, rank)); + + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + 
outputs[0], + input_shape, + AxisSet{axis}); } -bool op::v8::Softmax::has_evaluate() const { +bool Softmax::has_evaluate() const { OV_OP_SCOPE(v8_Softmax_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: - case ngraph::element::f64: + case element::bf16: + case element::f16: + case element::f32: + case element::f64: return true; default: - break; + return false; } - return false; } +} // namespace v8 +} // namespace op +} // namespace ov From dcdf6750a748d51b61757790d89d2a057530ed3b Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 08:31:04 +0100 Subject: [PATCH 207/275] [core]Migrate Sign operator to new API (#20875) * Migrate Sign operator to new API * Optimize Sign reference implementation * Fix code style --- src/core/include/openvino/op/sign.hpp | 5 +- .../include/openvino/reference/sign.hpp | 26 ++++- src/core/src/op/sign.cpp | 98 ++++++++----------- 3 files changed, 66 insertions(+), 63 deletions(-) diff --git a/src/core/include/openvino/op/sign.hpp b/src/core/include/openvino/op/sign.hpp index 841f54c359da7d..b2f4e1305ab8ae 100644 --- a/src/core/include/openvino/op/sign.hpp +++ b/src/core/include/openvino/op/sign.hpp @@ -22,11 +22,8 @@ class OPENVINO_API Sign : public util::UnaryElementwiseArithmetic { /// \param arg Node that produces the input tensor. 
Sign(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/sign.hpp b/src/core/reference/include/openvino/reference/sign.hpp index 5666eb5ebf4607..6363a725164dee 100644 --- a/src/core/reference/include/openvino/reference/sign.hpp +++ b/src/core/reference/include/openvino/reference/sign.hpp @@ -4,15 +4,35 @@ #pragma once +#include #include +#include "openvino/reference/utils/type_util.hpp" + namespace ov { namespace reference { +namespace func { +template ::value>::type* = nullptr> +constexpr T sign(const T v) { + return static_cast(static_cast(v)); +} + +template () || std::is_signed::value>::type* = nullptr> +constexpr T sign(const T v) { + return static_cast((T{0} < v) - (v < T{0})); +} +} // namespace func + +/** + * @brief Reference implementation of Sign operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. + */ template void sign(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = (arg[i] < T(0) ? T(-1) : (arg[i] > T(0) ? 
T(1) : T(0))); - } + std::transform(arg, arg + count, out, func::sign); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/sign.cpp b/src/core/src/op/sign.cpp index 9e22a7f75d4643..f22798bfcdc79f 100644 --- a/src/core/src/op/sign.cpp +++ b/src/core/src/op/sign.cpp @@ -2,81 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/sign.hpp" +#include "openvino/op/sign.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/reference/sign.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { -op::Sign::Sign(const Output& arg) : UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} +namespace sign { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool ngraph::op::v0::Sign::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Sign_visit_attributes); - return true; + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::sign(arg.data(), out.data(), count); + return true; + } +}; +} // namespace sign +namespace v0 { + +Sign::Sign(const Output& arg) : UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -shared_ptr op::Sign::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Sign::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Sign_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace signop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::sign(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} - -bool evaluate_sign(const 
HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool Sign::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Sign_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_sign, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sign, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sign, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sign, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sign, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_sign, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace signop + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); -bool op::Sign::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Sign_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return signop::evaluate_sign(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Sign::has_evaluate() const { +bool Sign::has_evaluate() const { OV_OP_SCOPE(v0_Sign_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From 
e976e7b90cf773738ba20cc68f72e4c1bc1206e1 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 7 Nov 2023 08:32:22 +0100 Subject: [PATCH 208/275] [PT FE] Add tests for Speech-Transformer (#20847) * Add tests for Speech-Transformer * Update tests/model_hub_tests/torch_tests/test_speech-transformer.py * Update tests/model_hub_tests/torch_tests/test_speech-transformer.py --- .../torch_tests/test_speech-transformer.py | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tests/model_hub_tests/torch_tests/test_speech-transformer.py diff --git a/tests/model_hub_tests/torch_tests/test_speech-transformer.py b/tests/model_hub_tests/torch_tests/test_speech-transformer.py new file mode 100644 index 00000000000000..e99de5c39e1c39 --- /dev/null +++ b/tests/model_hub_tests/torch_tests/test_speech-transformer.py @@ -0,0 +1,72 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +import tempfile +import torch +import pytest +import subprocess + +from models_hub_common.test_convert_model import TestConvertModel +from openvino import convert_model + + +# To make tests reproducible we seed the random generator +torch.manual_seed(0) + + +class TestSpeechTransformerConvertModel(TestConvertModel): + def setup_class(self): + self.repo_dir = tempfile.TemporaryDirectory() + os.system( + f"git clone https://github.com/mvafin/Speech-Transformer.git {self.repo_dir.name}") + subprocess.check_call(["git", "checkout", "071eebb7549b66bae2cb93e3391fe99749389456"], cwd=self.repo_dir.name) + checkpoint_url = "https://github.com/foamliu/Speech-Transformer/releases/download/v1.0/speech-transformer-cn.pt" + subprocess.check_call(["wget", checkpoint_url], cwd=self.repo_dir.name) + + def load_model(self, model_name, model_link): + sys.path.append(self.repo_dir.name) + from transformer.transformer import Transformer + + filename = os.path.join(self.repo_dir.name, 'speech-transformer-cn.pt') + m = Transformer() + 
m.load_state_dict(torch.load( + filename, map_location=torch.device('cpu'))) + + self.example = (torch.randn(32, 209, 320), + torch.stack(sorted(torch.randint(55, 250, [32]), reverse=True)), + torch.randint(-1, 4232, [32, 20])) + self.input = (torch.randn(32, 209, 320), + torch.stack(sorted(torch.randint(55, 400, [32]), reverse=True)), + torch.randint(-1, 4232, [32, 25])) + return m + + def get_inputs_info(self, model_obj): + return None + + def prepare_inputs(self, inputs_info): + return [i.numpy() for i in self.input] + + def convert_model(self, model_obj): + m = convert_model(model_obj, example_input=self.example) + return m + + def infer_fw_model(self, model_obj, inputs): + fw_outputs = model_obj(*[torch.from_numpy(i) for i in inputs]) + if isinstance(fw_outputs, dict): + for k in fw_outputs.keys(): + fw_outputs[k] = fw_outputs[k].numpy(force=True) + elif isinstance(fw_outputs, (list, tuple)): + fw_outputs = [o.numpy(force=True) for o in fw_outputs] + else: + fw_outputs = [fw_outputs.numpy(force=True)] + return fw_outputs + + def teardown_class(self): + # remove all downloaded files from cache + self.repo_dir.cleanup() + + @pytest.mark.nightly + def test_convert_model(self, ie_device): + self.run("speech-transformer", None, ie_device) From 8f304701994e3e301657033aa8aecabffcaa2220 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 09:12:37 +0100 Subject: [PATCH 209/275] [core]Migrate MatMul operator to new API (#20857) * Migrate MatMul operator to new API * Correct get shapes references --- src/core/include/openvino/op/matmul.hpp | 4 +- src/core/src/op/matmul.cpp | 142 +++++++++++------------- 2 files changed, 68 insertions(+), 78 deletions(-) diff --git a/src/core/include/openvino/op/matmul.hpp b/src/core/include/openvino/op/matmul.hpp index 96aab8a1c4649c..302b3b9b16765c 100644 --- a/src/core/include/openvino/op/matmul.hpp +++ b/src/core/include/openvino/op/matmul.hpp @@ -31,9 +31,7 @@ class OPENVINO_API MatMul : public Op { std::shared_ptr 
clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; bool get_transpose_a() const { diff --git a/src/core/src/op/matmul.cpp b/src/core/src/op/matmul.cpp index d43b787e032232..06fd0a9f33e424 100644 --- a/src/core/src/op/matmul.cpp +++ b/src/core/src/op/matmul.cpp @@ -2,110 +2,99 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/matmul.hpp" - -#include +#include "openvino/op/matmul.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "matmul_shape_inference.hpp" -#include "ngraph/attribute_visitor.hpp" #include "openvino/reference/matmul.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace matmul { + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const Shape& out_shape, + const bool transpose_a, + const bool transpose_b) { + reference::matmul(arg0.data(), + arg1.data(), + out.data(), + shape0, + shape1, + out_shape, + transpose_a, + transpose_b); + return true; + } +}; +} // namespace matmul + +namespace v0 { -op::MatMul::MatMul(const Output& A, const Output& B, const bool& transpose_a, const bool& transpose_b) +MatMul::MatMul(const Output& A, const Output& B, const bool& transpose_a, const bool& transpose_b) : Op(OutputVector{A, B}), m_transpose_a{transpose_a}, m_transpose_b{transpose_b} { constructor_validate_and_infer_types(); } -bool ngraph::op::v0::MatMul::visit_attributes(AttributeVisitor& visitor) { +bool MatMul::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_MatMul_visit_attributes); 
visitor.on_attribute("transpose_a", m_transpose_a); visitor.on_attribute("transpose_b", m_transpose_b); return true; } -shared_ptr op::MatMul::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr MatMul::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_MatMul_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), m_transpose_a, m_transpose_b); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace matmul { -namespace { -template -bool evaluate(const op::MatMul* op, const HostTensorPtr& arg0, const HostTensorPtr& arg1, const HostTensorPtr& output) { - using T = typename element_type_traits::value_type; - - ov::Shape arg0_shape = arg0->get_shape(); - ov::Shape arg1_shape = arg1->get_shape(); - - std::vector input_shapes = {arg0_shape, arg1_shape}; - std::vector output_shapes = shape_infer(op, input_shapes); - - ov::Shape output_shape = output_shapes[0].to_shape(); - output->set_element_type(arg0->get_element_type()); - output->set_shape(output_shape); - - ov::reference::matmul(arg0->get_data_ptr(), - arg1->get_data_ptr(), - output->get_data_ptr(), - arg0_shape, - arg1_shape, - output_shape, - op->get_transpose_a(), - op->get_transpose_b()); - return true; -} - -bool evaluate_matmul(const op::MatMul* op, - const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& output) { - bool rc = true; - - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_matmul, i32, op, arg0, arg1, output); - OPENVINO_TYPE_CASE(evaluate_matmul, i64, op, arg0, arg1, output); - OPENVINO_TYPE_CASE(evaluate_matmul, u32, op, arg0, arg1, output); - OPENVINO_TYPE_CASE(evaluate_matmul, u64, op, arg0, arg1, output); - OPENVINO_TYPE_CASE(evaluate_matmul, f16, op, arg0, arg1, output); - OPENVINO_TYPE_CASE(evaluate_matmul, f32, op, arg0, arg1, output); - default: - rc = false; - break; - } - return rc; + return std::make_shared(new_args.at(0), new_args.at(1), 
m_transpose_a, m_transpose_b); } -} // namespace -} // namespace matmul -bool op::MatMul::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool MatMul::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_MatMul_evaluate); - return matmul::evaluate_matmul(this, inputs[0], inputs[1], outputs[0]); + OPENVINO_ASSERT(outputs.size() == 1); + + const auto out_shape = shape_infer(this, ov::util::get_tensors_partial_shapes(inputs)).front().to_shape(); + outputs[0].set_shape(out_shape); + + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + out_shape, + m_transpose_a, + m_transpose_b); } -bool op::MatMul::has_evaluate() const { +bool MatMul::has_evaluate() const { OV_OP_SCOPE(v0_MatMul_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } -void ngraph::op::v0::MatMul::validate_and_infer_types() { +void MatMul::validate_and_infer_types() { OV_OP_SCOPE(v0_MatMul_validate_and_infer_types); element::Type result_et; @@ -117,8 +106,11 @@ void ngraph::op::v0::MatMul::validate_and_infer_types() { get_input_element_type(1), ")."); - const auto &A_shape = get_input_partial_shape(0), B_shape = get_input_partial_shape(1); - std::vector input_shapes = {A_shape, B_shape}; - std::vector output_shapes = shape_infer(this, input_shapes); + const auto& A_shape = get_input_partial_shape(0); + const auto& B_shape = get_input_partial_shape(1); + const auto output_shapes = shape_infer(this, std::vector{A_shape, B_shape}); set_output_type(0, result_et, 
output_shapes[0]); } +} // namespace v0 +} // namespace op +} // namespace ov From cdd342ea495a4cfef75a9e64cef21adff80602bf Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 7 Nov 2023 09:34:26 +0100 Subject: [PATCH 210/275] [PT FE] Add ALIKED to model tests (#20899) * Add ALIKED to model tests * Update tests/model_hub_tests/torch_tests/test_aliked.py * Update tests/model_hub_tests/torch_tests/test_aliked.py --- .../torch_tests/test_aliked.py | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/model_hub_tests/torch_tests/test_aliked.py diff --git a/tests/model_hub_tests/torch_tests/test_aliked.py b/tests/model_hub_tests/torch_tests/test_aliked.py new file mode 100644 index 00000000000000..8641fbae851fa2 --- /dev/null +++ b/tests/model_hub_tests/torch_tests/test_aliked.py @@ -0,0 +1,136 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +import math +import tempfile +import torch +import pytest +import subprocess +from models_hub_common.test_convert_model import TestConvertModel +from openvino import convert_model, Model, PartialShape, Type +import openvino.runtime.opset12 as ops +from openvino.frontend import ConversionExtension +import numpy as np + + +# To make tests reproducible we seed the random generator +torch.manual_seed(0) + + +def custom_op_loop(context): + map = context.get_input(0) + points = context.get_input(1) + kernel_size = context.get_values_from_const_input(2, None, int) + # kernel_size=2, radius=0.5, pad_left_top=0, pad_right_bottom=1 + # kernel_size=3, radius=1.0, pad_left_top=1, pad_right_bottom=1 + # kernel_size=4, radius=1.5, pad_left_top=1, pad_right_bottom=2 + # kernel_size=5, radius=2.0, pad_left_top=2, pad_right_bottom=2 + radius = (kernel_size - 1.0) / 2.0 + pad_left_top = math.floor(radius) + pad_right_bottom = math.ceil(radius) + + # pad map: Cx(H+2*radius)x(W+2*radius) + map_pad = ops.pad(map, + np.int32([0, pad_left_top, pad_left_top]), 
+ np.int32([0, pad_right_bottom, pad_right_bottom]), + "constant", + 0.) + + # get patches + points_shape = ops.shape_of(points) + batch = ops.gather(points_shape, np.int32([0]), 0) + loop = ops.loop(batch.output(0), ops.constant([True]).output(0)) + points_i = ops.parameter(PartialShape([1, 2]), Type.i64) + points_i_1d = ops.squeeze(points_i, 0) + points_i_rev = ops.gather(points_i_1d, np.int32([1, 0]), 0) + map_body = ops.parameter(PartialShape([-1, -1, -1]), Type.i32) + points_plus_kenel = ops.add(points_i_rev, np.int64(kernel_size)) + patch_i = ops.slice( + map_body, points_i_rev, points_plus_kenel, np.int64([1, 1]), np.int64([1, 2])) + patch_i = ops.unsqueeze(patch_i, 0) + body = Model([ops.constant([True]), patch_i], [points_i, map_body]) + loop.set_function(body) + loop.set_special_body_ports([-1, 0]) + loop.set_sliced_input(points_i, points, 0, 1, 1, -1, 0) + loop.set_invariant_input(map_body, map_pad.output(0)) + res = loop.get_concatenated_slices(patch_i.output(0), 0, 1, 1, -1, 0) + return [res] + + +def read_image(path, idx): + import cv2 + from torchvision.transforms import ToTensor + + img_path = os.path.join(path, f"{idx}.jpg") + img_ref = cv2.imread(img_path) + img_ref = cv2.resize(img_ref, (640,640)) + img_rgb = cv2.cvtColor(img_ref, cv2.COLOR_BGR2RGB) + img_tensor = ToTensor()(img_rgb) + return img_tensor.unsqueeze_(0) + + +class TestAlikedConvertModel(TestConvertModel): + def setup_class(self): + self.repo_dir = tempfile.TemporaryDirectory() + os.system( + f"git clone https://github.com/mvafin/ALIKED.git {self.repo_dir.name}") + subprocess.check_call(["git", "checkout", "6008af43942925eec7e32006814ef41fbd0858d8"], cwd=self.repo_dir.name) + subprocess.check_call([sys.executable, "-m", "pip", "install", + "-r", os.path.join(self.repo_dir.name, "requirements.txt")]) + subprocess.check_call(["sh", "build.sh"], cwd=os.path.join( + self.repo_dir.name, "custom_ops")) + + def load_model(self, model_name, model_link): + sys.path.append(self.repo_dir.name) 
+ from nets.aliked import ALIKED + + m = ALIKED(model_name=model_name, device="cpu") + img_tensor = read_image(os.path.join( + self.repo_dir.name, "assets", "st_pauls_cathedral"), 1) + self.example = (img_tensor,) + img_tensor2 = read_image(os.path.join( + self.repo_dir.name, "assets", "st_pauls_cathedral"), 2) + self.input = (img_tensor2,) + return m + + def get_inputs_info(self, model_obj): + return None + + def prepare_inputs(self, inputs_info): + return [i.numpy() for i in self.input] + + def convert_model(self, model_obj): + m = convert_model(model_obj, + example_input=self.example, + extension=ConversionExtension( + "custom_ops::get_patches_forward", custom_op_loop) + ) + return m + + def infer_fw_model(self, model_obj, inputs): + fw_outputs = model_obj(*[torch.from_numpy(i) for i in inputs]) + if isinstance(fw_outputs, dict): + for k in fw_outputs.keys(): + fw_outputs[k] = fw_outputs[k].numpy(force=True) + elif isinstance(fw_outputs, (list, tuple)): + fw_outputs = [o.numpy(force=True) for o in fw_outputs] + else: + fw_outputs = [fw_outputs.numpy(force=True)] + return fw_outputs + + def teardown_class(self): + # remove all downloaded files from cache + self.repo_dir.cleanup() + + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("name", ['aliked-n16rot']) + def test_convert_model_all_models_default(self, name, ie_device): + self.run(name, None, ie_device) + + @pytest.mark.nightly + @pytest.mark.parametrize("name", ['aliked-t16', 'aliked-n16', 'aliked-n32']) + def test_convert_model_all_models(self, name, ie_device): + self.run(name, None, ie_device) From c0381ab08d2fcdc51c621196e935aa02c64a4b50 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 7 Nov 2023 14:05:03 +0400 Subject: [PATCH 211/275] Updated labeler config (#20913) --- .github/labeler.yml | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 77833eb47cdaf3..d393b2354e1c36 100644 
--- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -48,6 +48,9 @@ - 'scripts/**/*' - '.gitmodules' - '**/setup.py' +- 'conan.lock' +- 'conanfile.txt' +- 'vcpkg.json' - any: ['thirdparty/**/*', '!thirdparty/**/CMakeLists.txt'] @@ -65,7 +68,6 @@ 'category: GPU': - 'src/plugins/intel_gpu/**/*' -- 'src/tests/**/gpu/**/*' - 'thirdparty/ocl/**/*' 'category: HETERO': @@ -74,13 +76,11 @@ 'category: IE Tests': - 'thirdparty/gtest/**/*' - 'src/frontends/tests/frontend/shared/**/*' -- any: ['src/tests/**/*', - '!src/tests/**/gpu/**/*', - '!src/tests/**/inference_engine/**/*'] +- 'src/tests/**/*' 'category: inference': - 'src/inference/**/*' -- 'src/tests/functional/inference_engine/**/*' +- 'src/cmake/**/*' 'category: IR FE': - 'src/frontends/ir/**/*' @@ -91,10 +91,13 @@ 'category: MO': - 'tools/mo/**/*' - 'tools/ovc/**/*' +- 'tests/layer_tests/mo_python_api_tests/**/*' +- 'tests/layer_tests/ovc_python_api_tests/**/*' 'category: ONNX FE': - 'src/frontends/onnx/**/*' - 'thirdparty/onnx/**/*' +- 'tests/layer_tests/onnx_tests/**/*' 'category: packaging': - 'cmake/**/packaging/**/*' @@ -103,6 +106,7 @@ 'category: PDPD FE': - 'src/frontends/paddle/**/*' +- 'tests/layer_tests/py_frontend_tests/test_paddle_frontend.py' 'category: POT': - 'tools/pot/**/*' @@ -119,6 +123,7 @@ - 'thirdparty/gflags/**/*' - 'thirdparty/json/**/*' - 'thirdparty/cnpy/**/*' +- 'tests/samples_tests/smoke_tests/**/*' 'category: TEMPLATE': - 'src/plugins/template/**/*' @@ -127,6 +132,10 @@ - 'src/frontends/tensorflow/**/*' - 'src/frontends/tensorflow_common/**/*' - 'tests/layer_tests/tensorflow_tests/**/*' +- 'tests/layer_tests/tensorflow2_keras_tests/**/*' +- 'tests/layer_tests/jax_tests/**/*' +- any: ['tests/model_hub_tests/**', + '!tests/model_hub_tests/torch_tests/**/*'] 'category: TFL FE': - 'src/frontends/tensorflow_lite/**/*' @@ -137,12 +146,21 @@ - 'src/frontends/pytorch/**/*' - 'tests/layer_tests/pytorch_tests/**/*' - 'src/bindings/python/src/openvino/frontend/pytorch/**/*' +- 
'tests/layer_tests/py_frontend_tests/test_torch_decoder.py' +- 'tests/layer_tests/py_frontend_tests/test_torch_frontend.py' +- any: ['tests/model_hub_tests/**', + '!tests/model_hub_tests/tf_hub_tests/**/*'] 'category: tools': - any: ['tools/**', '!tools/pot/**/*', - '!tools/mo/**/*'] + '!tools/mo/**/*', + '!tools/ovc/**/*'] 'category: transformations': - 'src/common/transformations/**/*' - 'src/common/offline_transformations/**/*' + +'category: licensing': +- 'licensing/**/*' +- 'LICENSE' From cb53ee5db74cd7377fbb370608e0d8f11f2d2df7 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 11:29:43 +0100 Subject: [PATCH 212/275] [core]Migrate ReLU operator to new API (#20874) * Migrate ReLU operator to new API * Optimize ReLU reference implementation * Correct define const value in ReLU --- src/core/include/openvino/op/relu.hpp | 5 +- .../include/openvino/reference/relu.hpp | 40 +++++-- src/core/src/op/relu.cpp | 100 +++++++----------- 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/core/include/openvino/op/relu.hpp b/src/core/include/openvino/op/relu.hpp index ed6b94ac869e88..69a6ea6059d189 100644 --- a/src/core/include/openvino/op/relu.hpp +++ b/src/core/include/openvino/op/relu.hpp @@ -25,11 +25,8 @@ class OPENVINO_API Relu : public util::UnaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - bool visit_attributes(AttributeVisitor& visitor) override; }; } // namespace v0 } // namespace op diff --git a/src/core/reference/include/openvino/reference/relu.hpp b/src/core/reference/include/openvino/reference/relu.hpp index 0aad9b2e25f7ed..d19202d19c1073 100644 --- 
a/src/core/reference/include/openvino/reference/relu.hpp +++ b/src/core/reference/include/openvino/reference/relu.hpp @@ -4,16 +4,44 @@ #pragma once +#include #include +#include "openvino/reference/copy.hpp" +#include "openvino/reference/utils/type_util.hpp" + namespace ov { namespace reference { -template -void relu(const T* arg, T* out, size_t count) { - T zero = 0; - for (size_t i = 0; i < count; i++) { - out[i] = arg[i] > zero ? arg[i] : zero; - } + +/** + * @brief Reference implementation of ReLU operator (signed values). + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. + */ +template () || std::is_signed::value>::type* = nullptr> +void relu(const T* arg, T* out, const size_t count) { + std::replace_copy_if( + arg, + arg + count, + out, + [](const T v) { + return v < T{0}; + }, + T{0}); +} + +/** + * @brief Reference implementation of ReLU operator (unsigned). + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ +template ::value>::type* = nullptr> +void relu(const T* arg, T* out, const size_t count) { + copy(arg, out, count); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/relu.cpp b/src/core/src/op/relu.cpp index 30395883d9b4c7..ebc8aa46d39053 100644 --- a/src/core/src/op/relu.cpp +++ b/src/core/src/op/relu.cpp @@ -2,84 +2,66 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/relu.hpp" - -#include +#include "openvino/op/relu.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/relu.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace relu { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::Relu::Relu(const Output& arg) : UnaryElementwiseArithmetic(arg) { + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::relu(arg.data(), out.data(), count); + return true; + } +}; +} // namespace relu +namespace v0 { + +Relu::Relu(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -shared_ptr op::Relu::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Relu::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Relu_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace relu { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::relu(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; + return std::make_shared(new_args.at(0)); } -bool evaluate_relu(const HostTensorPtr& arg0, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg0->get_shape()); - 
out->set_unary(arg0); +bool Relu::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Relu_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_relu, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_relu, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_relu, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_relu, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_relu, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_relu, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace relu + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); -bool op::Relu::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Relu_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return relu::evaluate_relu(inputs[0], outputs[0]); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Relu::has_evaluate() const { +bool Relu::has_evaluate() const { OV_OP_SCOPE(v0_Relu_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; -} - -bool op::Relu::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Relu_visit_attributes); - return true; } +} // namespace v0 +} // namespace op +} // namespace ov From 
c3948ca7997f88bb6efb3f53057fa32e6c7afa23 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Tue, 7 Nov 2023 11:34:12 +0100 Subject: [PATCH 213/275] [nGraph Transformations] NMS convert precisions - change sequence of checks (#20795) --- .../src/transformations/convert_precision.cpp | 62 +++++++++-------- .../tests/utils/convert_precision.cpp | 69 +++++++++++++++++++ 2 files changed, 103 insertions(+), 28 deletions(-) diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 4fd52934dd415f..abce1a034d7cad 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -608,6 +608,19 @@ bool fuse_type_to_nms5(const std::shared_ptr& node, const precisions_m } bool res = false; + auto type_relaxed = std::dynamic_pointer_cast(node); + if (type_relaxed) { + for (size_t i = 0; i < node->get_output_size(); i++) { + auto it = precisions.find(node->get_output_element_type(i)); + if (it == precisions.end()) { + continue; + } + const auto& to = it->second; + type_relaxed->set_overridden_output_type(to, i); + res = true; + } + return res; + } auto it = precisions.find(node->get_output_element_type(0)); if (it != precisions.end()) { const auto& to = it->second; @@ -620,7 +633,6 @@ bool fuse_type_to_nms5(const std::shared_ptr& node, const precisions_m } } - auto type_relaxed = std::dynamic_pointer_cast(node); ov::element::TypeVector output_types; for (size_t i = 0; i < node->get_output_size(); i++) { it = precisions.find(node->get_output_element_type(i)); @@ -629,22 +641,13 @@ bool fuse_type_to_nms5(const std::shared_ptr& node, const precisions_m continue; } const auto& to = it->second; - if (type_relaxed) { - type_relaxed->set_overridden_output_type(to, i); - res = true; - } output_types.push_back(to); } - if (!type_relaxed) { - auto relaxed_op = std::make_shared>(*nms, - 
ov::element::TypeVector{}, - output_types); - replace_node(node, relaxed_op); - res = true; - } - - return res; + auto relaxed_op = + std::make_shared>(*nms, ov::element::TypeVector{}, output_types); + replace_node(node, relaxed_op); + return true; } bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_map& precisions) { @@ -654,6 +657,19 @@ bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_m } bool res = false; + auto type_relaxed = std::dynamic_pointer_cast(node); + if (type_relaxed) { + for (size_t i = 0; i < node->get_output_size(); i++) { + auto it = precisions.find(node->get_output_element_type(i)); + if (it == precisions.end()) { + continue; + } + const auto& to = it->second; + type_relaxed->set_overridden_output_type(to, i); + res = true; + } + return res; + } auto it = precisions.find(node->get_output_element_type(0)); if (it != precisions.end()) { const auto& to = it->second; @@ -666,7 +682,6 @@ bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_m } } - auto type_relaxed = std::dynamic_pointer_cast(node); ov::element::TypeVector output_types; for (size_t i = 0; i < node->get_output_size(); i++) { it = precisions.find(node->get_output_element_type(i)); @@ -675,22 +690,13 @@ bool fuse_type_to_nms9(const std::shared_ptr& node, const precisions_m continue; } const auto& to = it->second; - if (type_relaxed) { - type_relaxed->set_overridden_output_type(to, i); - res = true; - } output_types.push_back(to); } - if (!type_relaxed) { - auto relaxed_op = std::make_shared>(*nms, - ov::element::TypeVector{}, - output_types); - replace_node(node, relaxed_op); - res = true; - } - - return res; + auto relaxed_op = + std::make_shared>(*nms, ov::element::TypeVector{}, output_types); + replace_node(node, relaxed_op); + return true; } bool fuse_type_to_nms_rotated(const std::shared_ptr& node, const precisions_map& precisions) { diff --git a/src/common/transformations/tests/utils/convert_precision.cpp 
b/src/common/transformations/tests/utils/convert_precision.cpp index 608af4926bc942..012334161cd673 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -19,6 +19,7 @@ #include "openvino/opsets/opset4.hpp" #include "openvino/opsets/opset5.hpp" #include "openvino/opsets/opset8.hpp" +#include "openvino/opsets/opset9.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/visualize_tree.hpp" #include "ov_ops/type_relaxed.hpp" @@ -140,6 +141,74 @@ TEST(TransformationTests, ConvertPrecision_NMS5) { ASSERT_FALSE(has_type(f)); } +TEST(TransformationTests, DoubleConvertPrecision_NMS5) { + std::shared_ptr f; + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + auto max_output_boxes_per_class = opset5::Constant::create(element::i64, Shape{}, {10}); + auto iou_threshold = opset5::Constant::create(element::f32, Shape{}, {0.75}); + auto score_threshold = opset5::Constant::create(element::f32, Shape{}, {0.7}); + auto nms = std::make_shared(boxes, + scores, + max_output_boxes_per_class, + iou_threshold, + score_threshold, + opset5::NonMaxSuppression::BoxEncodingType::CORNER, + true); + + auto result1 = std::make_shared(nms->output(0)); + auto result2 = std::make_shared(nms->output(1)); + auto result3 = std::make_shared(nms->output(2)); + f = std::make_shared(ResultVector{result1, result2, result3}, ParameterVector{boxes, scores}); + } + + pass::Manager manager; + static const precisions_map precisions1 = {{element::f32, element::f16}}; + static const precisions_map precisions2 = {{element::i64, element::i32}}; + manager.register_pass(); + manager.register_pass(precisions1); + manager.register_pass(precisions2); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_FALSE(has_type(f)); +} + +TEST(TransformationTests, DoubleConvertPrecision_NMS9) { + 
std::shared_ptr f; + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + auto max_output_boxes_per_class = opset9::Constant::create(element::i64, Shape{}, {10}); + auto iou_threshold = opset9::Constant::create(element::f32, Shape{}, {0.75}); + auto score_threshold = opset9::Constant::create(element::f32, Shape{}, {0.7}); + auto nms = std::make_shared(boxes, + scores, + max_output_boxes_per_class, + iou_threshold, + score_threshold, + opset9::NonMaxSuppression::BoxEncodingType::CORNER, + true); + + auto result1 = std::make_shared(nms->output(0)); + auto result2 = std::make_shared(nms->output(1)); + auto result3 = std::make_shared(nms->output(2)); + f = std::make_shared(ResultVector{result1, result2, result3}, ParameterVector{boxes, scores}); + } + + pass::Manager manager; + static const precisions_map precisions1 = {{element::f32, element::f16}}; + static const precisions_map precisions2 = {{element::i64, element::i32}}; + manager.register_pass(); + manager.register_pass(precisions1); + manager.register_pass(precisions2); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_FALSE(has_type(f)); + ASSERT_FALSE(has_type(f)); +} + TEST(TransformationTests, ConvertPrecision_MatrixNms) { std::shared_ptr f; { From 681331d3d72ded65d16c0cf6e4743aa397b4f754 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Tue, 7 Nov 2023 11:37:37 +0100 Subject: [PATCH 214/275] [CPU] Increase RandomUniform test mean and variance thresholds --- .../functional/single_layer_tests/classes/random_uniform.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp index 8e071439bc8577..a7d5dcecc42187 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp +++ 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -44,8 +44,8 @@ class RandomUniformLayerTestCPU : public testing::WithParamInterface Date: Tue, 7 Nov 2023 11:42:46 +0100 Subject: [PATCH 215/275] [PyOV] ngraph linter check update (#20870) * [PyOV] ngraph linter check update * Update src/bindings/python/requirements_test.txt Co-authored-by: Roman Kazantsev --------- Co-authored-by: Roman Kazantsev --- src/bindings/python/requirements_test.txt | 2 +- src/bindings/python/setup.cfg | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 539feaa2f12efa..62320649b6acb9 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -21,7 +21,7 @@ flake8-rst-docstrings pygments flake8-string-format flake8-variables-names -flake8_builtins<2.2.0 # Issue 124276 +flake8_builtins<2.3.0 flake8_coding flake8_commas flake8_pep3101 diff --git a/src/bindings/python/setup.cfg b/src/bindings/python/setup.cfg index b9b15ef0ca1214..5ea6fe46cd57f6 100644 --- a/src/bindings/python/setup.cfg +++ b/src/bindings/python/setup.cfg @@ -35,6 +35,7 @@ deps = -rrequirements.txt # ignore: # A001 variable "var_name" is shadowing a python builtin # A002 argument "..." 
is shadowing a python builtin +# A004 import statement "..."" is shadowing a Python builtin # CCE001 ClassName.method1 should be after ClassName.method2 # D100 - Missing docstring in public module # D101 - Missing docstring in public class @@ -57,7 +58,7 @@ deps = -rrequirements.txt # VNE003 - variable names that shadow builtins are not allowed # F401 - module imported but unused -ignore=A001,A002,CCE001,D100,D101,D102,D103,D104,D105,D107,D412,E402,F401,N803,RST301,TAE002,T001,W503,PT007,PT012 +ignore=A001,A002,A004,CCE001,D100,D101,D102,D103,D104,D105,D107,D412,E402,F401,N803,RST301,TAE002,T001,W503,PT007,PT012 inline-quotes = double filename = *.py max-line-length = 160 From 368e6bfb8a3f3533fcb32cba8ba9e4d00b0190b8 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 11:57:29 +0100 Subject: [PATCH 216/275] Fix constant folding in MulMulMulFusion (#20803) * Fix constant folding in MulMulMulFusion by add f64 precision in Multiply to perform evaluate for const folding * Do not transform if input has not supported type --- .../lin_op_sequence_fusion.cpp | 9 +++- .../lin_op_sequence_fusion_test.cpp | 42 +++++++++++++++++++ src/core/src/op/multiply.cpp | 2 + .../functional/op_reference/multiply.cpp | 1 + 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp index eaaa2a52312993..6c45867b0075b5 100644 --- a/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp @@ -17,6 +17,13 @@ using namespace ov; +namespace { +const auto is_eltwise_supported_type = [](const Output& output) -> bool { + const auto is_single_output = pass::pattern::consumers_count(1); + return is_single_output(output) && output.get_node()->has_evaluate(); 
+}; +} + ov::pass::AddMultiplyFusion::AddMultiplyFusion() { MATCHER_SCOPE(AddMultiplyFusion); // Create Add->Multiply pattern where Add has exactly one consumer @@ -105,7 +112,7 @@ ov::pass::MultiplyMultiplyFusion::MultiplyMultiplyFusion() { auto m_data = pass::pattern::any_input(); auto m_mul1_constant = ov::pass::pattern::wrap_type(); auto m_mul1 = - ov::pass::pattern::wrap_type({m_data, m_mul1_constant}, pattern::consumers_count(1)); + ov::pass::pattern::wrap_type({m_data, m_mul1_constant}, is_eltwise_supported_type); auto m_mul2_constant = ov::pass::pattern::wrap_type(); auto m_mul2 = ov::pass::pattern::wrap_type({m_mul1, m_mul2_constant}); diff --git a/src/common/transformations/tests/common_optimizations/lin_op_sequence_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/lin_op_sequence_fusion_test.cpp index 71c16dea1242a3..cfef2d2b2acf03 100644 --- a/src/common/transformations/tests/common_optimizations/lin_op_sequence_fusion_test.cpp +++ b/src/common/transformations/tests/common_optimizations/lin_op_sequence_fusion_test.cpp @@ -79,6 +79,48 @@ TEST_F(TransformationTestsF, MulMulMulFusion) { } } +TEST_F(TransformationTestsF, MulMulMulFusion_f64) { + { + auto input = std::make_shared(element::f64, Shape{1, 128, 3072}); + auto mul1_const = opset3::Constant::create(element::f64, Shape{128, 1}, {2}); + auto mul2_const = opset3::Constant::create(element::f64, Shape{128, 1}, {3}); + auto mul3_const = opset3::Constant::create(element::f64, Shape{1}, {3}); + + auto mul1 = std::make_shared(input, mul1_const); + auto mul2 = std::make_shared(mul1, mul2_const); + auto mul3 = std::make_shared(mul2, mul3_const); + + model = std::make_shared(NodeVector{mul2}, ParameterVector{input}); + manager.register_pass(); + } + + { + auto input = std::make_shared(element::f64, Shape{1, 128, 3072}); + auto mul1_const = opset3::Constant::create(element::f64, Shape{128, 1}, {12}); + + auto mul1 = std::make_shared(input, mul1_const); + + model_ref = 
std::make_shared(NodeVector{mul1}, ParameterVector{input}); + } +} + +TEST_F(TransformationTestsF, MulMulMulFusion_not_supported_type) { + constexpr auto et = element::u8; + { + auto input = std::make_shared(et, Shape{1, 128, 3072}); + auto mul1_const = opset3::Constant::create(et, Shape{128, 1}, {2}); + auto mul2_const = opset3::Constant::create(et, Shape{128, 1}, {3}); + auto mul3_const = opset3::Constant::create(et, Shape{1}, {3}); + + auto mul1 = std::make_shared(input, mul1_const); + auto mul2 = std::make_shared(mul1, mul2_const); + auto mul3 = std::make_shared(mul2, mul3_const); + + model = std::make_shared(NodeVector{mul2}, ParameterVector{input}); + manager.register_pass(); + } +} + TEST_F(TransformationTestsF, AddAddAddFusion) { { auto input = std::make_shared(element::f32, Shape{1, 128, 3072}); diff --git a/src/core/src/op/multiply.cpp b/src/core/src/op/multiply.cpp index 04ccc8d05e349d..b30c2adaa7d6c5 100644 --- a/src/core/src/op/multiply.cpp +++ b/src/core/src/op/multiply.cpp @@ -41,6 +41,7 @@ bool evaluate_multiply(const HostTensorPtr& arg0, OPENVINO_TYPE_CASE(evaluate_multiply, u64, arg0, arg1, out, broadcast_spec); OPENVINO_TYPE_CASE(evaluate_multiply, f16, arg0, arg1, out, broadcast_spec); OPENVINO_TYPE_CASE(evaluate_multiply, f32, arg0, arg1, out, broadcast_spec); + OPENVINO_TYPE_CASE(evaluate_multiply, f64, arg0, arg1, out, broadcast_spec); OPENVINO_TYPE_CASE(evaluate_multiply, bf16, arg0, arg1, out, broadcast_spec); OPENVINO_TYPE_CASE(evaluate_multiply, u8, arg0, arg1, out, broadcast_spec); OPENVINO_TYPE_CASE(evaluate_multiply, i16, arg0, arg1, out, broadcast_spec); @@ -80,6 +81,7 @@ bool op::v1::Multiply::has_evaluate() const { case ngraph::element::u64: case ngraph::element::f16: case ngraph::element::f32: + case ngraph::element::f64: case ngraph::element::bf16: return true; default: diff --git a/src/plugins/template/tests/functional/op_reference/multiply.cpp b/src/plugins/template/tests/functional/op_reference/multiply.cpp index 
bd3b27f500aeac..726917eac9c66b 100644 --- a/src/plugins/template/tests/functional/op_reference/multiply.cpp +++ b/src/plugins/template/tests/functional/op_reference/multiply.cpp @@ -117,6 +117,7 @@ std::vector generateParamsForMultiplyFloat() { std::vector generateCombinedParamsForMultiply() { const std::vector> allTypeParams{generateParamsForMultiply(), + generateParamsForMultiply(), generateParamsForMultiply(), generateParamsForMultiply(), generateParamsForMultiply(), From f17f17acc7d73014adecc0027e27d469520841f9 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Tue, 7 Nov 2023 12:16:32 +0100 Subject: [PATCH 217/275] Use custom labeler to label changes not matching any pattern (#20888) Needed for Smart CI (https://github.com/openvinotoolkit/openvino/pull/19825) --- .github/workflows/labeler.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index ce50c74b18392f..fcb8fb3184271b 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -9,8 +9,10 @@ jobs: pull-requests: write runs-on: ubuntu-latest steps: - - uses: actions/labeler@v4 + - uses: akladiev/labeler@v4.3.1 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" configuration-path: '.github/labeler.yml' sync-labels: 'true' + dot: 'true' + non-matching-label: 'no-match-files' From e8b6e1742906c60741792dfa5b75008033029192 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Tue, 7 Nov 2023 12:19:34 +0100 Subject: [PATCH 218/275] [core] Migrate Softplus operator to new API (#20900) * Drop ngraph remains * Use ov::Tensor instead of ngraph::HostTensor --- src/core/include/openvino/op/softplus.hpp | 5 +- src/core/src/op/softplus.cpp | 85 ++++++++++------------- 2 files changed, 37 insertions(+), 53 deletions(-) diff --git a/src/core/include/openvino/op/softplus.hpp b/src/core/include/openvino/op/softplus.hpp index aaff04caa53471..b8b83fb8c528ff 100644 --- a/src/core/include/openvino/op/softplus.hpp +++
b/src/core/include/openvino/op/softplus.hpp @@ -22,14 +22,11 @@ class OPENVINO_API SoftPlus : public util::UnaryElementwiseArithmetic { /// /// \param data Input tensor SoftPlus(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v4 diff --git a/src/core/src/op/softplus.cpp b/src/core/src/op/softplus.cpp index 43ce8d9720b67c..a5896c00795665 100644 --- a/src/core/src/op/softplus.cpp +++ b/src/core/src/op/softplus.cpp @@ -2,28 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/softplus.hpp" - -#include +#include "openvino/op/softplus.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/softplus.hpp" -using namespace std; -using namespace ngraph; - -op::v4::SoftPlus::SoftPlus(const Output& arg) : util::UnaryElementwiseArithmetic(arg) { +namespace ov { +namespace op { +namespace v4 { +SoftPlus::SoftPlus(const Output& arg) : util::UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -bool op::v4::SoftPlus::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v4_SoftPlus_visit_attributes); - return true; -} - -void op::v4::SoftPlus::validate_and_infer_types() { +void SoftPlus::validate_and_infer_types() { OV_OP_SCOPE(v4_SoftPlus_validate_and_infer_types); const element::Type& input_et = get_input_element_type(0); @@ -36,57 +28,52 @@ void op::v4::SoftPlus::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), 
get_input_partial_shape(0)); } -shared_ptr op::v4::SoftPlus::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr SoftPlus::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v4_SoftPlus_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START namespace softplus { namespace { -template -inline bool evaluate(const HostTensorPtr& arg, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::softplus(arg->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; -bool evaluate_softplus(const HostTensorPtr& arg, const HostTensorPtr& out) { - bool rc = true; - out->set_unary(arg); - size_t count = shape_size(arg->get_shape()); - - switch (arg->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_softplus, bf16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_softplus, f16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_softplus, f32, arg, out, count); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + ov::reference::softplus(in.data(), out.data(), count); + return true; } - return rc; -} +}; } // namespace } // namespace softplus -bool op::v4::SoftPlus::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool SoftPlus::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v4_SoftPlus_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return softplus::evaluate_softplus(inputs[0], outputs[0]); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); + + const auto& input_shape = 
inputs[0].get_shape(); + const auto count = shape_size(input_shape); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + count); } -bool op::v4::SoftPlus::has_evaluate() const { +bool SoftPlus::has_evaluate() const { OV_OP_SCOPE(v4_SoftPlus_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::bf16: - case ngraph::element::f16: - case ngraph::element::f32: + case element::bf16: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v4 +} // namespace op +} // namespace ov From e82283cf85115817ace1cfcc161066ec8bd727f1 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 13:05:39 +0100 Subject: [PATCH 219/275] [core]Migrate Mish operator to new API (#20892) * Migrate Mish operator to new API * Remove `visit_attributes` is same as base class * Refactor Mish reference implementation * Add cast as function is generic -mish calculation is floating-point but return type can be integral. 
--- src/core/include/openvino/op/mish.hpp | 6 +- .../include/openvino/reference/mish.hpp | 16 +++- src/core/src/op/mish.cpp | 96 ++++++++----------- 3 files changed, 55 insertions(+), 63 deletions(-) diff --git a/src/core/include/openvino/op/mish.hpp b/src/core/include/openvino/op/mish.hpp index 455bd713166f95..d44484cd947e98 100644 --- a/src/core/include/openvino/op/mish.hpp +++ b/src/core/include/openvino/op/mish.hpp @@ -22,14 +22,10 @@ class OPENVINO_API Mish : public util::UnaryElementwiseArithmetic { /// /// \param data Input tensor Mish(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v4 diff --git a/src/core/reference/include/openvino/reference/mish.hpp b/src/core/reference/include/openvino/reference/mish.hpp index 023bf719a2cff7..d2707cb8652602 100644 --- a/src/core/reference/include/openvino/reference/mish.hpp +++ b/src/core/reference/include/openvino/reference/mish.hpp @@ -9,11 +9,19 @@ namespace ov { namespace reference { + +/** + * @brief Reference implementation of Mish operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ template -void mish(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(arg[i] * std::tanh(std::log((std::exp(arg[i]) + 1.0)))); - } +void mish(const T* arg, T* out, const size_t count) { + std::transform(arg, arg + count, out, [](const T v) { + return static_cast(v * std::tanh(std::log(std::exp(v) + T{1}))); + }); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/mish.cpp b/src/core/src/op/mish.cpp index 0fc374d3f8a8f5..606ff6239f59a4 100644 --- a/src/core/src/op/mish.cpp +++ b/src/core/src/op/mish.cpp @@ -2,90 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/mish.hpp" - -#include +#include "openvino/op/mish.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/mish.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace mish { -op::v4::Mish::Mish(const Output& arg) : util::UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::mish(arg.data(), out.data(), count); + return true; + } +}; +} // namespace mish -bool op::v4::Mish::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v4_Mish_visit_attributes); - return true; +namespace v4 { + +Mish::Mish(const Output& arg) : util::UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -void op::v4::Mish::validate_and_infer_types() { +void Mish::validate_and_infer_types() { OV_OP_SCOPE(v4_Mish_validate_and_infer_types); NODE_VALIDATION_CHECK(this, get_input_size() == 1, "Only accepts one argument. 
Got: ", get_input_size()); - element::Type data_batch_et = get_input_element_type(0); + const auto& data_batch_et = get_input_element_type(0); NODE_VALIDATION_CHECK(this, data_batch_et.is_real(), "Element must be of floating point type, Got: ", data_batch_et); - set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); + set_output_type(0, data_batch_et, get_input_partial_shape(0)); } -shared_ptr op::v4::Mish::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Mish::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v4_Mish_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace mish { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::mish(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +bool Mish::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v4_Mish_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); -bool evaluate_mish(const HostTensorPtr& arg0, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg0->get_shape()); - out->set_unary(arg0); + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_mish, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_mish, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace mish - -bool op::v4::Mish::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v4_Mish_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && 
validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return mish::evaluate_mish(inputs[0], outputs[0]); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::v4::Mish::has_evaluate() const { +bool Mish::has_evaluate() const { OV_OP_SCOPE(v4_Mish_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: return true; default: - break; + return false; } - return false; } +} // namespace v4 +} // namespace op +} // namespace ov From 95aef4bf51220e3bd74f0978bd1d32791ab4669b Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 7 Nov 2023 13:43:13 +0100 Subject: [PATCH 220/275] [core]Migrate Exp operator to new API (#20893) * Migrate Exp operator to new API * Add missing includes --- src/core/include/openvino/op/exp.hpp | 5 +- .../include/openvino/reference/exp.hpp | 17 ++- src/core/src/op/exp.cpp | 101 ++++++++---------- 3 files changed, 57 insertions(+), 66 deletions(-) diff --git a/src/core/include/openvino/op/exp.hpp b/src/core/include/openvino/op/exp.hpp index 408c008a8e5b53..90ee8ff9f69ffb 100644 --- a/src/core/include/openvino/op/exp.hpp +++ b/src/core/include/openvino/op/exp.hpp @@ -22,12 +22,9 @@ class OPENVINO_API Exp : public util::UnaryElementwiseArithmetic { /// \param arg Node that produces the input tensor. 
Exp(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/exp.hpp b/src/core/reference/include/openvino/reference/exp.hpp index 1f7bf80e2c3e67..dde6f812461ccd 100644 --- a/src/core/reference/include/openvino/reference/exp.hpp +++ b/src/core/reference/include/openvino/reference/exp.hpp @@ -4,16 +4,25 @@ #pragma once +#include #include #include namespace ov { namespace reference { -template + +/** + * @brief Reference implementation of Exp operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ +template void exp(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(std::exp(arg[i])); - } + std::transform(arg, arg + count, out, [](const T v) { + return static_cast(std::exp(v)); + }); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/exp.cpp b/src/core/src/op/exp.cpp index 14131f07c75b3a..ecd4d37913306b 100644 --- a/src/core/src/op/exp.cpp +++ b/src/core/src/op/exp.cpp @@ -2,83 +2,68 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/exp.hpp" - -#include +#include "openvino/op/exp.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/exp.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace exp { -op::Exp::Exp(const Output& arg) : UnaryElementwiseArithmetic(arg) { - constructor_validate_and_infer_types(); -} +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + static result_type visit(const Tensor& arg, Tensor& out, const size_t count) { + reference::exp(arg.data(), out.data(), count); + return true; + } +}; +} // namespace exp -bool ngraph::op::v0::Exp::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v0_Exp_visit_attributes); - return true; +namespace v0 { + +Exp::Exp(const Output& arg) : UnaryElementwiseArithmetic(arg) { + constructor_validate_and_infer_types(); } -shared_ptr op::Exp::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Exp::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Exp_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace expop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename 
element_type_traits::value_type; - ov::reference::exp(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; -} +bool Exp::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Exp_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); -bool evaluate_exp(const HostTensorPtr& arg0, const HostTensorPtr& out) { - bool rc = true; - size_t count = shape_size(arg0->get_shape()); - out->set_unary(arg0); + const auto& in_shape = inputs[0].get_shape(); + outputs[0].set_shape(in_shape); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_exp, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_exp, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_exp, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_exp, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_exp, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_exp, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace expop - -bool op::Exp::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Exp_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return expop::evaluate_exp(inputs[0], outputs[0]); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(in_shape)); } -bool op::Exp::has_evaluate() const { +bool Exp::has_evaluate() const { OV_OP_SCOPE(v0_Exp_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; 
default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From e3d7dffa83467c10cc418868b857d5fab9ac8827 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Tue, 7 Nov 2023 17:01:39 +0400 Subject: [PATCH 221/275] Remove legacy API from FEs (except ONNX) (#20849) * Remove `ngraph` from PT FE and FE tests utils * Remove `ngraph` from Paddle FE * Remove `InferenceEngine` from some ONNX FE test * Port `generate_embedding.py` to API2.0 * CLangFormat * Fix comments --- .../onnx/tests/model_support_tests.cpp | 25 +++++------ src/frontends/paddle/src/frontend.cpp | 5 ++- .../src/internal/op/conditional_block.cpp | 4 +- .../src/internal/op/tensorarray_write.cpp | 4 +- .../paddle/src/internal/op/while.cpp | 4 -- .../internal/pass/transform_fakequantize.cpp | 42 ++++++++----------- .../paddle/src/internal/pass/transform_if.cpp | 18 +++----- .../internal/pass/transform_tensorarray.cpp | 28 ++++--------- .../src/internal/pass/transform_while.cpp | 20 +++------ src/frontends/paddle/src/op/reduce_ops.hpp | 2 +- src/frontends/paddle/src/op/softshrink.cpp | 16 ++++--- .../gen_scripts/generate_embedding.py | 35 +++++++--------- .../softmax_reshape_elimination.cpp | 3 +- .../shared/gtest_main_manifest/main.cpp | 4 +- 14 files changed, 81 insertions(+), 129 deletions(-) diff --git a/src/frontends/onnx/tests/model_support_tests.cpp b/src/frontends/onnx/tests/model_support_tests.cpp index 4e484860062fd3..2072720752c8c4 100644 --- a/src/frontends/onnx/tests/model_support_tests.cpp +++ b/src/frontends/onnx/tests/model_support_tests.cpp @@ -5,10 +5,9 @@ #include #include -#include #include "common_test_utils/file_utils.hpp" -#include "ie_common.h" +#include "openvino/runtime/core.hpp" namespace { std::string model_path(const char* model) { @@ -22,51 +21,47 @@ std::string model_path(const char* model) { TEST(ONNXReader_ModelSupported, basic_model) { // this model is a basic ONNX model taken from ngraph's unit test (add_abc.onnx) // it 
contains the minimum number of fields required to accept this file as a valid model - EXPECT_NO_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("supported/basic.onnx"))); + EXPECT_NO_THROW(ov::Core{}.read_model(model_path("supported/basic.onnx"))); } TEST(ONNXReader_ModelSupported, basic_reverse_fields_order) { // this model contains the same fields as basic.onnx but serialized in reverse order - EXPECT_NO_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("supported/basic_reverse_fields_order.onnx"))); + EXPECT_NO_THROW(ov::Core{}.read_model(model_path("supported/basic_reverse_fields_order.onnx"))); } TEST(ONNXReader_ModelSupported, more_fields) { // this model contains some optional fields (producer_name and doc_string) but 5 fields in total - EXPECT_NO_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("supported/more_fields.onnx"))); + EXPECT_NO_THROW(ov::Core{}.read_model(model_path("supported/more_fields.onnx"))); } TEST(ONNXReader_ModelSupported, varint_on_two_bytes) { // the docstring's payload length is encoded as varint using 2 bytes which should be parsed correctly - EXPECT_NO_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("supported/varint_on_two_bytes.onnx"))); + EXPECT_NO_THROW(ov::Core{}.read_model(model_path("supported/varint_on_two_bytes.onnx"))); } TEST(ONNXReader_ModelSupported, scrambled_keys) { // same as the prototxt_basic but with a different order of keys - EXPECT_NO_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("supported/scrambled_keys.onnx"))); + EXPECT_NO_THROW(ov::Core{}.read_model(model_path("supported/scrambled_keys.onnx"))); } TEST(ONNXReader_ModelUnsupported, no_graph_field) { // this model contains only 2 fields (it doesn't contain a graph in particular) - EXPECT_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("unsupported/no_graph_field.onnx")), - InferenceEngine::NetworkNotRead); + EXPECT_THROW(ov::Core{}.read_model(model_path("unsupported/no_graph_field.onnx")), ov::Exception); } 
TEST(ONNXReader_ModelUnsupported, incorrect_onnx_field) { // in this model the second field's key is F8 (field number 31) which is doesn't exist in ONNX // this test will have to be changed if the number of fields in onnx.proto // (ModelProto message definition) ever reaches 31 or more - EXPECT_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("unsupported/incorrect_onnx_field.onnx")), - InferenceEngine::NetworkNotRead); + EXPECT_THROW(ov::Core{}.read_model(model_path("unsupported/incorrect_onnx_field.onnx")), ov::Exception); } TEST(ONNXReader_ModelUnsupported, unknown_wire_type) { // in this model the graph key contains wire type 7 encoded in it - this value is incorrect - EXPECT_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("unsupported/unknown_wire_type.onnx")), - InferenceEngine::NetworkNotRead); + EXPECT_THROW(ov::Core{}.read_model(model_path("unsupported/unknown_wire_type.onnx")), ov::Exception); } TEST(ONNXReader_ModelUnsupported, duplicate_fields) { // the model contains the IR_VERSION field twice - this is not correct - EXPECT_THROW(InferenceEngine::Core{}.ReadNetwork(model_path("unsupported/duplicate_onnx_fields.onnx")), - std::exception); + EXPECT_THROW(ov::Core{}.read_model(model_path("unsupported/duplicate_onnx_fields.onnx")), ov::Exception); } diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index 8caf37968db985..66178022dfd945 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -31,6 +31,7 @@ #include "openvino/core/so_extension.hpp" #include "openvino/frontend/extension/conversion.hpp" #include "openvino/frontend/paddle/node_context.hpp" +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/util/common_util.hpp" #include "paddle_fw_node.hpp" #include "paddle_utils.hpp" @@ -139,8 +140,8 @@ OPENVINO_SUPPRESS_DEPRECATED_START std::istream* variant_to_stream_ptr(const ov::Any& variant, std::fstream& fs, std::stringstream& ss) { if 
(variant.is()) { return variant.as(); - } else if (variant.is>()) { - auto& aligned_weights_buffer = variant.as>(); + } else if (variant.is>()) { + auto& aligned_weights_buffer = variant.as>(); ss.write(aligned_weights_buffer->get_ptr(), aligned_weights_buffer->size()); FRONT_END_INITIALIZATION_CHECK(ss && ss.good(), "Cannot open ov::tensor."); return &ss; diff --git a/src/frontends/paddle/src/internal/op/conditional_block.cpp b/src/frontends/paddle/src/internal/op/conditional_block.cpp index 96d475ba05799a..4173e45f56d384 100644 --- a/src/frontends/paddle/src/internal/op/conditional_block.cpp +++ b/src/frontends/paddle/src/internal/op/conditional_block.cpp @@ -5,9 +5,9 @@ #include "internal/op/conditional_block.hpp" #include -#include -#include "ngraph/op/constant.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; diff --git a/src/frontends/paddle/src/internal/op/tensorarray_write.cpp b/src/frontends/paddle/src/internal/op/tensorarray_write.cpp index 62949a2e738767..78d9315affdf13 100644 --- a/src/frontends/paddle/src/internal/op/tensorarray_write.cpp +++ b/src/frontends/paddle/src/internal/op/tensorarray_write.cpp @@ -5,9 +5,9 @@ #include "internal/op/tensorarray_write.hpp" #include -#include -#include "ngraph/op/constant.hpp" +#include "openvino/core/validation_util.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; diff --git a/src/frontends/paddle/src/internal/op/while.cpp b/src/frontends/paddle/src/internal/op/while.cpp index 6d06f4fe2ec598..309b8a8ec14926 100644 --- a/src/frontends/paddle/src/internal/op/while.cpp +++ b/src/frontends/paddle/src/internal/op/while.cpp @@ -5,10 +5,6 @@ #include "internal/op/while.hpp" #include -#include - -#include "ngraph/op/constant.hpp" -#include "openvino/op/util/precision_sensitive_attribute.hpp" using namespace std; using namespace 
ov; diff --git a/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp b/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp index 3aa363a06e43f4..a34fe5d480e080 100644 --- a/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp @@ -4,15 +4,9 @@ #include "internal/pass/transform_fakequantize.hpp" -#include -#include -#include -#include -#include -#include - #include "default_opset.hpp" -#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" using namespace ov::frontend::paddle::op::default_opset; @@ -43,24 +37,24 @@ using namespace ov::frontend::paddle::op; Multiply */ ov::frontend::paddle::pass::TransformFakeQuantize::TransformFakeQuantize() { - const auto input_label = ngraph::pattern::any_input(); - const auto q_zp_label = ngraph::pattern::any_input(); + const auto input_label = pattern::any_input(); + const auto q_zp_label = pattern::any_input(); // quantize phase - const auto q_zp_cvt_label = ngraph::pattern::wrap_type({q_zp_label}); - const auto q_sub_label = ngraph::pattern::wrap_type({input_label, q_zp_cvt_label}); - const auto q_real_scale_label = ngraph::pattern::wrap_type(); - const auto div_label = ngraph::pattern::wrap_type({q_sub_label, q_real_scale_label}); - const auto round_label = ngraph::pattern::wrap_type({div_label}); - const auto q_clamp_label = ngraph::pattern::wrap_type({round_label}); + const auto q_zp_cvt_label = pattern::wrap_type({q_zp_label}); + const auto q_sub_label = pattern::wrap_type({input_label, q_zp_cvt_label}); + const auto q_real_scale_label = pattern::wrap_type(); + const auto div_label = pattern::wrap_type({q_sub_label, q_real_scale_label}); + const auto round_label = pattern::wrap_type({div_label}); + const auto q_clamp_label = pattern::wrap_type({round_label}); // dequantize phase - 
const auto dq_cvt_label = ngraph::pattern::wrap_type({q_clamp_label}); - const auto dq_zp_label = ngraph::pattern::any_input(); - const auto dq_zp_cvt_label = ngraph::pattern::wrap_type({dq_zp_label}); - const auto dq_sub_label = ngraph::pattern::wrap_type({dq_cvt_label, dq_zp_cvt_label}); - const auto dq_real_scale_label = ngraph::pattern::wrap_type(); - const auto output_label = ngraph::pattern::wrap_type({dq_sub_label, dq_real_scale_label}); + const auto dq_cvt_label = pattern::wrap_type({q_clamp_label}); + const auto dq_zp_label = pattern::any_input(); + const auto dq_zp_cvt_label = pattern::wrap_type({dq_zp_label}); + const auto dq_sub_label = pattern::wrap_type({dq_cvt_label, dq_zp_cvt_label}); + const auto dq_real_scale_label = pattern::wrap_type(); + const auto output_label = pattern::wrap_type({dq_sub_label, dq_real_scale_label}); - matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) -> bool { + matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& opsMap = m.get_pattern_value_map(); if (transformation_callback(m.get_match_root())) { return false; @@ -127,6 +121,6 @@ ov::frontend::paddle::pass::TransformFakeQuantize::TransformFakeQuantize() { replace_node(output_node, fake_node); return true; }; - auto m = std::make_shared(output_label, "TransformFakeQuantize"); + auto m = std::make_shared(output_label, "TransformFakeQuantize"); this->register_matcher(m, callback); } diff --git a/src/frontends/paddle/src/internal/pass/transform_if.cpp b/src/frontends/paddle/src/internal/pass/transform_if.cpp index 01ba7e1d71d41c..203fcb1fb61696 100644 --- a/src/frontends/paddle/src/internal/pass/transform_if.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_if.cpp @@ -4,20 +4,12 @@ #include "internal/pass/transform_if.hpp" -#include -#include -#include -#include -#include -#include - #include "default_opset.hpp" #include "internal/op/conditional_block.hpp" #include "internal/op/tensorarray_write.hpp" -#include 
"ngraph/op/util/op_types.hpp" -#include "openvino/frontend/paddle/exception.hpp" -#include "openvino/op/util/op_types.hpp" -#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/common_optimizations/fold_subgraph_empty_inputs.hpp" using namespace std; using namespace ov; @@ -28,7 +20,7 @@ using namespace ov::frontend::paddle::op::default_opset; // The contional_block only has "then" branch, while If op requires both "then" and "else" branch the same time. // Thus a "pass-through" model is built on purpose for "else" branch with the same outputs as "then" branch. ov::frontend::paddle::pass::TransformIf::TransformIf(std::vector> funcs) { - const auto cond_label = ngraph::pattern::wrap_type(); + const auto cond_label = pattern::wrap_type(); matcher_pass_callback callback = [funcs](pattern::Matcher& m) -> bool { const auto conditional_block = @@ -104,6 +96,6 @@ ov::frontend::paddle::pass::TransformIf::TransformIf(std::vector(cond_label, "condtionalblock_if"); + auto m = std::make_shared(cond_label, "condtionalblock_if"); this->register_matcher(m, callback); } diff --git a/src/frontends/paddle/src/internal/pass/transform_tensorarray.cpp b/src/frontends/paddle/src/internal/pass/transform_tensorarray.cpp index d4b6e8a55828dd..e1cc417821c826 100644 --- a/src/frontends/paddle/src/internal/pass/transform_tensorarray.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_tensorarray.cpp @@ -4,22 +4,11 @@ #include "internal/pass/transform_tensorarray.hpp" -#include -#include -#include -#include -#include -#include -#include - #include "default_opset.hpp" -#include "internal/op/conditional_block.hpp" #include "internal/op/tensorarray_write.hpp" -#include "internal/op/while.hpp" -#include "openvino/frontend/paddle/exception.hpp" -#include "openvino/op/util/op_types.hpp" -#include "openvino/pass/constant_folding.hpp" -#include 
"openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/common_optimizations/remove_concat_zero_dim_input.hpp" using namespace std; using namespace ov; @@ -29,11 +18,10 @@ using namespace frontend::paddle::op::default_opset; // Transform pattern "TensorArrayLength->TensorArrayWrite" to OV concat, which // will append to the end of array after unsqueeze along axis 0. ov::frontend::paddle::pass::TransformTensorArray::TransformTensorArray(std::vector> functions) { - const auto shape_label = ngraph::pattern::wrap_type(); - const auto length_label = ngraph::pattern::wrap_type( - {shape_label, ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}); - auto write_label = - ngraph::pattern::wrap_type({ngraph::pattern::any_input(), length_label}); + const auto shape_label = pattern::wrap_type(); + const auto length_label = pattern::wrap_type( + {shape_label, pattern::any_input(), pattern::any_input(), pattern::any_input()}); + auto write_label = pattern::wrap_type({pattern::any_input(), length_label}); matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& opsMap = m.get_pattern_value_map(); @@ -57,6 +45,6 @@ ov::frontend::paddle::pass::TransformTensorArray::TransformTensorArray(std::vect return true; }; - auto m = std::make_shared(write_label, "tensorarray"); + auto m = std::make_shared(write_label, "tensorarray"); this->register_matcher(m, callback); } diff --git a/src/frontends/paddle/src/internal/pass/transform_while.cpp b/src/frontends/paddle/src/internal/pass/transform_while.cpp index 917782f4299fd6..f00accb51b2524 100644 --- a/src/frontends/paddle/src/internal/pass/transform_while.cpp +++ b/src/frontends/paddle/src/internal/pass/transform_while.cpp @@ -4,21 +4,13 @@ #include "internal/pass/transform_while.hpp" -#include -#include -#include -#include -#include -#include - #include "default_opset.hpp" 
-#include "internal/op/conditional_block.hpp" -#include "internal/op/tensorarray_write.hpp" #include "internal/op/while.hpp" +#include "openvino/core/rt_info.hpp" #include "openvino/frontend/paddle/exception.hpp" -#include "openvino/op/util/op_types.hpp" -#include "openvino/pass/constant_folding.hpp" -#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/common_optimizations/fold_subgraph_empty_inputs.hpp" using namespace std; using namespace ov; @@ -34,7 +26,7 @@ using namespace ov::frontend::paddle::op::default_opset; // TensorArray could be a non-empty input of the loop body, which needs extra concat. // What's more, we have to tell which output is of TensorArray type to concate. ov::frontend::paddle::pass::TransformWhile::TransformWhile(std::vector> functions) { - const auto while_label = ngraph::pattern::wrap_type(); + const auto while_label = pattern::wrap_type(); matcher_pass_callback callback = [functions](pattern::Matcher& m) -> bool { const auto& while_node = std::dynamic_pointer_cast(m.get_match_root()); @@ -98,6 +90,6 @@ ov::frontend::paddle::pass::TransformWhile::TransformWhile(std::vector(while_label, "while_loop"); + auto m = std::make_shared(while_label, "while_loop"); this->register_matcher(m, callback); } diff --git a/src/frontends/paddle/src/op/reduce_ops.hpp b/src/frontends/paddle/src/op/reduce_ops.hpp index 9f45dad600e576..bc700c1ed0327e 100644 --- a/src/frontends/paddle/src/op/reduce_ops.hpp +++ b/src/frontends/paddle/src/op/reduce_ops.hpp @@ -23,7 +23,7 @@ NamedOutputs reduce_ops(const NodeContext& node) { } else { dims = node.get_attribute>("dim"); } - auto axesNode = default_opset::Constant::create(ngraph::element::i32, {dims.size()}, dims); + auto axesNode = default_opset::Constant::create(ov::element::i32, {dims.size()}, dims); bool scalar_output = !keep_dim; if (scalar_output) { for (int32_t i = 0; i < input_rank; i++) { 
diff --git a/src/frontends/paddle/src/op/softshrink.cpp b/src/frontends/paddle/src/op/softshrink.cpp index 2feda391336ce3..98c4ec53699790 100644 --- a/src/frontends/paddle/src/op/softshrink.cpp +++ b/src/frontends/paddle/src/op/softshrink.cpp @@ -16,24 +16,22 @@ NamedOutputs softshrink(const NodeContext& node) { PADDLE_OP_CHECK(node, lambda >= 0, "Softshrink op lambda must be non-negative."); PADDLE_OP_CHECK(node, input_element_type.is_signed(), "Softshrink op input must be signed type."); - std::shared_ptr output; + std::shared_ptr output; const auto positive_lambda = default_opset::Constant::create(input_element_type, Shape{}, {lambda}); const auto negative_lambda = default_opset::Constant::create(input_element_type, Shape{}, {-lambda}); - std::shared_ptr negative_node = std::make_shared(data, positive_lambda); - std::shared_ptr positive_node = std::make_shared(data, positive_lambda); + std::shared_ptr negative_node = std::make_shared(data, positive_lambda); + std::shared_ptr positive_node = std::make_shared(data, positive_lambda); - std::shared_ptr zero_node = default_opset::Constant::create(input_element_type, Shape{}, {0}); + std::shared_ptr zero_node = default_opset::Constant::create(input_element_type, Shape{}, {0}); // Create masks for values below negative lambda and above positive lambda - std::shared_ptr values_below_neg_lambda = - std::make_shared(data, negative_lambda); - std::shared_ptr values_above_pos_lambda = - std::make_shared(data, positive_lambda); + std::shared_ptr values_below_neg_lambda = std::make_shared(data, negative_lambda); + std::shared_ptr values_above_pos_lambda = std::make_shared(data, positive_lambda); output = std::make_shared(values_above_pos_lambda, negative_node, data); output = std::make_shared(values_below_neg_lambda, positive_node, output); - std::shared_ptr zero_mask = + std::shared_ptr zero_mask = std::make_shared(values_below_neg_lambda, values_above_pos_lambda); output = std::make_shared(zero_mask, output, zero_node); 
diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_embedding.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_embedding.py index 98f7e50490c0cf..1808a613f5d142 100644 --- a/src/frontends/paddle/tests/test_models/gen_scripts/generate_embedding.py +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_embedding.py @@ -12,20 +12,20 @@ from save_model import saveModel -def ngraph_embedding(ids, vocab_embeddings, vocab_size, embedding_dim, padding_idx, sparse): +def ov_embedding(ids, vocab_embeddings, vocab_size, embedding_dim, padding_idx, sparse): """ decomposing embedding with OpenVINO ops. """ - import ngraph as ng - from ngraph import opset8 as opset - from openvino.inference_engine import IECore + import openvino as ov + from openvino.runtime import opset8 + from openvino import Core if vocab_embeddings is None: # vocab_embeddings = np.zeros((vocab_size, embedding_dim)).astype("float32") - node_ids = ng.parameter(shape=ids.shape, name='ids', dtype=ids.dtype) - node_w = ng.parameter(shape=vocab_embeddings.shape, name='w', dtype=vocab_embeddings.dtype) + node_ids = opset8.parameter(shape=ids.shape, name='ids', dtype=ids.dtype) + node_w = opset8.parameter(shape=vocab_embeddings.shape, name='w', dtype=vocab_embeddings.dtype) if padding_idx == -1: padding_idx += vocab_size @@ -37,24 +37,22 @@ def ngraph_embedding(ids, vocab_embeddings, vocab_size, embedding_dim, padding_i masked_embeddings = np.ones(vocab_embeddings.shape, dtype='int64') masked_embeddings[padding_idx,:] = 0 # mask - node_mask = ng.constant(masked_embeddings, name='mask', dtype=vocab_embeddings.dtype) - node_masked_w = ng.multiply(node_w, node_mask) + node_mask = opset8.constant(masked_embeddings, name='mask', dtype=vocab_embeddings.dtype) + node_masked_w = opset8.multiply(node_w, node_mask) - node_axis = ng.constant([0], name='const0', dtype=np.int64) - node_gather = opset.gather(data=node_masked_w if padding_idx else node_w, indices=node_ids, 
axis=node_axis, batch_dims=0) + node_axis = opset8.constant([0], name='const0', dtype=np.int64) + node_gather = opset8.gather(data=node_masked_w if padding_idx else node_w, indices=node_ids, axis=node_axis, batch_dims=0) - graph = ng.result(node_gather, name='y') + graph = opset8.result(node_gather, name='y') parameters = [node_ids, node_w] inputs_dict = {'ids': ids, "w": vocab_embeddings} # - function = ng.Function(graph, parameters, "embedding") - - ie_network = ng.function_to_cnn(function) - ie = IECore() - executable_network = ie.load_network(ie_network, 'CPU') - output = executable_network.infer(inputs_dict) + ov_model = ov.Model(graph, parameters, "embedding") + core = Core() + compiled_model = core.compile_model(ov_model, 'CPU') + output = compiled_model(inputs_dict) return output @@ -97,8 +95,7 @@ def embedding(name : str, ids, vocab_size, embedding_dim, padding_idx=None, spar # if compare: - ng_result = ngraph_embedding(ids, vocab_embeddings, vocab_size, embedding_dim, padding_idx, sparse) - + ng_result = ov_embedding(ids, vocab_embeddings, vocab_size, embedding_dim, padding_idx, sparse) ng_result = list(ng_result.values())[0] paddle_result = list(outputs.values())[0] diff --git a/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.cpp b/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.cpp index 821007819848ed..927097023bcdbf 100644 --- a/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.cpp +++ b/src/frontends/pytorch/src/transforms/softmax_reshape_elimination.cpp @@ -4,9 +4,8 @@ #include "softmax_reshape_elimination.hpp" -#include - #include "openvino/core/rt_info.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/softmax.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" diff --git a/src/frontends/tests/frontend/shared/gtest_main_manifest/main.cpp b/src/frontends/tests/frontend/shared/gtest_main_manifest/main.cpp index fdcd57534575f3..030990661f81a5 
100644 --- a/src/frontends/tests/frontend/shared/gtest_main_manifest/main.cpp +++ b/src/frontends/tests/frontend/shared/gtest_main_manifest/main.cpp @@ -8,12 +8,12 @@ #include "utils.hpp" #include "common_test_utils/file_utils.hpp" -#include "ngraph/file_util.hpp" +#include "openvino/util/file_util.hpp" OPENVINO_SUPPRESS_DEPRECATED_START static const std::string s_manifest{ #ifdef MANIFEST - ngraph::file_util::path_join(ov::test::utils::getExecutableDirectory(), MANIFEST) + ov::util::path_join({ov::test::utils::getExecutableDirectory(), MANIFEST}) #endif }; OPENVINO_SUPPRESS_DEPRECATED_END From 3f7989a81794188dd88afc7e618b38a8f93c11cd Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 7 Nov 2023 14:37:24 +0100 Subject: [PATCH 222/275] [DOCS] Fixing link in Get Started article (#20881) * Updating Get Started section Addressing JIRA ticket: 124289 * Update get_started.md --- docs/articles_en/get_started.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/articles_en/get_started.md b/docs/articles_en/get_started.md index 54a6be18958710..9d3bcb1bce823e 100644 --- a/docs/articles_en/get_started.md +++ b/docs/articles_en/get_started.md @@ -22,7 +22,7 @@

Welcome to OpenVINO! This guide introduces installation and learning materials for Intel® Distribution of OpenVINO™ toolkit. The guide walks through the following steps:
- Quick Start Example + Quick Start Example Install OpenVINO Learn OpenVINO

@@ -36,7 +36,7 @@ .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 -Try out OpenVINO's capabilities with this quick start example that estimates depth in a scene using an OpenVINO monodepth model. `Run the example in a Jupyter Notebook inside your web browser `__ to quickly see how to load a model, prepare an image, inference the image, and display the result. +Try out OpenVINO's capabilities with this `quick start example `__ that estimates depth in a scene using an OpenVINO monodepth model to quickly see how to load a model, prepare an image, inference the image, and display the result. .. _install-openvino-gsg: From a0849edca142f693c61d7e45f4f1cc720c4ab719 Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 7 Nov 2023 22:25:05 +0800 Subject: [PATCH 223/275] [CPU] migrate cpu plugin api 2.0 (#18124) * [CPU] CPU plugin migrates to plugin API 2.0 * Fix legacy config/metric issue * Fix some issue of ov_cpu_func_tests 1. set_tensors_impl segment fault 2. ov::loaded_from_cache unsupported issue * Resolve some comments 1. ov::loaded_from_cache issue 2. throw_if_cancelled issue 3. import_model issue 4. set_tensor_impl issue 5. 
batched_inference issue * Fix dynamic shape inference issue * Fix build error * keep original model info in infer_request * Fix minor error * cache internal tensors for input/output precision change * Disable import model test cases with precision changes * fix precision issue * Fix issue for import model * Fix InferRequestCancellationTests exception issue * Skip InferRequestIOBBlobTest.*secondCallGetInputDoNotReAllocateData due to new plugin api have different behavior * Fix graph name issue * Fix ROI issues * Fix Transpose shape issue * Skip vie::Version test due to change to ov::Version * Solve input port name changes issue * Solve preprocess layout issue * Fix minor issue * tidy up code * Fix conflict after rebase * Fix Windows build warning * Add aux tensors for precision change issue * Fix import/export model issue * WA single layer name changed by preprocess * Revert "WA single layer name changed by preprocess" This reverts commit bc8fcdd43c643f6e65b2d0711381e268795b9400. * Skip some legacy tests due to plugin api 2.0 is enabled 1. skip some python legacy tests for plugin api 2.0 some different behaviors 2. skip some smoke tests due to output port name was changed * Fix 2 build warnings * Skip some AUTO plugin tests * Fix property issue caused by AUTO plugin * Skip PSROIPooling issues * Follow header files reference policy * Split out transformation fixing for nop_elimination * Fix AUTO plugin mismatch issue for get_tensor function * Fix aux tensor shape issue * Fix tensor shape issue * WA python sync inference sample's segmentfault issue * Fix reshape issue for dynamic inference * Fixed incorrect tensor name in e2e test Fixe issue: e2e ONNX_Customized_Cascade_Rcnn_api_2_True_batch_1_device_CPU_precision_FP325den8cnk * Fix python segmentfault issue of plugin api 2.0 * Fix python segmentfault issue of plugin api 2.0 * Revert "Fix python segmentfault issue of plugin api 2.0" This reverts commit 6f502e5d8642f3a7fa7a60f8007868e9f24634c6. 
* Fix onnx_duplicated_output_name due to empty tensor Co-authored-by: Bell, Song * Remove redundant code * Remove python segment fault WA * Keep rt_info to fix test failure in case of legacy public api * Fix output port names missing issue * Adress some reviewers' comments * Restore OnnxBackendNodeModelTest::test_maxpool_with_argmax_2d_precomputed_pads_cpu after fixing has been merged * Resolve tensor sharing issue when there are same name output port name In some case, model has 2 or more same name input/output ports, they aslo have the same precision and partial_shape. Compiled_model will share the same ov::Descriptor::Tensor pointer and ov::Tensor between multiple such ports. Considered solving python segment fault issue to create seperated input/output ports, which also need handle such tensor shared case, this patch will do it. * Resolve tensor sharing issue when there are same name output port name In some case, model has 2 or more same name input/output ports, they aslo have the same precision and partial_shape. Compiled_model will share the same ov::Descriptor::Tensor pointer and ov::Tensor between multiple such ports. Considered solving python segment fault issue to create seperated input/output ports, which also need handle such tensor shared case, this patch will do it. 
* Better method to find shrared tensor desc * rename with snake_case style * Remove ngraph header files * Keep external_ptr naming * Add OPENVINO_SUPPRESS_DEPRECATED for some legacy code * Use port's tensor_ptr to replace creating new tensor_ptr * Resolve some reviewer comments * Implement ov::IInferRequestInternalWrapper::GetPreProcess to recover python GetPrepProcess tests * Remove unnecessary header files reference * Assert the risk of precision change and reorder at the same time * Modify legacy python test to fit plugin api 2.0 behavior * Recover smoke_Transpose(2|4|5|6)D/TransposeLayerTest.CompareWithRefs due to fixing is merged * Fix typo issue * Address reviewer's comments * Disable precision coversion * Fix error when CpuBlockedMemoryDesc * Remove precision mismatch WA * WA precision issue for query_model * Solve precision mismatch between compiled model and graph * Fixe failure of query_model * Rebase to new plugin api update * Recover the test cases of precision mismatch * Try to fix name changing for graph model * Remove tets code * Remove fp64 * Rebase to new plugin api update * Update for some failure cases * Fix bert_benchmark failure issue * Avoid segment fault in arm acl Legacy public api + cpu plugin api will add convert op by preprocess by default for unsupported precision, but ACLConvertExecutor cannot support dimension > 6, so this test will be segment fault due to dimension > 6 smoke_TestNumpyBroadcastNgraphEvaluate/BroadcastLayerTest.CompareWithRefs/targetShape=(1.2.3.4.5.6.7.8.9.10)_axesMapping=()_mode=numpy_inShape=(1.2.1.4.1.6.1.8.1.10)_inNPrec=I8_trgDev=CPU smoke_TestNumpyBroadcastNgraphEvaluate/BroadcastLayerTest.CompareWithRefs/targetShape=(1.2.3.4.5.6.7.8.9.10)_axesMapping=()_mode=numpy_inShape=(1.2.1.4.1.6.1.8.1.10)_inNPrec=U8_trgDev=CPU * Remove precision change from preprocess to avoid ACL unsupport convert dim > 6 * ACLConvertExecutor cannot support dimension > 6, don't let preprocess to add Convert * Revert "ACLConvertExecutor 
cannot support dimension > 6, don't let preprocess to add Convert" This reverts commit fd7a8b35af686e806cc4fb3fa28cb7b6a0acaa71. * Revert "Remove precision change from preprocess to avoid ACL unsupport convert dim > 6" This reverts commit 3c2d9a5f172026688b77745cf0f1fb2488401948. * Debug * Debug incorrect precision checking issue * Debug Eltwise FP64 unsupported issue * Add logs for precision * debug log * Update for new dependent PRs merged * Fix failure caused by preprocess Fix below failures due to cannot find ops by name smoke_LPT/ReduceMaxTransformation.CompareWithRefImpl/f32_[1,3,10,10]_CPU_f32__256* * Fix build error * Fix failure caused by missing code during rebase * Add debug * Fix precision unsupport issue * U16/I16/U64 precision support * Resolve the issue of f64 reorder Fix below issue: Cannot create reorder primitive: unsupported reorder case * Fix convert multiple child edge issue * Solve ROI tensor failure issues * Temporarily disable num_nodes comparison * Only change convert precision for fp64 * Put convert precision change before reorder to avoid confusion * Add debug log for transformation * Fix rebase confilict * Fix clang issue * Temporarily disable test_infer_mixed_values python test of bf16 * Solve issue of smoke_ConvertCPULayerTest_BOOL_Dynamic_inputPRC=BF16 choose FP32 primType rather than BP16 primType * Fix issue of pytorch_tests/test_outer.py There are 2 output ports, but with the same port name, they should share the same tensor. * Fix arm cannot find Eltwise executor issue smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=INPUT_Device=CPU_PrecisionInNet=FP16_PrecisionInNgraph=BOOL will report below error: [ GENERAL_ERROR ] Supported Eltwise executor is not found It need change convert precision to avoid such problem. 
* Fix memory overwritten issue * Temporarily skip arm fp16 SetBlobTest * Fix compile error after rebase * Restore smoke_IsOp test due to fixing pr merged * Fix float to bf16 issue in avx2 isa * solve onnx test xfail issue * Skip test cases that ARM Eltwise executor FP16 is not supported smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=INPUT_Device=CPU_PrecisionInNet=FP16_PrecisionInNgraph=BOOL smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=BOTH_Device=CPU_PrecisionInNet=FP16_PrecisionInNgraph=BOOL [ GENERAL_ERROR ] Supported Eltwise executor is not found * [CPU] improve reorder to support any precision * Implement ReorderExecutor * Fix builld error * Not cache executor due to its primitive has been cached * Keep convert one time at most At most insert one convert if needed, if still cannot do reorder it will throw exception rather than insert the second convert For example, below reorder will not be supported: FP64<->I64/U64/U32 U32<->I64/U64 U32<->I16/U16 FP64<->FP64 BIN<->BIN * Only do conversion if layout is same * update for only convert case * Update for reviewer comments * update for failure cases * Address reviewer comments * Update rebase issue * minor update * Solve unsupported precision issue in tranfromation rather than init_edge * Remove unnecessary convert in init_edge * Minor changes * Update Reorder::reorderData * Solve issue if only coversion without reorder * Address reviewer comments * Address reviewer comments * Keep exception for unsuported precision * update * Revert reorder executor implement * Solve float->bool issue on transformation pipeline * Solve I64 is not supported issues * Solve reviewer's comments * Fixed dynamic top_k node issue * Skip nhwc and nChw16c test cases for ConvertLayer * Update for reviewers' comments * Fix some failures * Update for several failure cases * Update for apiConformanceTests failures * Fix incorrect node name after import model * update * update comments * Solve issue of smoke_MatMul_NoTranspose and 
smoke_MatMul_BothTranspose * Fixed AlignMatMulInputRanks scalar issue * Address reviewers' comments, remove redundant path in graph.cpp * Remove test_div_uint8_cpu from xfail_issue_58676 * Solve invalid number of nodes for smoke_Snippets_BroadcastSelect * ConstantResultSubgraphTest of u16/i16/u32/i64/u64 * restore smoke_SetBlobCPU BOOL tests for arm * [CPU] Fix ARM precision issue ARM64 ACL prefers fp16 than fp32, API 2.0 requires input/output precision not changes, then fp32 input will trigger convert node is added to convert fp32 to fp16. * Solve some ARM64 failures * Fix arm64 InferRequestVariableStateTest tests out of memory issue ARM64 will force fp16 precision, which cause states memory can be fp16, so memcpy to state_memory cannot use float * element_size, else it will be out of memory bound. * Skip 2 arm64 tests caused by forcing fp16 precision * Revert "Fix arm64 InferRequestVariableStateTest tests out of memory issue" This reverts commit 3e12bd48c253bafcf5b31d4d3736d07ecb9f73c6. 
* Fix python test_get_profiling_info failure issue --------- Co-authored-by: Bell, Song Co-authored-by: Chen Peter --- .../python/tests/test_runtime/test_core.py | 3 +- .../tests/test_runtime/test_infer_request.py | 4 +- .../python/tests_compatibility/__init__.py | 1 - .../test_ExecutableNetwork.py | 3 +- .../test_InferRequest.py | 10 +- .../test_onnx/test_zoo_models.py | 1 - src/frontends/onnx/tests/__init__.py | 2 - .../onnx/tests/tests_python/test_backend.py | 8 - .../tests/tests_python/test_ops_reshape.py | 4 +- .../tensorflow/tests/compilation.cpp | 3 +- src/inference/src/blob_transform.cpp | 2 +- .../interface/ie_iinfer_request_internal.cpp | 2 +- src/inference/src/dev/converter_utils.cpp | 2 +- .../src/dev/preprocessing/preprocessing.cpp | 9 +- src/inference/src/ie_common.cpp | 3 + .../intel_cpu/src/async_infer_request.cpp | 18 +- .../intel_cpu/src/async_infer_request.h | 19 +- .../{exec_network.cpp => compiled_model.cpp} | 284 +++-- src/plugins/intel_cpu/src/compiled_model.h | 89 ++ src/plugins/intel_cpu/src/config.cpp | 122 +- src/plugins/intel_cpu/src/config.h | 8 +- src/plugins/intel_cpu/src/cpu_memory.cpp | 54 +- .../intel_cpu/src/cpu_streams_calculation.cpp | 24 +- .../intel_cpu/src/cpu_streams_calculation.hpp | 12 +- .../intel_cpu/src/dnnl_extension_utils.cpp | 8 +- src/plugins/intel_cpu/src/exec_network.h | 86 -- src/plugins/intel_cpu/src/graph.cpp | 437 +++---- src/plugins/intel_cpu/src/graph.h | 48 +- src/plugins/intel_cpu/src/infer_request.cpp | 1025 +++++++---------- src/plugins/intel_cpu/src/infer_request.h | 126 +- src/plugins/intel_cpu/src/memory_state.cpp | 11 +- src/plugins/intel_cpu/src/memory_state.h | 29 +- src/plugins/intel_cpu/src/node.cpp | 4 +- .../src/nodes/common/cpu_convert.cpp | 11 +- src/plugins/intel_cpu/src/nodes/convert.cpp | 14 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 2 + src/plugins/intel_cpu/src/nodes/input.cpp | 10 +- src/plugins/intel_cpu/src/nodes/topk.cpp | 2 +- src/plugins/intel_cpu/src/plugin.cpp | 401 ++++--- 
src/plugins/intel_cpu/src/plugin.h | 90 +- src/plugins/intel_cpu/src/serialize.cpp | 143 +-- src/plugins/intel_cpu/src/serialize.h | 33 +- .../common/pass/align_matmul_input_ranks.cpp | 24 +- .../convert_to_cpu_specific_opset.hpp | 7 +- .../transformation_pipeline.cpp | 65 +- .../transformations/transformation_pipeline.h | 2 - src/plugins/intel_cpu/src/utils/blob_dump.cpp | 25 + .../intel_cpu/src/utils/ngraph_utils.hpp | 21 + .../skip_tests_config.cpp | 45 +- .../snippets/select.cpp | 104 +- .../src/fuse_transpose_reorder.cpp | 2 +- .../functional/test_utils/cpu_test_utils.cpp | 2 +- .../behavior/compiled_model/properties.cpp | 2 +- 53 files changed, 1599 insertions(+), 1867 deletions(-) rename src/plugins/intel_cpu/src/{exec_network.cpp => compiled_model.cpp} (54%) create mode 100644 src/plugins/intel_cpu/src/compiled_model.h delete mode 100644 src/plugins/intel_cpu/src/exec_network.h diff --git a/src/bindings/python/tests/test_runtime/test_core.py b/src/bindings/python/tests/test_runtime/test_core.py index a864f73228106b..33540294117909 100644 --- a/src/bindings/python/tests/test_runtime/test_core.py +++ b/src/bindings/python/tests/test_runtime/test_core.py @@ -65,7 +65,8 @@ def test_core_class(device): input_tensor = Tensor(input_data) results = request.infer({"data": input_tensor}) - assert np.allclose(results[list(results)[0]], expected_output) + # convert node may be introduced by API 2.0, which brings some deviation + assert np.allclose(results[list(results)[0]], expected_output, 1e-4, 1e-4) # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request diff --git a/src/bindings/python/tests/test_runtime/test_infer_request.py b/src/bindings/python/tests/test_runtime/test_infer_request.py index 85e4296f691081..d39459b7fadbda 100644 --- a/src/bindings/python/tests/test_runtime/test_infer_request.py +++ b/src/bindings/python/tests/test_runtime/test_infer_request.py @@ -109,8 +109,8 @@ def test_get_profiling_info(device): 
request.infer({tensor_name: img}) assert request.latency > 0 prof_info = request.get_profiling_info() - soft_max_node = next(node for node in prof_info if node.node_name == "fc_out") - assert "Softmax" in soft_max_node.node_type + soft_max_node = next(node for node in prof_info if node.node_type == "Softmax") + assert soft_max_node assert soft_max_node.status == ProfilingInfo.Status.EXECUTED assert isinstance(soft_max_node.real_time, datetime.timedelta) assert isinstance(soft_max_node.cpu_time, datetime.timedelta) diff --git a/src/bindings/python/tests_compatibility/__init__.py b/src/bindings/python/tests_compatibility/__init__.py index c59bff805ec8a2..b6eb6e0b5c7c97 100644 --- a/src/bindings/python/tests_compatibility/__init__.py +++ b/src/bindings/python/tests_compatibility/__init__.py @@ -135,7 +135,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadcast_cpu - " "Not equal to tolerance") xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations") -xfail_issue_58676 = xfail_test(reason="AssertionError: Not equal to tolerance rtol=0.001, atol=1e-07") xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140") xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") diff --git a/src/bindings/python/tests_compatibility/test_inference_engine/test_ExecutableNetwork.py b/src/bindings/python/tests_compatibility/test_inference_engine/test_ExecutableNetwork.py index 8962eac467ce40..13a899520d5bd9 100644 --- a/src/bindings/python/tests_compatibility/test_inference_engine/test_ExecutableNetwork.py +++ b/src/bindings/python/tests_compatibility/test_inference_engine/test_ExecutableNetwork.py @@ -131,7 +131,8 @@ def test_wait_before_start(device): requests = exec_net.requests for id in range(num_requests): status = requests[id].wait() - assert status == 
ie.StatusCode.INFER_NOT_STARTED + # Plugin API 2.0 behaves differently and does not return this status + # assert status == ie.StatusCode.INFER_NOT_STARTED request_handler = exec_net.start_async(request_id=id, inputs={'parameter': img}) status = requests[id].wait() assert status == ie.StatusCode.OK diff --git a/src/bindings/python/tests_compatibility/test_inference_engine/test_InferRequest.py b/src/bindings/python/tests_compatibility/test_inference_engine/test_InferRequest.py index 1424c6cbcb7069..d77e5f89ba3a3c 100644 --- a/src/bindings/python/tests_compatibility/test_inference_engine/test_InferRequest.py +++ b/src/bindings/python/tests_compatibility/test_inference_engine/test_InferRequest.py @@ -302,7 +302,8 @@ def callback(self, status): request = exec_net.requests[0] request.set_completion_callback(callback) status = request.wait() - assert status == ie.StatusCode.INFER_NOT_STARTED + # Plugin API 2.0 behaves differently and does not return this status + # assert status == ie.StatusCode.INFER_NOT_STARTED request.async_infer({'parameter': img}) status = request.wait() assert status == ie.StatusCode.OK @@ -320,7 +321,8 @@ def __init__(self, request): self.cv = threading.Condition() self.request.set_completion_callback(self.callback) self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY) - assert self.status_code == ie.StatusCode.INFER_NOT_STARTED + # Plugin API 2.0 behaves differently and does not return this status + # assert self.status_code == ie.StatusCode.INFER_NOT_STARTED def callback(self, statusCode, userdata): self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY) @@ -508,7 +510,7 @@ def test_set_blob_with_incorrect_size(device): blob = ie.Blob(tensor_desc) with pytest.raises(RuntimeError) as e: exec_net.requests[0].set_blob("data", blob) - assert f"Input blob size is not equal network input size" in str(e.value) + assert f"Can't set the input tensor" in str(e.value) with pytest.raises(RuntimeError) as e: 
exec_net.requests[0].set_blob("out", blob) - assert f"Output blob size is not equal network output size" in str(e.value) + assert f"Can't set the output tensor" in str(e.value) diff --git a/src/bindings/python/tests_compatibility/test_onnx/test_zoo_models.py b/src/bindings/python/tests_compatibility/test_onnx/test_zoo_models.py index e97edc69b161b1..49dff95a05449f 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_zoo_models.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_zoo_models.py @@ -21,7 +21,6 @@ xfail_issue_47495, xfail_issue_48145, xfail_issue_48190, - xfail_issue_58676, xfail_issue_78843, xfail_issue_onnx_models_140) diff --git a/src/frontends/onnx/tests/__init__.py b/src/frontends/onnx/tests/__init__.py index 87220792d2d349..452b03dc9b8fb8 100644 --- a/src/frontends/onnx/tests/__init__.py +++ b/src/frontends/onnx/tests/__init__.py @@ -45,7 +45,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): "BlackmanWindow, DFT, HammingWindow, HannWindow, LayerNormalization, " "MelWeightMatrix, SequenceMap, STFT") xfail_issue_35923 = xfail_test(reason="RuntimeError: PReLU without weights is not supported") -xfail_issue_35927 = xfail_test(reason="RuntimeError: B has zero dimension that is not allowable") xfail_issue_38091 = xfail_test(reason="AssertionError: Mismatched elements") xfail_issue_38699 = xfail_test(reason="RuntimeError: OV does not support the following ONNX operations: " "ai.onnx.preview.training.Gradient") @@ -148,7 +147,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_86911 = xfail_test(reason="LSTM_Seq_len_unpacked - AssertionError: zoo models results mismatch") xfail_issue_91151 = xfail_test(reason="RuntimeError: model input (shape={3,4}) and blob (shape=(1)) are incompatible") -xfail_issue_91490 = xfail_test(reason="y has zero dimension which is not allowed") xfail_issue_101965 = xfail_test(reason="Mismatch with numpy-based expected results.") 
xfail_issue_113506 = xfail_test(reason="Unsupported operation of type: LSTMSequence Node expects 7 inputs. Actual: 8") diff --git a/src/frontends/onnx/tests/tests_python/test_backend.py b/src/frontends/onnx/tests/tests_python/test_backend.py index a027f703ba29ce..27fbae1dbd3986 100644 --- a/src/frontends/onnx/tests/tests_python/test_backend.py +++ b/src/frontends/onnx/tests/tests_python/test_backend.py @@ -46,7 +46,6 @@ xfail_issue_82039, xfail_issue_90649, xfail_issue_91151, - xfail_issue_91490, xfail_issue_99949, xfail_issue_99950, xfail_issue_99952, @@ -290,11 +289,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None xfail_issue_38710, "OnnxBackendNodeModelTest.test_reshape_allowzero_reordered_cpu", ), - ( - xfail_issue_91490, - "OnnxBackendNodeModelTest.test_tril_zero_cpu", - "OnnxBackendNodeModelTest.test_triu_zero_cpu", - ), ( skip_dynamic_model, "OnnxBackendNodeModelTest.test_triu_one_row_cpu", @@ -575,8 +569,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None xfail_issue_99973, "OnnxBackendNodeModelTest.test_split_1d_uneven_split_opset18_cpu", "OnnxBackendNodeModelTest.test_split_2d_uneven_split_opset18_cpu", - "OnnxBackendNodeModelTest.test_split_zero_size_splits_opset13_cpu", - "OnnxBackendNodeModelTest.test_split_zero_size_splits_opset18_cpu", ), ( xfail_issue_101965, diff --git a/src/frontends/onnx/tests/tests_python/test_ops_reshape.py b/src/frontends/onnx/tests/tests_python/test_ops_reshape.py index 216672436ed8eb..952983c0c5385a 100644 --- a/src/frontends/onnx/tests/tests_python/test_ops_reshape.py +++ b/src/frontends/onnx/tests/tests_python/test_ops_reshape.py @@ -15,8 +15,7 @@ run_model, run_node, ) -from tests import (xfail_issue_35927, - xfail_issue_44858, +from tests import (xfail_issue_44858, xfail_dynamic_rank) @@ -126,7 +125,6 @@ def test_transpose(): assert np.array_equal(graph_results, [expected_output]) -@xfail_issue_35927 def test_slice_opset1(): data = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) diff --git 
a/src/frontends/tensorflow/tests/compilation.cpp b/src/frontends/tensorflow/tests/compilation.cpp index fbb6ad94430c55..73a22ceed92644 100644 --- a/src/frontends/tensorflow/tests/compilation.cpp +++ b/src/frontends/tensorflow/tests/compilation.cpp @@ -20,7 +20,8 @@ TEST_F(CompileModelsTests, NgramCompilation) { ov::CompiledModel compiled_model = core.compile_model(model, "CPU"); const auto runtime_model = compiled_model.get_runtime_model(); - EXPECT_EQ(runtime_model->get_ordered_ops().size(), 4); + // A convert node will be inserted for CPU plugin API 2.0 + EXPECT_EQ(runtime_model->get_ordered_ops().size(), 5); EXPECT_EQ(runtime_model->get_parameters().size(), 2); EXPECT_EQ(runtime_model->get_results().size(), 1); } diff --git a/src/inference/src/blob_transform.cpp b/src/inference/src/blob_transform.cpp index 75fafaf077ff82..0ccbe03a388a14 100644 --- a/src/inference/src/blob_transform.cpp +++ b/src/inference/src/blob_transform.cpp @@ -363,7 +363,7 @@ void blob_copy(Blob::Ptr src, Blob::Ptr dst) { if (src->getTensorDesc().getPrecision() != dst->getTensorDesc().getPrecision()) IE_THROW() << "Unimplemented blob transformation from precision " << src->getTensorDesc().getPrecision() - << " to " << src->getTensorDesc().getPrecision(); + << " to " << dst->getTensorDesc().getPrecision(); if (src->getTensorDesc().getDims() != dst->getTensorDesc().getDims()) IE_THROW() << "Unimplemented blob transformation from different shapes "; diff --git a/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp b/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp index 61694962a331fd..af12db6c42915d 100644 --- a/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp +++ b/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp @@ -155,7 +155,7 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us ? 
InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) : 1; if (!isInputDynamic && dataSize != inputSize) { - IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize + IE_THROW() << "Input tensor size is not equal network input size (" << dataSize << "!=" << inputSize << ")."; } _inputs[name] = userBlob; diff --git a/src/inference/src/dev/converter_utils.cpp b/src/inference/src/dev/converter_utils.cpp index 47b2ec6a155e0b..a698911f45140d 100644 --- a/src/inference/src/dev/converter_utils.cpp +++ b/src/inference/src/dev/converter_utils.cpp @@ -495,7 +495,7 @@ class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestIntern if (get_legacy_name_from_port(port) == legacy_name) return port; } - OPENVINO_ASSERT(false, "Cannot find port with name: ", legacy_name); + OPENVINO_THROW("Failed to find input or output with name: \'", legacy_name, "\'"); } public: diff --git a/src/inference/src/dev/preprocessing/preprocessing.cpp b/src/inference/src/dev/preprocessing/preprocessing.cpp index c2447ed23421d1..69fb991da1eb32 100644 --- a/src/inference/src/dev/preprocessing/preprocessing.cpp +++ b/src/inference/src/dev/preprocessing/preprocessing.cpp @@ -129,11 +129,12 @@ bool ov::pass::AddPreprocessing::run_on_model(const std::shared_ptr& output_info->getLayout() != InferenceEngine::Layout::SCALAR) { std::stringstream stream; stream << output_info->getLayout(); - preproc.output(i).tensor().set_layout(ov::Layout{stream.str()}); + if (stream.str() == "NHWC") { + if (const_output.get_partial_shape().is_static() && const_output.get_shape().size() == 4) + preproc.output(i).model().set_layout("NCHW"); + preproc.output(i).postprocess().convert_layout(ov::Layout{stream.str()}); + } } - - if (const_output.get_partial_shape().is_static() && const_output.get_shape().size() == 4) - preproc.output(i).model().set_layout("NCHW"); } ov::pass::Manager manager(get_pass_config()); diff --git a/src/inference/src/ie_common.cpp 
b/src/inference/src/ie_common.cpp index 88aea53b19b6ca..a111adaca6a2e9 100644 --- a/src/inference/src/ie_common.cpp +++ b/src/inference/src/ie_common.cpp @@ -18,6 +18,7 @@ #include "ie_parameter.hpp" #include "ngraph/opsets/opset.hpp" #include "openvino/core/except.hpp" +#include "openvino/runtime/exception.hpp" namespace InferenceEngine { IE_SUPPRESS_DEPRECATED_START @@ -64,6 +65,8 @@ void Rethrow() { throw e; } catch (const InferenceEngine::InferCancelled& e) { throw e; + } catch (const ov::Cancelled& e) { + IE_THROW(InferCancelled) << e.what(); } catch (const std::exception& e) { IE_THROW() << e.what(); } catch (...) { diff --git a/src/plugins/intel_cpu/src/async_infer_request.cpp b/src/plugins/intel_cpu/src/async_infer_request.cpp index f1a9f5eb52b560..94fb3acf6c8eef 100644 --- a/src/plugins/intel_cpu/src/async_infer_request.cpp +++ b/src/plugins/intel_cpu/src/async_infer_request.cpp @@ -3,15 +3,19 @@ // #include "async_infer_request.h" -#include -ov::intel_cpu::AsyncInferRequest::AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) - : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor) { - static_cast(inferRequest.get())->SetAsyncRequest(this); +ov::intel_cpu::AsyncInferRequest::AsyncInferRequest( + const std::shared_ptr& request, + const std::shared_ptr& task_executor, + const std::shared_ptr& callback_executor) + : ov::IAsyncInferRequest(request, task_executor, callback_executor) { + static_cast(request.get())->set_async_request(this); } ov::intel_cpu::AsyncInferRequest::~AsyncInferRequest() { - StopAndWait(); + stop_and_wait(); +} + +void ov::intel_cpu::AsyncInferRequest::throw_if_canceled() const { + check_cancelled_state(); } diff --git a/src/plugins/intel_cpu/src/async_infer_request.h b/src/plugins/intel_cpu/src/async_infer_request.h index 
da3601ea99a694..a41a803dca62e9 100644 --- a/src/plugins/intel_cpu/src/async_infer_request.h +++ b/src/plugins/intel_cpu/src/async_infer_request.h @@ -4,22 +4,21 @@ #pragma once -#include -#include -#include #include "infer_request.h" +#include "openvino/runtime/iasync_infer_request.hpp" namespace ov { namespace intel_cpu { -class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { +class AsyncInferRequest : public ov::IAsyncInferRequest { public: - AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr &taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor); + AsyncInferRequest(const std::shared_ptr& request, + const std::shared_ptr& task_executor, + const std::shared_ptr& callback_executor); ~AsyncInferRequest(); -}; -} // namespace intel_cpu -} // namespace ov + void throw_if_canceled() const; +}; +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/exec_network.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp similarity index 54% rename from src/plugins/intel_cpu/src/exec_network.cpp rename to src/plugins/intel_cpu/src/compiled_model.cpp index d694ab523788fd..b6079b97e42d00 100644 --- a/src/plugins/intel_cpu/src/exec_network.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -1,149 +1,131 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "ie_metric_helpers.hpp" // must be included first -#include -#include -#include "exec_network.h" -#include - +#include "compiled_model.h" #include "async_infer_request.h" #include "infer_request.h" -#include "memory_state.h" #include "itt.h" +#include "low_precision/low_precision.hpp" +#include "memory_state.h" +#include "nodes/memory.hpp" +#include "openvino/core/type/element_type.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" #include "serialize.h" -#include "ngraph/type/element_type.hpp" -#include 
"nodes/memory.hpp" -#include +#include "threading/ie_executor_manager.hpp" +#include "transformations/transformation_pipeline.h" #define FIX_62820 0 #if FIX_62820 && ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) -#include +# include #endif -#include -#include -#include -#include -#include -#include "cpp_interfaces/interface/ie_iplugin_internal.hpp" -#include "ie_icore.hpp" + #include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" +#include "threading/ie_cpu_streams_executor.hpp" +#include "transformations/utils/utils.hpp" -#include -#include -#include +#include #include +#include -using namespace InferenceEngine; -using namespace InferenceEngine::details; +using namespace ov::threading; namespace ov { namespace intel_cpu { -InferenceEngine::IInferRequestInternal::Ptr -ExecNetwork::CreateInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) { - if (!this->_plugin || !_plugin->IsNewAPI()) - return nullptr; - return std::make_shared(inputs, outputs, std::static_pointer_cast(shared_from_this())); -} - -InferenceEngine::IInferRequestInternal::Ptr -ExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs) { - return std::make_shared(networkInputs, networkOutputs, std::static_pointer_cast(shared_from_this())); -} - -struct ImmediateSerialExecutor : public ITaskExecutor { - void run(InferenceEngine::Task task) override { +struct ImmediateSerialExecutor : public ov::threading::ITaskExecutor { + void run(ov::threading::Task task) override { std::lock_guard l{_mutex}; task(); } std::mutex _mutex; }; -ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network, - const Config &cfg, - const ExtensionManager::Ptr& extMgr, - const std::shared_ptr& plugin) : - InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr}, - extensionManager(extMgr), - _network(network), - _cfg{cfg}, - _name{network.getName()} { - 
SetPointerToPlugin(plugin); - auto function = network.getFunction(); - if (function == nullptr) { - IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!"; - } - bool isFloatModel = !ov::op::util::has_op_with_type(function); +CompiledModel::CompiledModel(const std::shared_ptr& model, + const std::shared_ptr& plugin, + const Config& cfg, + const ExtensionManager::Ptr& extMgr, + const bool loaded_from_cache) + : ov::ICompiledModel::ICompiledModel(model, plugin), + m_model(model), + m_plugin(plugin), + m_cfg{cfg}, + extensionManager(extMgr), + m_name{model->get_name()}, + m_loaded_from_cache(loaded_from_cache) { + bool isFloatModel = !ov::op::util::has_op_with_type(m_model); - _mutex = std::make_shared(); - const auto& core = _plugin->GetCore(); + m_mutex = std::make_shared(); + const auto& core = m_plugin->get_core(); if (!core) - IE_THROW() << "Unable to get API version. Core is unavailable"; - _cfg.isLegacyApi = !core->isNewAPI(); - + OPENVINO_THROW("Unable to get API version. Core is unavailable"); + m_cfg.isLegacyApi = !core->is_new_api(); if (cfg.exclusiveAsyncRequests) { // special case when all InferRequests are muxed into a single queue - _taskExecutor = _plugin->executorManager()->getExecutor("CPU"); + m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU"); } else { auto streamsExecutorConfig = is_cpu_map_available() - ? _cfg.streamExecutorConfig - : InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, - isFloatModel); + ? 
m_cfg.streamExecutorConfig + : IStreamsExecutor::Config::make_default_multi_threaded(m_cfg.streamExecutorConfig, isFloatModel); streamsExecutorConfig._name = "CPUStreamsExecutor"; - _cfg.streamExecutorConfig._threads = streamsExecutorConfig._threads; + m_cfg.streamExecutorConfig._threads = streamsExecutorConfig._threads; #if FIX_62820 && (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) - _taskExecutor = std::make_shared(streamsExecutorConfig); + m_task_executor = std::make_shared(streamsExecutorConfig); #else - _taskExecutor = _plugin->executorManager()->getIdleCPUStreamsExecutor(streamsExecutorConfig); + m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(streamsExecutorConfig); #endif } if (0 != cfg.streamExecutorConfig._streams) { #if FIX_62820 && (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) // There is no additional threads but we still need serialize callback execution to preserve legacy behaviour - _callbackExecutor = std::make_shared(); + m_callback_executor = std::make_shared(); #else - _callbackExecutor = _plugin->executorManager()->getIdleCPUStreamsExecutor( - IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE}); + m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor( + IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE}); #endif } else { - _callbackExecutor = _taskExecutor; + m_callback_executor = m_task_executor; } - int streams = std::max(1, _cfg.streamExecutorConfig._streams); - std::vector tasks; tasks.resize(streams); - _graphs.resize(streams); - if (_cfg.streamExecutorConfig._streams != 0) { + + if (m_task_executor) + set_task_executor(m_task_executor); + if (m_callback_executor) + set_callback_executor(m_callback_executor); + + int streams = std::max(1, m_cfg.streamExecutorConfig._streams); + std::vector tasks; + tasks.resize(streams); + m_graphs.resize(streams); 
+ if (m_cfg.streamExecutorConfig._streams != 0) { auto all_graphs_ready = [&] { - return std::all_of(_graphs.begin(), _graphs.end(), [&] (Graph& graph) { + return std::all_of(m_graphs.begin(), m_graphs.end(), [&](Graph& graph) { return graph.IsReady(); }); }; do { for (auto&& task : tasks) { task = [this] { - ExecNetwork::GetGraph(); + CompiledModel::get_graph(); }; } - _taskExecutor->runAndWait(tasks); + m_task_executor->run_and_wait(tasks); } while (!all_graphs_ready()); } else { - ExecNetwork::GetGraph(); + CompiledModel::get_graph(); } // Save all MemoryLayer data tensors. Will use insight about mechanics // of MemoryLayer implementation. It uses output edge of MemoryLayer // producer as storage for tensor to keep it between infer calls. - if (_graphs.size() == 1) { - for (auto &node : GetGraph()._graph.GetNodes()) { + if (m_graphs.size() == 1) { + for (auto& node : get_graph()._graph.GetNodes()) { if (node->getType() == Type::MemoryInput) { auto memoryNode = dynamic_cast(node.get()); if (!memoryNode) { - IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; + OPENVINO_THROW("Cannot cast ", node->getName(), " to MemoryInput"); } auto state_store = memoryNode->getStore(); auto state_name = memoryNode->getId(); @@ -153,44 +135,44 @@ ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network, if (suffix_idx != std::string::npos) state_name = state_name.substr(0, suffix_idx); - memoryStates.emplace_back(new VariableState(state_name, state_store)); + m_memory_states.emplace_back(new VariableState(state_name, state_store)); } } } } -ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const { +CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const { int streamId = 0; int socketId = 0; - auto streamsExecutor = dynamic_cast(_taskExecutor.get()); + auto streamsExecutor = std::dynamic_pointer_cast(m_task_executor); if (nullptr != streamsExecutor) { - streamId = streamsExecutor->GetStreamId(); - socketId = 
streamsExecutor->GetSocketId(); + streamId = streamsExecutor->get_stream_id(); + socketId = streamsExecutor->get_socket_id(); } - auto graphLock = GraphGuard::Lock(_graphs[streamId % _graphs.size()]); + auto graphLock = GraphGuard::Lock(m_graphs[streamId % m_graphs.size()]); if (!graphLock._graph.IsReady()) { std::exception_ptr exception; auto makeGraph = [&] { try { GraphContext::Ptr ctx; { - std::lock_guard lock{*_mutex.get()}; + std::lock_guard lock{*m_mutex.get()}; // disable weights caching if graph was created only once - auto weightsCache = _cfg.streamExecutorConfig._streams != 1 ? _socketWeights[socketId] : nullptr; - + auto weightsCache = m_cfg.streamExecutorConfig._streams != 1 ? m_socketWeights[socketId] : nullptr; auto isQuantizedFlag = - (_cfg.lpTransformsMode == Config::On) && - ov::pass::low_precision::LowPrecision::isFunctionQuantized(_network.getFunction()); + (m_cfg.lpTransformsMode == Config::On) && + ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model); - ctx = std::make_shared(_cfg, extensionManager, weightsCache, isQuantizedFlag); + ctx = std::make_shared(m_cfg, extensionManager, weightsCache, isQuantizedFlag); } - graphLock._graph.CreateGraph(_network, ctx); + const std::shared_ptr model = m_model; + graphLock._graph.CreateGraph(model, ctx); } catch (...) 
{ exception = std::current_exception(); } }; if (nullptr != streamsExecutor) { - streamsExecutor->Execute(makeGraph); + streamsExecutor->execute(makeGraph); } else { makeGraph(); } @@ -201,44 +183,29 @@ ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const { return graphLock; } -InferenceEngine::IInferRequestInternal::Ptr ExecNetwork::CreateInferRequest() { - return CreateAsyncInferRequestFromSync(); +std::shared_ptr CompiledModel::create_sync_infer_request() const { + m_numRequests++; + return std::make_shared(std::static_pointer_cast(shared_from_this())); } -std::shared_ptr ExecNetwork::GetExecGraphInfo() { - if (_graphs.empty()) - IE_THROW() << "No graph was found"; - - return GetGraph()._graph.dump(); +std::shared_ptr CompiledModel::create_infer_request() const { + auto internal_request = create_sync_infer_request(); + auto async_infer_request = + std::make_shared(std::static_pointer_cast(internal_request), + get_task_executor(), + get_callback_executor()); + return async_infer_request; } -Parameter ExecNetwork::GetConfigLegacy(const std::string &name) const { - if (_graphs.empty()) - IE_THROW() << "No graph was found"; - /* legacy implementation return all the parameters which is actually not correct - * since they are not reconfigurable. Fixed for new API */ - Config engConfig = GetGraph()._graph.getConfig(); - auto option = engConfig._config.find(name); - if (option != engConfig._config.end()) { - return option->second; - } else { - IE_THROW() << "Unsupported ExecutableNetwork config key: " << name; - } -} +std::shared_ptr CompiledModel::get_runtime_model() const { + if (m_graphs.empty()) + OPENVINO_THROW("No graph was found"); -/** - * Only legacy parameters are supported. - * No RW peroperties supported for new API. - * All the RO properties are covered with GetMetric() method and - * GetConfig() is not expected to be called by new API with params from new configuration API. 
- */ -Parameter ExecNetwork::GetConfig(const std::string &name) const { - /* Internally legacy parameters are used with new API as part of migration procedure. - * This fallback can be removed as soon as migration completed */ - return GetConfigLegacy(name); + return get_graph()._graph.dump(); } -InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name, const GraphGuard& graph) const { +ov::Any CompiledModel::get_metric_legacy(const std::string& name, const GraphGuard& graph) const { + OPENVINO_SUPPRESS_DEPRECATED_START if (name == METRIC_KEY(NETWORK_NAME)) { IE_SET_METRIC_RETURN(NETWORK_NAME, graph.dump()->get_friendly_name()); } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { @@ -250,7 +217,7 @@ InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name, IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { std::vector configKeys; - for (auto && key : graph.getConfig()._config) { + for (auto&& key : graph.getConfig()._config) { configKeys.push_back(key.first); } IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); @@ -259,31 +226,38 @@ InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name, auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)); IE_ASSERT(option != engConfig._config.end()); auto streams = std::stoi(option->second); - IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast( - streams ? streams : 1)); + IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast(streams ? 
streams : 1)); } else { - IE_THROW() << "Unsupported ExecutableNetwork metric: " << name; + OPENVINO_THROW("Unsupported property: ", name); } + OPENVINO_SUPPRESS_DEPRECATED_END } -InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const { - if (_graphs.empty()) - IE_THROW() << "No graph was found"; +ov::Any CompiledModel::get_property(const std::string& name) const { + if (m_graphs.empty()) + OPENVINO_THROW("No graph was found"); + + if (name == ov::loaded_from_cache) { + return m_loaded_from_cache; + } + + Config engConfig = get_graph()._graph.getConfig(); + auto option = engConfig._config.find(name); + if (option != engConfig._config.end()) { + return option->second; + } + // @todo Can't we just use local copy (_cfg) instead? - auto graphLock = GetGraph(); + auto graphLock = get_graph(); const auto& graph = graphLock._graph; const auto& config = graph.getConfig(); - if (_cfg.isLegacyApi) { - return GetMetricLegacy(name, graph); - } - auto RO_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RO); }; if (name == ov::supported_properties) { - return std::vector { + return std::vector{ RO_property(ov::supported_properties.name()), RO_property(ov::model_name.name()), RO_property(ov::optimal_number_of_infer_requests.name()), @@ -310,20 +284,22 @@ InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const return decltype(ov::model_name)::value_type(modelName); } else if (name == ov::optimal_number_of_infer_requests) { const auto streams = config.streamExecutorConfig._streams; - return decltype(ov::optimal_number_of_infer_requests)::value_type(streams); // ov::optimal_number_of_infer_requests has no negative values + return decltype(ov::optimal_number_of_infer_requests)::value_type( + streams); // ov::optimal_number_of_infer_requests has no negative values } else if (name == ov::num_streams) { const auto streams = config.streamExecutorConfig._streams; - return 
decltype(ov::num_streams)::value_type(streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) + return decltype(ov::num_streams)::value_type( + streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) } else if (name == ov::affinity) { const auto affinity = config.streamExecutorConfig._threadBindingType; switch (affinity) { - case InferenceEngine::IStreamsExecutor::ThreadBindingType::NONE: + case IStreamsExecutor::ThreadBindingType::NONE: return ov::Affinity::NONE; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::CORES: + case IStreamsExecutor::ThreadBindingType::CORES: return ov::Affinity::CORE; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::NUMA: + case IStreamsExecutor::ThreadBindingType::NUMA: return ov::Affinity::NUMA; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: + case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: return ov::Affinity::HYBRID_AWARE; } return ov::Affinity::NONE; @@ -353,21 +329,23 @@ InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const const auto perfHintNumRequests = config.perfHintsConfig.ovPerfHintNumRequests; return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests); } else if (name == ov::execution_devices) { - return decltype(ov::execution_devices)::value_type{_plugin->GetName()}; + return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()}; } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(config.denormalsOptMode == Config::DenormalsOptMode::DO_On); + return decltype(ov::intel_cpu::denormals_optimization)::value_type(config.denormalsOptMode == + Config::DenormalsOptMode::DO_On); } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type(config.fcSparseWeiDecompressionRate); + return 
decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( + config.fcSparseWeiDecompressionRate); } /* Internally legacy parameters are used with new API as part of migration procedure. * This fallback can be removed as soon as migration completed */ - return GetMetricLegacy(name, graph); + return get_metric_legacy(name, graph); } -void ExecNetwork::Export(std::ostream& modelStream) { - CNNNetworkSerializer serializer(modelStream, extensionManager); - serializer <<_network; +void CompiledModel::export_model(std::ostream& modelStream) const { + ModelSerializer serializer(modelStream, extensionManager); + serializer << m_model; } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h new file mode 100644 index 00000000000000..80e00d52d8cf2d --- /dev/null +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -0,0 +1,89 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "extension_mngr.h" +#include "graph.h" +#include "graph_context.h" +#include "openvino/runtime/icompiled_model.hpp" +#include "openvino/runtime/iinfer_request.hpp" +#include "openvino/runtime/iplugin.hpp" +#include "openvino/runtime/isync_infer_request.hpp" +#include "openvino/runtime/threading/thread_local.hpp" + +namespace ov { +namespace intel_cpu { + +class CompiledModel : public ov::ICompiledModel { +public: + typedef std::shared_ptr Ptr; + + CompiledModel(const std::shared_ptr& model, + const std::shared_ptr& plugin, + const Config& cfg, + const ExtensionManager::Ptr& extMgr, + const bool loaded_from_cache = false); + + std::shared_ptr create_infer_request() const override; + + void export_model(std::ostream& model) const override; + + std::shared_ptr get_runtime_model() const override; + + ov::Any get_property(const std::string& name) const override; + + void 
set_property(const ov::AnyMap& properties) override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented", + "CompiledModel::set_property is not supported by CPU plugin!"); + }; + +private: + std::shared_ptr create_sync_infer_request() const override; + friend class SyncInferRequest; + + const std::shared_ptr m_model; + std::vector> m_memory_states; + const std::shared_ptr m_plugin; + std::shared_ptr m_task_executor = nullptr; //!< Holds a task executor + std::shared_ptr m_callback_executor = nullptr; //!< Holds a callback executor + + // Generic synchronization primitive on CompiledModel level. + // Usage example: helps to avoid data races during CPU Graph initialization in multi-streams scenario + std::shared_ptr m_mutex; + Config m_cfg; + ExtensionManager::Ptr extensionManager; + mutable std::atomic_int m_numRequests = {0}; + std::string m_name; + struct GraphGuard : public Graph { + std::mutex _mutex; + struct Lock : public std::unique_lock { + explicit Lock(GraphGuard& graph) : std::unique_lock(graph._mutex), _graph(graph) {} + GraphGuard& _graph; + }; + }; + + const bool m_loaded_from_cache; + // WARNING: Do not use m_graphs directly. + mutable std::deque m_graphs; + mutable SocketsWeights m_socketWeights; + + /* WARNING: Use get_graph() function to get access to graph in current stream. 
+ * NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs + * even from main thread + */ + GraphGuard::Lock get_graph() const; + + ov::Any get_metric_legacy(const std::string& name, const GraphGuard& graph) const; +}; + +} // namespace intel_cpu +} // namespace ov + diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 90e65a15495719..f9f712bf475797 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -16,6 +16,7 @@ #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "openvino/core/type/element_type_traits.hpp" +#include "openvino/runtime/intel_cpu/properties.hpp" #include "openvino/runtime/properties.hpp" #include "utils/debug_capabilities.h" #include "cpu/x64/cpu_isa_traits.hpp" @@ -23,31 +24,31 @@ namespace ov { namespace intel_cpu { -using namespace InferenceEngine; +using namespace ov::threading; using namespace dnnl::impl::cpu::x64; Config::Config() { // this is default mode #if defined(__APPLE__) || defined(_WIN32) - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE; + streamExecutorConfig._threadBindingType = IStreamsExecutor::NONE; #else - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::CORES; + streamExecutorConfig._threadBindingType = IStreamsExecutor::CORES; #endif // for the TBB code-path, additional configuration depending on the OS and CPU types #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) # if defined(__APPLE__) || defined(_WIN32) // 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default - auto numaNodes = getAvailableNUMANodes(); + auto numaNodes = get_available_numa_nodes(); if (numaNodes.size() > 1) { - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA; + streamExecutorConfig._threadBindingType = IStreamsExecutor::NUMA; } else { - 
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE; + streamExecutorConfig._threadBindingType = IStreamsExecutor::NONE; } # endif - if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) { - streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::HYBRID_AWARE; + if (get_available_cores_types().size() > 1 /*Hybrid CPU*/) { + streamExecutorConfig._threadBindingType = IStreamsExecutor::HYBRID_AWARE; } #endif @@ -69,16 +70,17 @@ void Config::applyDebugCapsProperties() { } #endif -void Config::readProperties(const std::map &prop, const ModelType modelType) { - const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys(); +void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { + const auto streamExecutorConfigKeys = + streamExecutorConfig.get_property(ov::supported_properties.name()).as>(); const auto hintsConfigKeys = perfHintsConfig.SupportedKeys(); for (const auto& kvp : prop) { const auto& key = kvp.first; - const auto& val = kvp.second; + const auto& val = kvp.second.as(); IE_SUPPRESS_DEPRECATED_START if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { - streamExecutorConfig.SetConfig(key, val); + streamExecutorConfig.set_property(key, val); if (key == ov::affinity.name()) { const auto affinity_val = ov::util::from_string(val, ov::affinity); if (affinity_val == ov::Affinity::CORE || affinity_val == ov::Affinity::HYBRID_AWARE) { @@ -92,10 +94,10 @@ void Config::readProperties(const std::map &prop, cons } else if (hintsConfigKeys.end() != std::find(hintsConfigKeys.begin(), hintsConfigKeys.end(), key)) { perfHintsConfig.SetConfig(key, val); } else if (key == ov::hint::enable_cpu_pinning.name()) { - if (val == PluginConfigParams::YES) { + if (val == InferenceEngine::PluginConfigParams::YES) { enableCpuPinning = true; changedCpuPinning = true; - } else if (val == PluginConfigParams::NO) { + } else if 
(val == InferenceEngine::PluginConfigParams::NO) { enableCpuPinning = false; changedCpuPinning = true; } else { @@ -115,71 +117,76 @@ void Config::readProperties(const std::map &prop, cons << ov::hint::SchedulingCoreType::ECORE_ONLY << std::endl; } } else if (key == ov::hint::enable_hyper_threading.name()) { - if (val == PluginConfigParams::YES) { + if (val == InferenceEngine::PluginConfigParams::YES) { enableHyperThreading = true; changedHyperThreading = true; - } else if (val == PluginConfigParams::NO) { + } else if (val == InferenceEngine::PluginConfigParams::NO) { enableHyperThreading = false; changedHyperThreading = true; } else { IE_THROW() << "Wrong value " << val << "for property key " << ov::hint::enable_hyper_threading.name() << ". Expected only true/false." << std::endl; } - } else if (key == CPUConfigParams::KEY_CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE) { + } else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) { float val_f = 0.0f; try { val_f = std::stof(val); } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CPUConfigParams::KEY_CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE - << ". Expected only float numbers"; + IE_THROW() << "Wrong value for property key " << ov::intel_cpu::sparse_weights_decompression_rate.name() + << ". Expected only float numbers"; } if (val_f < 0.f || val_f > 1.f) { - IE_THROW() << "Wrong value for property key " << CPUConfigParams::KEY_CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE - << ". Sparse rate must be in range [0.0f,1.0f]"; + IE_THROW() << "Wrong value for property key " << ov::intel_cpu::sparse_weights_decompression_rate.name() + << ". 
Sparse rate must be in range [0.0f,1.0f]"; } else { fcSparseWeiDecompressionRate = val_f; } - } else if (key == PluginConfigParams::KEY_PERF_COUNT) { - if (val == PluginConfigParams::YES) collectPerfCounters = true; - else if (val == PluginConfigParams::NO) collectPerfCounters = false; + } else if (key == ov::enable_profiling.name()) { + if (val == InferenceEngine::PluginConfigParams::YES) + collectPerfCounters = true; + else if (val == InferenceEngine::PluginConfigParams::NO) + collectPerfCounters = false; else - IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_PERF_COUNT - << ". Expected only YES/NO"; - } else if (key == PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) { - if (val == PluginConfigParams::YES) exclusiveAsyncRequests = true; - else if (val == PluginConfigParams::NO) exclusiveAsyncRequests = false; + IE_THROW() << "Wrong value for property key " << ov::enable_profiling.name() + << ". Expected only YES/NO"; + } else if (key == ov::exclusive_async_requests.name()) { + if (val == InferenceEngine::PluginConfigParams::YES) + exclusiveAsyncRequests = true; + else if (val == InferenceEngine::PluginConfigParams::NO) + exclusiveAsyncRequests = false; else - IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS + IE_THROW() << "Wrong value for property key " << ov::exclusive_async_requests.name() << ". 
Expected only YES/NO"; IE_SUPPRESS_DEPRECATED_START - } else if (key.compare(PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT) == 0) { + } else if (key.compare(InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT) == 0) { IE_SUPPRESS_DEPRECATED_END // empty string means that dumping is switched off dumpToDot = val; - } else if (key.compare(PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE) == 0) { - if (val == PluginConfigParams::NO) + } else if (key.compare(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE) == 0) { + if (val == InferenceEngine::PluginConfigParams::NO) lpTransformsMode = LPTransformsMode::Off; - else if (val == PluginConfigParams::YES) + else if (val == InferenceEngine::PluginConfigParams::YES) lpTransformsMode = LPTransformsMode::On; else - IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE; + IE_THROW() << "Wrong value for property key " + << InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE; } else if (key == ov::device::id.name()) { device_id = val; if (!device_id.empty()) { IE_THROW() << "CPU plugin supports only '' as device id"; } - } else if (key == PluginConfigParams::KEY_ENFORCE_BF16) { - if (val == PluginConfigParams::YES) { + } else if (key == InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16) { + if (val == InferenceEngine::PluginConfigParams::YES) { if (mayiuse(avx512_core)) { inferencePrecision = ov::element::bf16; } else { IE_THROW() << "Platform doesn't support BF16 format"; } - } else if (val == PluginConfigParams::NO) { + } else if (val == InferenceEngine::PluginConfigParams::NO) { inferencePrecision = ov::element::f32; } else { - IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16 - << ". Expected only YES/NO"; + IE_THROW() << "Wrong value for property key " << InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16 + << ". 
Expected only YES/NO"; } inferencePrecisionSetExplicitly = true; } else if (key == ov::hint::inference_precision.name()) { @@ -204,39 +211,41 @@ void Config::readProperties(const std::map &prop, cons inferencePrecisionSetExplicitly = true; } else { IE_THROW() << "Wrong value for property key " << ov::hint::inference_precision.name() - << ". Supported values: bf16, f16, f32"; + << ". Supported values: bf16, f32"; } - } else if (PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY == key) { + } else if (InferenceEngine::PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY == key) { int val_i = -1; try { val_i = std::stoi(val); } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY + IE_THROW() << "Wrong value for property key " + << InferenceEngine::PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY << ". Expected only integer numbers"; } // any negative value will be treated // as zero that means disabling the cache rtCacheCapacity = std::max(val_i, 0); - } else if (CPUConfigParams::KEY_CPU_DENORMALS_OPTIMIZATION == key) { - if (val == PluginConfigParams::YES) { + } else if (ov::intel_cpu::denormals_optimization.name() == key) { + if (val == InferenceEngine::PluginConfigParams::YES) { denormalsOptMode = DenormalsOptMode::DO_On; - } else if (val == PluginConfigParams::NO) { + } else if (val == InferenceEngine::PluginConfigParams::NO) { denormalsOptMode = DenormalsOptMode::DO_Off; } else { denormalsOptMode = DenormalsOptMode::DO_Keep; - IE_THROW() << "Wrong value for property key " << CPUConfigParams::KEY_CPU_DENORMALS_OPTIMIZATION - << ". Expected only YES/NO"; + IE_THROW() << "Wrong value for property key " << ov::intel_cpu::denormals_optimization.name() + << ". 
Expected only YES/NO"; } - } else if (key == PluginConfigInternalParams::KEY_SNIPPETS_MODE) { - if (val == PluginConfigInternalParams::ENABLE) + } else if (key == InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE) { + if (val == InferenceEngine::PluginConfigInternalParams::ENABLE) snippetsMode = SnippetsMode::Enable; - else if (val == PluginConfigInternalParams::IGNORE_CALLBACK) + else if (val == InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK) snippetsMode = SnippetsMode::IgnoreCallback; - else if (val == PluginConfigInternalParams::DISABLE) + else if (val == InferenceEngine::PluginConfigInternalParams::DISABLE) snippetsMode = SnippetsMode::Disable; else - IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_SNIPPETS_MODE - << ". Expected values: ENABLE/DISABLE/IGNORE_CALLBACK"; + IE_THROW() << "Wrong value for property key " + << InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE + << ". Expected values: ENABLE/DISABLE/IGNORE_CALLBACK"; } else if (key == ov::hint::execution_mode.name()) { if (val == "PERFORMANCE") { executionMode = ov::hint::ExecutionMode::PERFORMANCE; @@ -244,7 +253,7 @@ void Config::readProperties(const std::map &prop, cons executionMode = ov::hint::ExecutionMode::ACCURACY; } else { IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name() - << ". Supported values: PERFORMANCE, ACCURACY"; + << ". 
Supported values: PERFORMANCE, ACCURACY"; } } else { IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin"; @@ -273,7 +282,7 @@ void Config::readProperties(const std::map &prop, cons if (!prop.empty()) _config.clear(); - if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams + if (exclusiveAsyncRequests) { // Exclusive request feature disables the streams streamExecutorConfig._streams = 1; streamExecutorConfig._streams_changed = true; } @@ -293,6 +302,7 @@ void Config::updateProperties() { if (!_config.empty()) return; + using namespace InferenceEngine; switch (streamExecutorConfig._threadBindingType) { case IStreamsExecutor::ThreadBindingType::NONE: _config.insert({ PluginConfigParams::KEY_CPU_BIND_THREAD, PluginConfigParams::NO }); diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index fab215e64c10b0..f7c72d29e0f6ed 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -4,9 +4,8 @@ #pragma once -#include +#include #include -#include #include #include #include "utils/debug_caps_config.h" @@ -62,7 +61,7 @@ struct Config { // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives size_t rtCacheCapacity = 0ul; #endif - InferenceEngine::IStreamsExecutor::Config streamExecutorConfig; + ov::threading::IStreamsExecutor::Config streamExecutorConfig; InferenceEngine::PerfHintsConfig perfHintsConfig; bool enableCpuPinning = true; bool changedCpuPinning = false; @@ -88,7 +87,8 @@ struct Config { // is reserved. 
bool DAZOn = false; - void readProperties(const std::map &config, const ModelType modelType = ModelType::Unknown); + void readProperties(const ov::AnyMap& config, const ModelType modelType = ModelType::Unknown); + void updateProperties(); std::map _config; diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 6c34123a65b046..2b7fe3f4001987 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -23,6 +23,16 @@ using namespace dnnl; namespace ov { namespace intel_cpu { +template <> +DnnlMemoryDescPtr IMemory::getDescWithType() const { + return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr()); +} + +template <> +BlockedMemoryDescPtr IMemory::getDescWithType() const { + return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr()); +} + namespace { inline void setSubnormalsToZero(float *data, size_t size) { uint32_t *u32data = reinterpret_cast(data); @@ -36,21 +46,27 @@ namespace { void transferData(const IMemory& src, const IMemory& dst, bool ftz) { node::Reorder::reorderData(src, dst); - auto localPrim = dst.getPrimitive(); - auto desc = localPrim.get_desc(); - dnnl::impl::memory_desc_wrapper wrapper(desc.get()); - - if (ftz - && src.getDataType() == memory::data_type::f32 - && !wrapper.is_wino_desc() - // WA: to avoid zero filling auxiliary information - && !wrapper.is_rnn_packed_desc() - && dst.getDataType() != memory::data_type::bf16) { - // Internal blobs don't have strides yet. 
- auto *memData = static_cast(dst.getData()); - memData += wrapper.offset0(); - setSubnormalsToZero(memData, dst.getSize() / sizeof(float)); + if (!ftz) { + return; } + if (src.getDesc().getPrecision() != Precision::FP32 || dst.getDesc().getPrecision() == Precision::BF16) { + return; + } + size_t offset = 0; + if (dst.getDesc().getType() & MemoryDescType::Dnnl) { + // here we can safely cast to DnnlMemoryDesc + auto dnnl_desc = dst.getDescWithType(); + auto desc = dnnl_desc->getDnnlDesc(); + dnnl::impl::memory_desc_wrapper wrapper(desc.get()); + offset = wrapper.offset0(); + if (wrapper.is_wino_desc() || wrapper.is_rnn_packed_desc()) { + return; + } + } + // actual FTZ + auto* memData = static_cast(dst.getData()); + memData += offset; + setSubnormalsToZero(memData, dst.getSize() / sizeof(float)); } } // namespace @@ -122,16 +138,6 @@ void Memory::redefineDesc(MemoryDescPtr desc) { this->create(desc, nullptr, false); } -template<> -DnnlMemoryDescPtr IMemory::getDescWithType() const { - return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr()); -} - -template<> -BlockedMemoryDescPtr IMemory::getDescWithType() const { - return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr()); -} - void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index f9e336bcaba279..b000c35c646648 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -18,7 +18,7 @@ #include "performance_heuristics.hpp" using namespace ov; -using namespace threading; +using namespace ov::threading; #define INIT_VAL -100 @@ -412,7 +412,7 @@ std::vector> get_streams_info_table(const int input_streams, int get_model_prefer_threads(const int num_streams, const std::vector> proc_type_table, - const std::shared_ptr& ngraphFunc, + const std::shared_ptr& model, Config& config) 
{ const int sockets = get_default_latency_streams(config.latencyThreadingMode); auto model_prefer = 0; @@ -441,7 +441,7 @@ int get_model_prefer_threads(const int num_streams, const float memThresholdAssumeLimitedForISA = ov::MemBandwidthPressure::LIMITED / isaSpecificThreshold; const float L2_cache_size = dnnl::utils::get_cache_size(2 /*level*/, true /*per core */); ov::MemBandwidthPressure networkToleranceForLowCache = - ov::MemBandwidthPressureTolerance(ngraphFunc, L2_cache_size, memThresholdAssumeLimitedForISA); + ov::MemBandwidthPressureTolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA); config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || @@ -469,7 +469,7 @@ int get_model_prefer_threads(const int num_streams, model_prefer = proc_type_table[0][ALL_PROC]; } #else - bool fp_intesive = !ov::op::util::has_op_with_type(ngraphFunc); + bool fp_intesive = !ov::op::util::has_op_with_type(model); const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores; const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores; // by default the latency case uses (faster) Big cores only, depending on the compute ratio @@ -487,14 +487,14 @@ int get_model_prefer_threads(const int num_streams, } std::vector> generate_stream_info(const int streams, - const std::shared_ptr& ngraphFunc, + const std::shared_ptr& model, Config& config, std::vector>& proc_type_table, int preferred_nthreads_per_stream) { int model_prefer_threads = preferred_nthreads_per_stream; - InferenceEngine::IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; - + IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; proc_type_table = 
apply_scheduling_core_type(config.schedulingCoreType, proc_type_table); + proc_type_table = apply_hyper_threading(config.enableHyperThreading, config.changedHyperThreading, config.perfHintsConfig.ovPerfHint, @@ -505,7 +505,7 @@ std::vector> generate_stream_info(const int streams, config.latencyThreadingMode, proc_type_table); if (-1 == preferred_nthreads_per_stream) { - model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, ngraphFunc, config); + model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, model, config); } executor_config._streams_info_table = get_streams_info_table(executor_config._streams, @@ -519,13 +519,13 @@ std::vector> generate_stream_info(const int streams, return proc_type_table; } -void get_num_streams(const int streams, const std::shared_ptr& ngraphFunc, Config& config) { - InferenceEngine::IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; +void get_num_streams(const int streams, const std::shared_ptr& model, Config& config) { + IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; std::vector> proc_type_table = get_proc_type_table(); - generate_stream_info(streams, ngraphFunc, config, proc_type_table); + generate_stream_info(streams, model, config, proc_type_table); - executor_config = InferenceEngine::IStreamsExecutor::Config::reserve_cpu_threads(executor_config); + executor_config = IStreamsExecutor::Config::reserve_cpu_threads(executor_config); executor_config._threadsPerStream = executor_config._streams_info_table[0][THREADS_PER_STREAM]; } diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp index 44d676014aad75..dcf79b3bd835e0 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp @@ -58,19 +58,19 @@ std::vector> get_streams_info_table(const int input_streams, * - LATENCY hint equals 1 stream. 
* @param[in] proc_type_table candidate processors available at this time * - candidate processors have benn updated based on properties like "Ecore only" in previous function - * @param[in] ngraphFunc ngraph function + * @param[in] model model * @param[in] config intel cpu configuration * @return model_prefer_threads "0" means generating the optimal threads per stream based on platform */ int get_model_prefer_threads(const int num_streams, const std::vector> proc_type_table, - const std::shared_ptr& ngraphFunc, + const std::shared_ptr& model, Config& config); /** * @brief Generate streams information according to processors type table * @param[in] streams number of streams - * @param[in] ngraphFunc graph handle + * @param[in] model graph handle * @param[in] config intel cpu configuration * @param[in] proc_type_table candidate processors available at current platform * @param[in] preferred_nthreads_per_stream is initial preferred number of threads per stream @@ -78,7 +78,7 @@ int get_model_prefer_threads(const int num_streams, * ov::hint::enable_hyper_threading */ std::vector> generate_stream_info(const int streams, - const std::shared_ptr& ngraphFunc, + const std::shared_ptr& model, Config& config, std::vector>& proc_type_table, int preferred_nthreads_per_stream = -1); @@ -96,11 +96,11 @@ struct StreamCfg { /** * @brief Get information about number of streams, threads and pinning threads on different processors * @param[in] streams number of streams - * @param[in] ngraphFunc graph handle + * @param[in] model graph handle * @param[in] config intel cpu configuration */ void get_num_streams(const int streams, - const std::shared_ptr& ngraphFunc, + const std::shared_ptr& model, Config& config); /** diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 1185a79f31c086..dd4757ac562778 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp 
@@ -20,6 +20,8 @@ namespace intel_cpu { uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) { switch (dataType) { + case dnnl::memory::data_type::f64: + return 8; case dnnl::memory::data_type::f32: case dnnl::memory::data_type::s32: return 4; @@ -36,7 +38,7 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) { case dnnl::memory::data_type::undef: return 0; default: - IE_THROW() << "Unsupported data type."; + OPENVINO_THROW("Unsupported data type."); } } @@ -66,7 +68,7 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin case InferenceEngine::Precision::UNSPECIFIED: return memory::data_type::undef; default: { - IE_THROW() << "The plugin does not support " << prec.name(); + OPENVINO_THROW("The plugin does not support ", prec.name()); } } } @@ -87,6 +89,8 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat return InferenceEngine::Precision::BIN; case memory::data_type::f16: return InferenceEngine::Precision::FP16; + case memory::data_type::f64: + return InferenceEngine::Precision::FP64; case memory::data_type::nf4: return InferenceEngine::Precision::NF4; case memory::data_type::s4: diff --git a/src/plugins/intel_cpu/src/exec_network.h b/src/plugins/intel_cpu/src/exec_network.h deleted file mode 100644 index 711654f5df7087..00000000000000 --- a/src/plugins/intel_cpu/src/exec_network.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include "graph.h" -#include "extension_mngr.h" -#include "graph_context.h" -#include - -#include -#include -#include -#include -#include - -namespace ov { -namespace intel_cpu { - -class ExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault { -public: - typedef std::shared_ptr Ptr; - - std::shared_ptr - CreateInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) override; - - 
std::shared_ptr - CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs) override; - - InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; - - ExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg, - const ExtensionManager::Ptr &extMgr, - const std::shared_ptr& plugin); - - InferenceEngine::Parameter GetConfig(const std::string &name) const override; - - InferenceEngine::Parameter GetMetric(const std::string &name) const override; - - std::shared_ptr GetExecGraphInfo() override; - - void Export(std::ostream& modelStream) override; - -protected: - friend class InferRequestBase; - ExtensionManager::Ptr extensionManager; - std::vector memoryStates; - const InferenceEngine::CNNNetwork _network; - // Generic synchronization primitive on ExecNetwork level. - // Usage example: helps to avoid data races during CPU Graph initialization in multi-streams scenario - mutable std::shared_ptr _mutex; - Config _cfg; - std::atomic_int _numRequests = {0}; - std::string _name; - struct GraphGuard : public Graph { - std::mutex _mutex; - struct Lock : public std::unique_lock { - explicit Lock(GraphGuard& graph) : std::unique_lock(graph._mutex), _graph(graph) {} - GraphGuard& _graph; - }; - }; - - // WARNING: Do not use _graphs directly. - mutable std::deque _graphs; - mutable SocketsWeights _socketWeights; - - /* WARNING: Use GetGraph() function to get access to graph in current stream. 
- * NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs - * even from main thread - */ - GraphGuard::Lock GetGraph() const; - - InferenceEngine::Parameter GetConfigLegacy(const std::string &name) const; - - InferenceEngine::Parameter GetMetricLegacy(const std::string &name, const GraphGuard& graph) const; -}; - -} // namespace intel_cpu -} // namespace ov - diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 5275259886ed91..b4efe7d0f3bd66 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -2,59 +2,55 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "graph.h" + #include -#include +#include +#include #include -#include +#include +#include #include -#include -#include -#include #include -#include +#include #include +#include -#include "graph.h" -#include "graph_dumper.h" -#include "graph_optimizer.h" +#include "common/primitive_desc.hpp" +#include "common/primitive_desc_iface.hpp" #include "dnnl_extension_utils.h" #include "extension_mngr.h" -#include "ie_ngraph_utils.hpp" -#include "memory_solver.hpp" -#include "itt.h" +#include "graph_dumper.h" +#include "graph_optimizer.h" +#include "ie_algorithm.hpp" #include "infer_request.h" -#include "nodes/input.h" -#include +#include "itt.h" +#include "low_precision/low_precision.hpp" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "memory_solver.hpp" +#include "nodes/common/cpu_convert.h" +#include "nodes/common/cpu_memcpy.h" #include "nodes/convert.h" -#include "nodes/subgraph.h" #include "nodes/fullyconnected.h" - -#include -#include -#include "nodes/common/cpu_memcpy.h" -#include "nodes/common/cpu_convert.h" - +#include "nodes/input.h" +#include "nodes/reorder.h" +#include "nodes/subgraph.h" +#include "openvino/core/model.hpp" +#include "openvino/core/node.hpp" +#include "openvino/op/ops.hpp" #include 
"precision_utils.h" -#include - -#include "utils/general_utils.h" +#include "transformations/utils/utils.hpp" +#include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" -#include "utils/node_dumper.h" +#include "utils/general_utils.h" #include "utils/ngraph_utils.hpp" -#include "utils/cpu_utils.hpp" +#include "utils/node_dumper.h" #include "utils/verbose.h" #include "memory_desc/cpu_memory_desc_utils.h" -#include -#include -#include -#include -#include -#include "memory_desc/dnnl_blocked_memory_desc.h" -#include -#include #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO) -# include +# include #endif using namespace dnnl; @@ -87,10 +83,10 @@ void Graph::CreateGraph(NET &net, const GraphContext::CPtr ctx) { CPU_DEBUG_CAP_ENABLE(serialize(*this)); } -void Graph::CreateGraph(const std::vector &graphNodes, - const std::vector &graphEdges, - const GraphContext::CPtr ctx, - std::string name) { +void Graph::CreateGraph(const std::vector& graphNodes, + const std::vector& graphEdges, + const GraphContext::CPtr ctx, + std::string name) { if (IsReady()) ForgetGraphData(); @@ -116,10 +112,9 @@ void Graph::CreateGraph(const std::vector &graphNodes, } template void Graph::CreateGraph(const std::shared_ptr&, const GraphContext::CPtr); -template void Graph::CreateGraph(const CNNNetwork&, const GraphContext::CPtr); - -void Graph::Replicate(const std::shared_ptr &subgraph) { - this->_name = "subgraph"; +void Graph::Replicate(const std::shared_ptr &model) { + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "ov::Model"); + this->_name = model->get_friendly_name(); this->reuse_io_tensors = false; // Map data object onto producer node @@ -129,7 +124,8 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { // Will be stored as fake output separately. 
std::deque> unusedOutputs; - auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + auto getParentOutputPort = [](const std::shared_ptr childOp, + const std::shared_ptr parentOp, const size_t childInputPort) -> int { for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { @@ -140,19 +136,21 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { return -1; }; - for (const auto& op : subgraph->get_ordered_ops()) { + const bool is_legacy_api = getConfig().isLegacyApi; + for (const auto& op : model->get_ordered_ops()) { const NodePtr node {Node::factory().create(op, context)}; graphNodes.push_back(node); - if (op->get_type_info() == op::v0::Parameter::get_type_info_static()) { - inputNodesMap[node->getName()] = node; + const std::string name = get_port_name(ov::Output(op, 0), is_legacy_api); + inputNodesMap[name] = node; + if (node->isDynamicNode()) { + graphHasDynamicInput = true; + } } if (op->get_type_info() == op::v0::Result::get_type_info_static()) { - const auto prev = op->input_value(0); - const std::string inputID = op::util::get_ie_output_name(prev); - + const std::string inputID = get_port_name(op->output(0), is_legacy_api); outputNodesMap[inputID] = node; } @@ -193,124 +191,16 @@ void Graph::Replicate(const std::shared_ptr &subgraph) { graphNodes.push_back(outNode); } - EnforceInferencePrecision(); -} - -void Graph::Replicate(const CNNNetwork &network) { - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "CNNNetwork"); - - const InputsDataMap& inputsInfo = network.getInputsInfo(); - const OutputsDataMap& outputsInfo = network.getOutputsInfo(); - - this->_name = network.getName(); - - std::shared_ptr func = network.getFunction(); - - if (!func) { - IE_THROW() << "Function pointer inside CNNNetwork is nullptr"; - } - - auto orderedOps = 
func->get_ordered_ops(); - - // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ov::Node - std::map, NodePtr> op2node; - std::deque> unusedOutputs; // nodes which has no consumers (output or just unused) - - auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, - const size_t childInputPort) -> int { - for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { - if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { - return static_cast(parentPort); - } - } - - return -1; - }; - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes"); - - // Replicate All Nodes in topological order - for (const auto& op : orderedOps) { - const NodePtr node(Node::factory().create(op, context)); - - graphNodes.push_back(node); - - if (op->get_type_info() == op::v0::Parameter::get_type_info_static()) { - const auto inInfo = inputsInfo.find(node->getName()); - if (inInfo != inputsInfo.end()) { - inputNodesMap[node->getName()] = node; - if (node->isDynamicNode()) { - graphHasDynamicInput = true; - } - } - } - - if (op->get_type_info() == op::v0::Result::get_type_info_static()) { - const auto &input = op->input_value(0); - const auto name = op::util::get_ie_output_name(input); - - if (outputsInfo.count(name) != 0) { - outputNodesMap[name] = node; - } - } - - op2node[op] = node; - - for (size_t port = 0; port < op->get_input_size(); port++) { - auto parentOp = op->get_input_node_shared_ptr(port); - auto parentNode = op2node[parentOp]; - - EdgePtr edge(new Edge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); - node->addEdge(edge); - graphEdges.push_back(edge); - } - - if (!one_of(op->get_type_info(), - op::v0::Result::get_type_info_static(), - op::v3::Assign::get_type_info_static(), - op::v6::Assign::get_type_info_static())) { - for (size_t oi = 0; oi < op->get_output_size(); oi++) { - if 
(op->get_output_target_inputs(oi).empty()) { - unusedOutputs.push_back(op->output(oi)); - } - } - } - } - - // Add stub output node for unused outputs - for (auto unusedOutput : unusedOutputs) { - auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; - const auto port = unusedOutput.get_index(); - const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const NodePtr outNode = std::make_shared(parentNode->outputShapes[port], - parentNode->getOriginalOutputPrecisionAtPort(port), - nodeName, "Result", context); - EdgePtr edge(new Edge(parentNode, outNode, port, 0)); - outNode->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(outNode); - } - - auto hasSubgraphConsumers = [] (const NodePtr& node) -> bool { - const auto & childEdges = node->getChildEdges(); - return std::any_of(childEdges.begin(), childEdges.end(), - [] (const EdgeWeakPtr& edge) -> bool { - auto edgePtr = edge.lock(); - if (!edgePtr) - return false; - return edgePtr->getChild()->getType() == Type::Subgraph; - }); + auto hasSubgraphConsumers = [](const NodePtr& node) -> bool { + const auto& childEdges = node->getChildEdges(); + return std::any_of(childEdges.begin(), childEdges.end(), [](const EdgeWeakPtr& edge) -> bool { + auto edgePtr = edge.lock(); + if (!edgePtr) + return false; + return edgePtr->getChild()->getType() == Type::Subgraph; + }); }; - // change precision for input/output nodes to avoid extra data conversion when set input/output blobs - for (auto &input : inputNodesMap) { - const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision()); - input.second->setOriginalOutputPrecisionAtPort(0, precToSet); - } - for (auto &output : outputNodesMap) { - const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision()); - output.second->setOriginalInputPrecisionAtPort(0, precToSet); - } // enforce must be performed after inputs and outputs info 
are taken into account EnforceInferencePrecision(); // also we need to change input/output precisions for consumers/producers to avoid inserting reorder @@ -337,20 +227,6 @@ void Graph::Replicate(const CNNNetwork &network) { parent->setOriginalOutputPrecisionAtPort(parentEdges[i]->getInputNum(), precToSet); } } - - // Loading mean images - for (const auto& input : inputsInfo) { - Shape outShape; - if (!inputNodesMap[input.first]->outputShapes.front().getRank()) { - outShape = Shape(SizeVector({1, 1})); - } else { - outShape = inputNodesMap[input.first]->outputShapes.front(); - } - InputInfo::Ptr ii = input.second; - if (ii && ii->getPreProcess().getNumberOfChannels()) { - _normalizePreprocMap[input.first].Load(outShape, ii); - } - } } void Graph::InitGraph() { @@ -401,12 +277,6 @@ void Graph::InitDescriptors() { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "InitDescriptors", "Prepare"); for (auto &node : graphNodes) { - if (node->getType() == Type::Input && _normalizePreprocMap.find(node->getName()) != _normalizePreprocMap.end()) { - auto *inputNode = dynamic_cast(node.get()); - if (inputNode) - inputNode->withMeanImage(); - } - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors); DEBUG_LOG("Get supported primitive descriptors for node: ", node->getName()); node->getSupportedDescriptors(); @@ -998,45 +868,76 @@ bool Graph::ProcessDynNodes() { return result; } -void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { - if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready."; +void Graph::PushInputData(const std::string& name, const ov::SoPtr& input) { + if (!IsReady()) OPENVINO_THROW("Wrong state. 
Topology not ready."); + auto input_itr = inputNodesMap.find(name); + if (input_itr != inputNodesMap.end()) { + auto create_mem_desc = [&](const ov::SoPtr& tensor) -> CpuBlockedMemoryDesc { + auto element_type = tensor->get_element_type(); + auto shape = tensor->get_shape(); + if (shape.empty()) + shape = {tensor->get_size()}; + std::vector blk_order(shape.size()); + std::iota(blk_order.begin(), blk_order.end(), 0); + std::vector dim_offset(shape.size(), 0); + std::vector blk_strides; + auto byte_strides = element_type.bitwidth() >= 8 ? tensor->get_strides() : Strides{}; + if (byte_strides.empty()) { + blk_strides = ov::row_major_strides(shape); + } else { + // ROI tensor need figure out correct blk_strides + blk_strides.resize(byte_strides.size()); + std::transform(byte_strides.begin(), + byte_strides.end(), + blk_strides.begin(), + [&element_type](size_t byte_stride) { + OPENVINO_ASSERT(byte_stride % element_type.size() == 0, + "Limitation: Stride in bytes ", + byte_stride, + " should be divisible by size of element ", + element_type.size()); + return byte_stride / element_type.size(); + }); + } + InferenceEngine::TensorDesc tensorDesc( + InferenceEngine::details::convertPrecision(tensor->get_element_type()), + shape, + InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}); + return MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensorDesc); + }; - auto input = inputNodesMap.find(name); - if (input != inputNodesMap.end()) { - auto& inTensorDesc = in->getTensorDesc(); - auto node = input->second; + auto node = input_itr->second; auto childEdge = node->getChildEdgeAt(0); const auto& outDims = node->getOutputShapeAtPort(0); - const void *ext_data_ptr = in->cbuffer(); - void *inter_data_ptr = childEdge->getMemory().getData(); + const void* ext_data_ptr = input->data(); + void* inter_data_ptr = childEdge->getMemory().getData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = 
MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); - - Memory ext_mem(getEngine(), ext_tdesc, ext_data_ptr, false); - + auto ext_tensor_desc = create_mem_desc(input); + Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false); childEdge->getMemory().load(ext_mem, false); } // todo: make sure 'name' exists in this map... if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { - if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast(inter_data_ptr), - inTensorDesc.getLayout()); + if (input->get_element_type() == ov::element::f32) { + _normalizePreprocMap[name].NormalizeImage(outDims, + reinterpret_cast(inter_data_ptr), + TensorDesc::getLayoutByDims(input->get_shape())); } else { - IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported"; + OPENVINO_THROW("Mean image of type ", input->get_element_type().get_type_name(), " is unsupported"); } } } else { - IE_THROW() << "Input blob for infer '" << name << "' doesn't correspond to input in network"; + OPENVINO_THROW("Input blob for infer '", name, "' doesn't correspond to input in network"); } } // suppose always being shared infer_request intel_cpu::Tensor to Graph if isDynamic. -void Graph::PullOutputData(BlobMap &out) { +void Graph::PullOutputData(std::unordered_map>& output) { if (!IsReady()) - IE_THROW() << "Wrong state. Topology not ready."; + OPENVINO_THROW("Wrong state. 
Topology not ready."); for (auto &outputMap : outputNodesMap) { auto name = outputMap.first; @@ -1044,16 +945,19 @@ void Graph::PullOutputData(BlobMap &out) { auto parentEdge = node->getParentEdgeAt(0); const auto& intr_blob = parentEdge->getMemory(); - const auto ext_blob_map = out.find(name); + const auto ext_blob_map = output.find(name); const auto ext_blob = ext_blob_map->second; - if (ext_blob_map == out.end()) { - IE_THROW(Unexpected) << "The CPU plugin graph doesn't contain output node with name: \"" << name << "\""; + if (ext_blob_map == output.end()) { + OPENVINO_THROW("The CPU plugin graph doesn't contain output node with name: ", name.c_str()); } - DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast(out[name]->buffer())); + InferenceEngine::TensorDesc expectedDesc( + InferenceEngine::details::convertPrecision(ext_blob->get_element_type()), + ext_blob->get_shape(), + InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size())); + DEBUG_LOG(name, ", tensor data addr ", static_cast(output[name]->data())); const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc()); - auto &expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar @@ -1069,18 +973,22 @@ void Graph::PullOutputData(BlobMap &out) { } auto outDims = intr_blob.getStaticDims(); - if (out[name]->getTensorDesc().getDims() != outDims && !isScalarOutput) { + if (ext_blob->get_shape() != outDims && !isScalarOutput) { // WA: because input/output info initially contains non empty dims, order etc. 
// and setDims (called inside setShape) can't correct modify blocked desc for desc with blocked layout if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) { expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout()); } - DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast(out[name]->buffer()), - " dims ", PartialShape(out[name]->getTensorDesc().getDims()), " -> ", PartialShape(outDims), + DEBUG_LOG(name, ", tensor data addr ", static_cast(output[name]->data()), + " dims ", PartialShape(output[name]->get_shape()), " -> ", PartialShape(outDims), ", intr ptr ", intr_blob.getData(), " , parentedge's memory object ", parentEdge->getMemoryPtr().get()); - out[name]->setShape(outDims); - DEBUG_LOG(name, ", blob ", out[name], ", addr ", static_cast(out[name]->buffer()), - " dims ", PartialShape(out[name]->getTensorDesc().getDims()), ", intr ptr ", intr_blob.getData()); + ext_blob->set_shape(outDims); + DEBUG_LOG(name, ", tensor data addr ", static_cast(output[name]->data()), + " dims ", PartialShape(output[name]->get_shape()), ", intr ptr ", intr_blob.getData()); + expectedDesc = + InferenceEngine::TensorDesc(InferenceEngine::details::convertPrecision(ext_blob->get_element_type()), + ext_blob->get_shape(), + InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size())); } // check for empty output blob @@ -1090,14 +998,15 @@ void Graph::PullOutputData(BlobMap &out) { auto srcPrec = actualDesc.getPrecision(); auto dstPrec = expectedDesc.getPrecision(); - - if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.getSize()) - IE_THROW() << "Output blob byte size is not equal network output byte size (" << ext_blob->byteSize() - << "!=" << intr_blob.getSize() << ")."; - - void *ext_blob_ptr = ext_blob->buffer(); + if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->get_byte_size() != intr_blob.getSize()) + OPENVINO_THROW("Output blob byte size is not equal network output byte 
size (", + ext_blob->get_byte_size(), + "!=", + intr_blob.getSize(), + ")."); + + void *ext_blob_ptr = ext_blob->data(); void *intr_blob_ptr = intr_blob.getData(); - DEBUG_LOG(name, " @ ", intr_blob_ptr, " -> ", ext_blob_ptr, " zero-copy: ", intr_blob_ptr == ext_blob_ptr, " graph ", this, "\r\n"); // That is the same memory. No need to copy @@ -1107,19 +1016,19 @@ void Graph::PullOutputData(BlobMap &out) { // User can initialize output via SetOutput API using tensorDesc with ANY layout. // For these cases we create planar memory descriptor. auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY - ? DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) - : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); + ? DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) + : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); Memory outBloMem(getEngine(), outBlobDesc, ext_blob_ptr, false); outBloMem.load(intr_blob, false); } else { size_t size_to_copy = intr_blob.getDescWithType()->getPaddedElementsCount(); - + DEBUG_LOG("pull_output: convert ", srcPrec, " to ", dstPrec); cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); } } } -void Graph::InferStatic(InferRequestBase* request) { +void Graph::InferStatic(SyncInferRequest* request) { dnnl::stream stream(getEngine()); for (const auto& node : executableGraphNodes) { @@ -1127,7 +1036,7 @@ void Graph::InferStatic(InferRequestBase* request) { PERF(node, getConfig().collectPerfCounters); if (request) - request->ThrowIfCanceled(); + request->throw_if_canceled(); ExecuteNode(node, stream); } } @@ -1327,7 +1236,7 @@ class UpdateNodes : public UpdateNodesBase { } // namespace -void Graph::InferDynamic(InferRequestBase* request) { +void Graph::InferDynamic(SyncInferRequest* request) { dnnl::stream stream(getEngine()); std::set syncIndsWorkSet; @@ -1355,7 +1264,7 @@ void Graph::InferDynamic(InferRequestBase* 
request) { PERF(node, getConfig().collectPerfCounters); if (request) - request->ThrowIfCanceled(); + request->throw_if_canceled(); ExecuteNode(node, stream); } } @@ -1373,9 +1282,9 @@ inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) } } -void Graph::Infer(InferRequestBase* request) { +void Graph::Infer(SyncInferRequest* request) { if (!IsReady()) { - IE_THROW() << "Wrong state of the ov::intel_cpu::Graph. Topology is not ready."; + OPENVINO_THROW("Wrong state of the ov::intel_cpu::Graph. Topology is not ready."); } if (Status::ReadyDynamic == status) { @@ -1383,7 +1292,7 @@ void Graph::Infer(InferRequestBase* request) { } else if (Status::ReadyStatic == status) { InferStatic(request); } else { - IE_THROW() << "Unknown ov::intel_cpu::Graph state: " << static_cast(status); + OPENVINO_THROW("Unknown ov::intel_cpu::Graph state: " , static_cast(status)); } if (infer_count != -1) infer_count++; @@ -1476,30 +1385,27 @@ void Graph::SortTopologically() { } } -void Graph::GetPerfData(std::map &perfMap) const { - unsigned i = 0; - std::function &, const NodePtr&)> - getPerfMapFor = [&](std::map &perfMap, const NodePtr& node) { - InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()]; - pc.execution_index = i++; - // TODO: Why time counter is signed? - pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg(); - pc.status = pc.cpu_uSec > 0 ? 
InferenceEngine::InferenceEngineProfileInfo::EXECUTED - : InferenceEngine::InferenceEngineProfileInfo::NOT_RUN; - std::string pdType = node->getPrimitiveDescriptorType(); - size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]); - pdType.copy(pc.exec_type, typeLen, 0); - size_t layerTypeLen = sizeof(pc.layer_type) / sizeof(pc.layer_type[0]); - node->typeStr.copy(pc.layer_type, layerTypeLen, 0); - - for (auto& fusedNode : node->fusedWith) { - getPerfMapFor(perfMap, fusedNode); - } +void Graph::GetPerfData(std::vector& perfMap) const { + std::function&, const NodePtr&)> getPerfMapFor = + [&](std::vector& perfMap, const NodePtr& node) { + ov::ProfilingInfo pc; + pc.node_name = node->getName(); + // pc.execution_index = i++; + uint64_t avg_time = node->PerfCounter().avg(); + pc.cpu_time = pc.real_time = std::chrono::microseconds(avg_time); + pc.status = avg_time > 0 ? ov::ProfilingInfo::Status::EXECUTED : ov::ProfilingInfo::Status::NOT_RUN; + pc.exec_type = node->getPrimitiveDescriptorType(); + pc.node_type = node->typeStr; + perfMap.emplace_back(pc); + + for (auto& fusedNode : node->fusedWith) { + getPerfMapFor(perfMap, fusedNode); + } - for (auto& mergedWith : node->mergedWith) { - getPerfMapFor(perfMap, mergedWith); - } - }; + for (auto& mergedWith : node->mergedWith) { + getPerfMapFor(perfMap, mergedWith); + } + }; for (size_t i = 0; i < graphNodes.size(); i++) { if (graphNodes[i]->isConstant()) @@ -1639,7 +1545,7 @@ NodePtr Graph::InsertReorder(EdgePtr edge, std::string layerName, const MemoryDe NodePtr newReorder(new node::Reorder(layerName, context)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { - IE_THROW() << "Graph::InsertReorder: Cannot cast to Reorder"; + OPENVINO_THROW("Graph::InsertReorder: Cannot cast to Reorder"); } reorderPtr->setDescs(inDesc, outDesc); reorderPtr->setOptimized(isOptimized); @@ -1665,10 +1571,13 @@ bool Graph::InsertNode(EdgePtr edge, NodePtr node, bool initNode) { auto oIndex = 
edge->getOutputNum(); auto iIndex = edge->getInputNum(); if (iIndex < 0 || oIndex < 0) - IE_THROW() << "Cannot insert node '" << node->getName() << "' between nodes: " - << edge->getParent()->getName() << " and " - << edge->getChild()->getName() << "."; - + OPENVINO_THROW("Cannot insert node '", + node->getName(), + "' between nodes: ", + edge->getParent()->getName(), + " and ", + edge->getChild()->getName(), + "."); edge->drop(); return InsertNode(edge->getParent(), edge->getChild(), node, iIndex, oIndex, initNode); @@ -1837,7 +1746,7 @@ void Graph::resolveInPlaceDirection(const NodePtr& node) const { } else if (inPlaceOutPort < 0) { return InplaceDirectionType::DOWN; } else { - IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + OPENVINO_THROW("Non trivial inPlace memory dependency has been detected"); } } // the requested port has a negative inPlace tag, let's check whether it is referenced from the output @@ -1856,7 +1765,7 @@ void Graph::resolveInPlaceDirection(const NodePtr& node) const { } else if (inPlaceInpPort < 0) { return InplaceDirectionType::UP; } else { - IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + OPENVINO_THROW("Non trivial inPlace memory dependency has been detected"); } } // the requested port has a negative inPlace tag, let's check whether it is referenced from the input @@ -1935,7 +1844,7 @@ void Graph::resolveInPlaceDirection(const NodePtr& node) const { config.outConfs[inPlaceInpPort].inPlace(-1); node->initDescriptor(config); } else { - IE_THROW() << "A node without an inPlace memory cyclic dependency has not been found"; + OPENVINO_THROW("A node without an inPlace memory cyclic dependency has not been found"); } } } diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index b94db195c100b1..dc566920628dd8 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -4,28 +4,29 @@ #pragma once -#include "cpp/ie_cnn_network.h" 
+#include "cache/multi_cache.h" #include "config.h" #include "cpu_memory.h" -#include "normalize_preprocess.h" -#include "node.h" -#include "edge.h" -#include "cache/multi_cache.h" #include "dnnl_scratch_pad.h" +#include "edge.h" #include "graph_context.h" +#include "node.h" +#include "normalize_preprocess.h" +#include "openvino/runtime/make_tensor.hpp" +#include "openvino/runtime/profiling_info.hpp" + +#include #include +#include #include #include -#include -#include #include "proxy_mem_mgr.h" namespace ov { namespace intel_cpu { -class InferRequestBase; -class InferRequest; +class SyncInferRequest; class Graph { public: @@ -60,10 +61,10 @@ class Graph { return _normalizePreprocMap.find(name) != _normalizePreprocMap.end(); } - void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in); - void PullOutputData(InferenceEngine::BlobMap &out); + void PushInputData(const std::string& name, const ov::SoPtr& input); + void PullOutputData(std::unordered_map>& output); - void Infer(InferRequestBase* request = nullptr); + void Infer(SyncInferRequest* request = nullptr); const std::vector& GetNodes() const { return graphNodes; @@ -92,14 +93,14 @@ class Graph { NodePtr getInputNodeByName(const std::string &name) { auto input = inputNodesMap.find(name); if (input == inputNodesMap.end()) - IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name; + OPENVINO_THROW("CPU execution graph doesn't contain input node with name: ", name); return input->second; } NodePtr getOutputNodeByName(const std::string &name) { auto output = outputNodesMap.find(name); if (output == outputNodesMap.end()) - IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name; + OPENVINO_THROW("CPU execution graph doesn't contain output node with name: ", name); return output->second; } @@ -115,7 +116,7 @@ class Graph { return context; } - void GetPerfData(std::map &perfMap) const; + void GetPerfData(std::vector &perfMap) const; void 
RemoveDroppedNodes(); void RemoveDroppedEdges(); @@ -182,7 +183,7 @@ class Graph { */ bool InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode = false); - std::shared_ptr dump() const; + std::shared_ptr dump() const; void ResetInferCount() { infer_count = 0; } @@ -225,7 +226,6 @@ class Graph { bool graphHasDynamicInput = false; - void Replicate(const InferenceEngine::CNNNetwork &network); void Replicate(const std::shared_ptr &subgraph); void InitGraph(); void InitNodes(); @@ -239,13 +239,11 @@ class Graph { void ExtractExecutableNodes(); void ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const; void CreatePrimitivesAndExecConstants() const; - void InferStatic(InferRequestBase* request); - void InferDynamic(InferRequestBase* request); + void InferStatic(SyncInferRequest* request); + void InferDynamic(SyncInferRequest* request); - friend class LegacyInferRequest; - friend class intel_cpu::InferRequest; - friend class intel_cpu::InferRequestBase; - friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); + friend class intel_cpu::SyncInferRequest; + friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); private: // TODO: change std::map to std::unordered_map @@ -268,5 +266,5 @@ class Graph { void resolveInPlaceDirection(const NodePtr& node) const; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 749342edae152c..f8de75257a9d73 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -3,44 +3,63 @@ // #include "infer_request.h" + +#include "async_infer_request.h" +#include "compiled_model.h" +#include "debug.h" #include "dnnl_extension_utils.h" -#include -#include -#include -#include -#include "nodes/concat.h" -#include "nodes/split.h" -#include -#include -#include 
"exec_network.h" +#include "ie_common.h" +#include "ie_ngraph_utils.hpp" #include "itt.h" -#include "nodes/common/cpu_convert.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" #include "memory_state.h" -#include "nodes/memory.hpp" +#include "nodes/common/cpu_convert.h" #include "nodes/common/cpu_memcpy.h" -#include "async_infer_request.h" -#include -#include "utils/general_utils.h" -#include "utils/cpu_utils.hpp" -#include "memory_desc/dnnl_blocked_memory_desc.h" -#include -#include -#include "proxy_mem_mgr.h" +#include "nodes/concat.h" +#include "nodes/memory.hpp" +#include "nodes/split.h" +#include "openvino/core/shape.hpp" #include "openvino/runtime/make_tensor.hpp" -#include +#include "openvino/runtime/tensor.hpp" +#include "proxy_mem_mgr.h" +#include "transformations/utils/utils.hpp" +#include "utils/cpu_utils.hpp" +#include "utils/general_utils.h" namespace ov { namespace intel_cpu { +SyncInferRequest::SyncInferRequest(std::shared_ptr compiled_model) + : ov::ISyncInferRequest(compiled_model), + m_compiled_model(compiled_model) { + m_is_legacy_api = m_compiled_model->get_graph()._graph.getConfig().isLegacyApi; -void InferRequestBase::CreateInferRequest() { - auto id = (execNetwork->_numRequests)++; - profilingTask = openvino::itt::handle("INTEL_CPU_INFER_" + execNetwork->_name + "_" + std::to_string(id)); + for (const auto& in : get_inputs()) { + auto port_name = get_port_name(in, m_is_legacy_api); + m_input_ports_map[port_name] = in; + } + for (const auto& out : get_outputs()) { + auto port_name = get_port_name(out, m_is_legacy_api); + m_output_ports_map[port_name] = out; + } + create_infer_request(); +} - if (execNetwork->_graphs.size() == 0) - IE_THROW() << "No graph was found"; - graph = &(execNetwork->GetGraph()._graph); +void SyncInferRequest::create_infer_request() { + auto id = (m_compiled_model->m_numRequests)++; + m_profiling_task = openvino::itt::handle("INTEL_CPU_INFER_" + m_compiled_model->m_name + "_" + std::to_string(id)); - initBlobs(); + 
if (m_compiled_model->m_graphs.size() == 0) { + OPENVINO_THROW("No graph was found"); + } + graph = &(m_compiled_model->get_graph()._graph); + + // Alocate memory for each tensor if static shape + for (const auto& it : m_input_ports_map) { + init_tensor(it.first); + } + for (const auto& it : m_output_ports_map) { + init_tensor(it.first); + } // Save all MemoryLayer data tensors. Will use insight about mechanics // of MemoryLayer implementation. It uses output edge of MemoryLayer @@ -49,7 +68,7 @@ void InferRequestBase::CreateInferRequest() { if (node->getType() == Type::MemoryInput) { auto memoryNode = dynamic_cast(node.get()); if (!memoryNode) { - IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; + OPENVINO_THROW("Cannot cast ", node->getName(), " to MemoryInput"); } auto state_store = memoryNode->getStore(); auto state_name = memoryNode->getId(); @@ -59,55 +78,28 @@ void InferRequestBase::CreateInferRequest() { if (suffix_idx != std::string::npos) state_name = state_name.substr(0, suffix_idx); - memoryStates.emplace_back(new VariableState(state_name, state_store)); + m_memory_states.emplace_back(std::make_shared(state_name, state_store)); } } } -InferRequestBase::~InferRequestBase() { - --(execNetwork->_numRequests); -} - -void InferRequestBase::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) { - auto& tensorDesc = inputBlob->getTensorDesc(); - bool needConvert = inPrec != tensorDesc.getPrecision(); - - const void* srcData = inputBlob->cbuffer().as(); - if (srcData == nullptr) { - IE_THROW() << "Input blob has no allocated memory"; - } - - InferenceEngine::Blob::Ptr iconv; - if (needConvert) { - iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout())); - iconv->allocate(); - if (inputBlob->size() != iconv->size()) - IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " 
<< inputBlob->size() << " and " - << iconv->size(); - - void *dstData = iconv->buffer().as(); - if (dstData == nullptr) { - IE_THROW() << "Converted input blob has no allocated memory"; - } - cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size()); - } - - graph->PushInputData(inputName, needConvert ? iconv : inputBlob); +SyncInferRequest::~SyncInferRequest() { + --(m_compiled_model->m_numRequests); } -void InferRequestBase::PushStates() { - for (auto &node : graph->GetNodes()) { +void SyncInferRequest::push_states() { + for (auto& node : graph->GetNodes()) { if (node->getType() == Type::MemoryInput) { auto cur_node = dynamic_cast(node.get()); if (!cur_node) { - IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; + OPENVINO_THROW("Cannot cast ", node->getName(), " to MemoryInput"); } auto cur_id = cur_node->getId(); - for (const auto& state : memoryStates) { - if (state->GetName() == cur_id) { + for (const auto& state : m_memory_states) { + if (state->get_name() == cur_id) { auto cur_state_mem = cur_node->getStore(); - auto data_ptr = state->GetState()->cbuffer().as(); - auto data_size = state->GetState()->byteSize(); + auto data_ptr = state->get_state()->data(); + auto data_size = state->get_state()->get_byte_size(); auto cur_state_mem_buf = static_cast(cur_state_mem->getData()); cpu_memcpy(cur_state_mem_buf, data_ptr, data_size); @@ -117,19 +109,19 @@ void InferRequestBase::PushStates() { } } -void InferRequestBase::PullStates() { - for (auto &node : graph->GetNodes()) { +void SyncInferRequest::pull_states() { + for (auto& node : graph->GetNodes()) { if (node->getType() == Type::MemoryInput) { auto cur_node = dynamic_cast(node.get()); if (!cur_node) { - IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; + OPENVINO_THROW("Cannot cast ", node->getName(), " to MemoryInput"); } auto cur_id = cur_node->getId(); - for (const auto& state : memoryStates) { - if (state->GetName() 
== cur_id) { + for (const auto& state : m_memory_states) { + if (state->get_name() == cur_id) { auto cur_state_mem = cur_node->getStore(); - auto data_ptr = state->GetState()->cbuffer().as(); - auto data_size = state->GetState()->byteSize(); + auto data_ptr = state->get_state()->data(); + auto data_size = state->get_state()->get_byte_size(); auto cur_state_mem_buf = static_cast(cur_state_mem->getData()); cpu_memcpy(data_ptr, cur_state_mem_buf, data_size); @@ -139,51 +131,72 @@ void InferRequestBase::PullStates() { } } -void InferRequestBase::redefineMemoryForInputNodes() { +void SyncInferRequest::redefine_memory_for_input_nodes() { const auto cpuInputNodes = graph->GetInputNodesMap(); - - for (const auto &blob : _inputs) { - const auto inputNode = cpuInputNodes.find(blob.first); + for (const auto& port : get_inputs()) { + std::string name = get_port_name(port, m_is_legacy_api); + if (name.empty()) { + OPENVINO_THROW("compiled model doesn't contain this input port."); + } + const auto inputNode = cpuInputNodes.find(name); if (inputNode == cpuInputNodes.end()) - IE_THROW() << "CPU execution graph doesn't contain input node with name: " << blob.first; + OPENVINO_THROW("CPU execution graph doesn't contain input node with name: ", name.c_str()); if (inputNode->second->isDynamicNode()) { - inputNode->second->redefineOutputMemory({blob.second->getTensorDesc().getDims()}); + auto tensor = get_tensor(port); + inputNode->second->redefineOutputMemory({tensor->get_shape()}); } } } -void InferRequestBase::InferImpl() { +void SyncInferRequest::update_external_tensor_ptrs() { + // Update it due to batched_tensors case will update input tensor + for (auto input : get_inputs()) { + std::string input_name = get_port_name(input, m_is_legacy_api); + if (input_name.empty()) { + OPENVINO_THROW("Input tensor map contains not registered during IPlugin::compile_model tensor with name ", + input_name); + } + if (external_ptr.find(input_name) != external_ptr.end()) { + auto tensor = 
get_tensor(input); + external_ptr[input_name] = tensor; + } + } +} + +void SyncInferRequest::infer() { using namespace openvino::itt; - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, profilingTask); - auto graphLock = execNetwork->GetGraph(); + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, m_profiling_task); + auto graphLock = m_compiled_model->get_graph(); graph = &(graphLock._graph); - ThrowIfCanceled(); - convertBatchedInputBlobs(); + throw_if_canceled(); + convert_batched_tensors(); + if (m_batched_tensors.size() > 0) { + // batched_tensors will be updated for each infer, external_ptr should be update together + update_external_tensor_ptrs(); + } if (graph->hasDynamicInput()) { - redefineMemoryForInputNodes(); + redefine_memory_for_input_nodes(); } - execDataPreprocessing(_inputs); - - changeDefaultPtr(); + change_default_ptr(); - ThrowIfCanceled(); + throw_if_canceled(); - PushInputData(); + push_input_data(); - if (memoryStates.size() != 0) { - PushStates(); + if (m_memory_states.size() != 0) { + push_states(); } graph->Infer(this); - if (memoryStates.size() != 0) { - PullStates(); + if (m_memory_states.size() != 0) { + pull_states(); } - ThrowIfCanceled(); + throw_if_canceled(); // update output control blocks, if any, in order to refresh internal buffers if (Graph::Status::ReadyDynamic == graph->getStatus()) { @@ -192,49 +205,50 @@ void InferRequestBase::InferImpl() { } } - graph->PullOutputData(_outputs); + graph->PullOutputData(m_outputs); } -std::map InferRequestBase::GetPerformanceCounts() const { +std::vector SyncInferRequest::get_profiling_info() const { if (!graph || !graph->IsReady()) - IE_THROW() << "Graph is not ready!"; - std::map perfMap; + OPENVINO_THROW("Graph is not ready!"); + std::vector perfMap; graph->GetPerfData(perfMap); return perfMap; } -static inline void changeEdgePtr(const EdgePtr &edge, InferenceEngine::Blob::Ptr blob) { - auto size = blob->byteSize(); +static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& tensor) { + auto 
size = tensor->get_byte_size(); auto& mem = edge->getMemory(); auto memMngr = mem.getMemoryMngr(); IE_ASSERT(memMngr); - memMngr->setExtBuff(blob->buffer(), size); + memMngr->setExtBuff(tensor->data(), size); } -void InferRequestBase::changeDefaultPtr() { +void SyncInferRequest::change_default_ptr() { const auto& inputNodesMap = graph->GetInputNodesMap(); const auto& outputNodesMap = graph->GetOutputNodesMap(); + std::unordered_set inputPtrs; - std::function changeInpPtr; + std::function& tensor)> changeInpPtr; if (Graph::Status::ReadyDynamic == graph->getStatus()) { - changeInpPtr = [&inputPtrs](const EdgePtr &edge, InferenceEngine::Blob::Ptr blob) { - changeEdgePtr(edge, blob); - inputPtrs.insert(blob->buffer()); + changeInpPtr = [&inputPtrs](const EdgePtr &edge, ov::SoPtr& tensor) { + change_edge_ptr(edge, tensor); + inputPtrs.insert(tensor->data()); }; } else { - changeInpPtr = [](const EdgePtr &edge, InferenceEngine::Blob::Ptr blob) { - changeEdgePtr(edge, blob); + changeInpPtr = [](const EdgePtr &edge, ov::SoPtr& tensor) { + change_edge_ptr(edge, tensor); }; } - for (auto& it : externalPtr) { + for (auto& it : external_ptr) { auto input = inputNodesMap.find(it.first); if (inputNodesMap.end() == input) { OPENVINO_ASSERT(outputNodesMap.count(it.first), "Cannot find input/output blob: ", it.first); continue; } NodePtr inputNodePtr = input->second; - if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast(it.second->buffer())) + if (inputNodePtr->getChildEdgeAt(0)->getMemory().getData() == static_cast(it.second->data())) continue; auto& childEdges = inputNodePtr->getChildEdges(); // Perform checks that the user's memory will not be modified @@ -242,7 +256,7 @@ void InferRequestBase::changeDefaultPtr() { for (auto& childEdge : childEdges) { auto ce = childEdge.lock(); if (!ce) - IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge"; + OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); auto& 
child = ce->getChild(); @@ -272,22 +286,20 @@ void InferRequestBase::changeDefaultPtr() { for (auto& edge : childEdges) { auto e = edge.lock(); if (!e) - IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge"; - + OPENVINO_THROW("Node ", inputNodePtr->getName(), " contains empty child edge"); changeInpPtr(e, it.second); } } } - for (auto& it : externalPtr) { + for (auto& it : external_ptr) { const auto& name = it.first; auto output = outputNodesMap.find(name); if (outputNodesMap.end() == output) { continue; } auto parentEdge = output->second->getParentEdgeAt(0); - - if (parentEdge->getMemory().getData() == static_cast(it.second->buffer())) + if (parentEdge->getMemory().getData() == static_cast(it.second->data())) continue; bool canBeInPlace = true; @@ -306,7 +318,7 @@ void InferRequestBase::changeDefaultPtr() { for (auto& edge : parentEdges) { auto e = edge.lock(); if (!e) - IE_THROW() << "Node " << parent->getName() << " contains empty parent edge"; + OPENVINO_THROW("Node ", parent->getName(), " contains empty parent edge"); if (e->getMemory().getData() == defaultPtr) { parent = e->getParent(); @@ -315,7 +327,7 @@ void InferRequestBase::changeDefaultPtr() { } } while (previousParent != parent); if (canBeInPlace) - changeEdgePtr(parentEdge, it.second); + change_edge_ptr(parentEdge, it.second); } if (Graph::Status::ReadyDynamic == graph->getStatus()) { @@ -342,7 +354,7 @@ void InferRequestBase::changeDefaultPtr() { outputMemMngr->setMemMngr(memMngr); DEBUG_LOG("reset proxy ", outputMemMngr, ", actual ", controlBlock.currentMemMngr(), " graph ", graph, " inferrequest ", this); - DEBUG_LOG(name, ", blob ", controlBlock.blob(), ", tensor ", controlBlock.tensor()); + DEBUG_LOG(name, ", tensor ", controlBlock.tensor()); } else { outputMemMngr->reset(); // switch to the internal memory since memory sharing is no longer possible } @@ -350,381 +362,115 @@ void InferRequestBase::changeDefaultPtr() { } } -std::vector InferRequestBase::QueryState() { 
- return memoryStates; +std::vector> SyncInferRequest::query_state() const { + return m_memory_states; } -void InferRequestBase::SetAsyncRequest(AsyncInferRequest* asyncRequest) { - _asyncRequest = asyncRequest; +void SyncInferRequest::set_async_request(AsyncInferRequest* asyncRequest) { + m_asyncRequest = asyncRequest; } -void InferRequestBase::ThrowIfCanceled() const { - if (_asyncRequest != nullptr) { - _asyncRequest->ThrowIfCanceled(); +void SyncInferRequest::throw_if_canceled() const { + if (m_asyncRequest != nullptr) { + m_asyncRequest->throw_if_canceled(); } } -InferenceEngine::Precision -InferRequestBase::normToInputSupportedPrec(const std::pair& input) const { - const auto& inputTensorDesc = input.second->getTensorDesc(); - auto inPrec = inputTensorDesc.getPrecision(); - if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) { - inPrec = InferenceEngine::Precision::FP32; +static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr& tensor) { + auto element_type = tensor->get_element_type(); + auto shape = tensor->get_shape(); + std::vector blk_order(shape.size()); + std::iota(blk_order.begin(), blk_order.end(), 0); + std::vector dim_offset(shape.size(), 0); + std::vector blk_strides; + auto byte_strides = element_type.bitwidth() >= 8 ? 
tensor->get_strides() : Strides{}; + if (byte_strides.empty()) { + blk_strides = ov::row_major_strides(shape); } else { - inPrec = normalizeToSupportedPrecision(inPrec); - } - - if (inPrec == InferenceEngine::Precision::UNSPECIFIED) { - IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision(); - } - - return inPrec; -} - -/* ========================================== LegacyInferRequest ========================================== */ -LegacyInferRequest::LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork) - : InferRequestBase(networkInputs, networkOutputs, execNetwork) { - CreateInferRequest(); + blk_strides.resize(byte_strides.size()); + std::transform(byte_strides.begin(), + byte_strides.end(), + blk_strides.begin(), + [&element_type](size_t byte_stride) { + OPENVINO_ASSERT(byte_stride % element_type.size() == 0, + "Limitation: Stride in bytes ", + byte_stride, + " should be divisible by size of element ", + element_type.size()); + return byte_stride / element_type.size(); + }); + } + OPENVINO_SUPPRESS_DEPRECATED_START + return InferenceEngine::TensorDesc{InferenceEngine::details::convertPrecision(element_type), + shape, + InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}}; + OPENVINO_SUPPRESS_DEPRECATED_END } -void LegacyInferRequest::initBlobs() { - for (const auto& it : _networkInputs) { - LegacyInferRequest::GetBlob(it.first); - } - for (const auto& it : _networkOutputs) { - LegacyInferRequest::GetBlob(it.first); - } +ov::SoPtr SyncInferRequest::get_tensor(const ov::Output& in_port) const { + auto port = get_internal_port(in_port); + return ov::ISyncInferRequest::get_tensor(port); } -void LegacyInferRequest::changeDefaultPtr() { - // renew external pointers before infer - const auto &inMap = graph->inputNodesMap; - for (auto &it : inMap) { - const auto &name = it.first; - auto itr = externalPtr.find(name); - if (itr != 
externalPtr.end() && !(itr->second->buffer() == _inputs[name]->buffer())) { - itr->second = _inputs[name]; - } - } - const auto &outMap = graph->outputNodesMap; - for (auto &it : outMap) { - const auto &name = it.first; - auto itr = externalPtr.find(name); - if (itr != externalPtr.end() && !(itr->second->buffer() == _outputs[name]->buffer())) { - itr->second = _outputs[name]; - } - } - InferRequestBase::changeDefaultPtr(); -} - -void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlobLegacy"); - if (name.empty()) { - IE_THROW(NotFound) << "Failed to set blob with empty name"; - } - - if (!data) - IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'"; - const bool compoundBlobPassed = data->is(); - if (!compoundBlobPassed && data->buffer() == nullptr) - IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'"; - if (data->size() == 0) { - IE_THROW() << "Input data is empty. 
Input name: \'" << name << "\'"; - } - - InferenceEngine::InputInfo::Ptr foundInput; - InferenceEngine::DataPtr foundOutput; - size_t dataSize = data->size(); - findInputAndOutputBlobByName(name, foundInput, foundOutput); - - if (foundInput) { - if (foundInput->getPrecision() != data->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob with precision: " - << data->getTensorDesc().getPrecision() << ", if CNNNetwork input blob precision is: " << foundInput->getPrecision(); - } - - const bool preProcRequired = preProcessingRequired(foundInput, data); - if (compoundBlobPassed && !preProcRequired) { - IE_THROW(NotImplemented) - << "cannot set compound blob: supported only for input pre-processing"; - } - - if (preProcRequired) { - if (_preProcData.find(name) == _preProcData.end()) { - _preProcData.emplace(name, InferenceEngine::CreatePreprocDataHelper()); - } - _preProcData[name]->isApplicable(data, _inputs[name]); - // Stores the given blob as ROI blob. It will be used to fill in network input during - // pre-processing - _preProcData[name]->setRoiBlob(data); - } else { - size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR - ? InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) - : 1; - if (dataSize != inputSize) { - IE_THROW() << "Input blob size is not equal network input size (" - << dataSize << "!=" << inputSize << ")."; - } - - if (foundInput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob. Dimensions mismatch."; - } - - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && - foundInput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob. 
Blocking descriptor mismatch."; - } - - auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory()); - if (data->getTensorDesc() == pBlobDesc && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = data; - } else if (externalPtr.find(name) != externalPtr.end()) { - externalPtr.erase(name); - } - _inputs[name] = data; - } - } - if (foundOutput) { - if (compoundBlobPassed) { - IE_THROW(NotImplemented) - << "cannot set compound blob: supported only for input pre-processing"; - } - if (foundOutput->getPrecision() != data->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set output blob with precision: " - << data->getTensorDesc().getPrecision() << ", if CNNNetwork output blob precision is: " << foundOutput->getPrecision(); - } - size_t outputSize = foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR - ? InferenceEngine::details::product(foundOutput->getDims()) - : 1; - if (dataSize != outputSize) { - IE_THROW() << "Output blob size is not equal network output size (" - << dataSize << "!=" << outputSize << ")."; - } - if (foundOutput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) { - IE_THROW(ParameterMismatch) << "Failed to set output Blob. Dimensions mismatch."; - } - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && - foundOutput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { - IE_THROW(ParameterMismatch) << "Failed to set output blob. 
Blocking descriptor mismatch."; - } - - auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory()); - if (data->getTensorDesc() == pBlobDesc) { - externalPtr[name] = data; - } else if (externalPtr.find(name) != externalPtr.end()) { - externalPtr.erase(name); - } - _outputs[name] = data; - } +std::vector> SyncInferRequest::get_tensors(const ov::Output& in_port) const { + auto port = get_internal_port(in_port); + return ov::ISyncInferRequest::get_tensors(port); } -InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlobLegacy"); - - if (!graph || !graph->IsReady()) - IE_THROW() << "Graph is not ready!"; - - InferenceEngine::Blob::Ptr data; - - const auto &inMap = graph->inputNodesMap; - auto input = inMap.find(name); - if (input != inMap.end()) { - // ROI blob is returned only if it was set previously. - auto it = _preProcData.find(name); - if (it != _preProcData.end()) { - data = it->second->getRoiBlob(); - return data; - } - - if (_inputs.find(name) == _inputs.end()) { - auto pBlob = MemoryDescUtils::interpretAsBlob(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory()); - if (!pBlob) { - IE_THROW() << "Can not interpret cpu plugin memory object as InferenceEngine::Blob. 
Input node name: " << name; - } - - InferenceEngine::TensorDesc desc = pBlob->getTensorDesc(); - auto itr = _networkInputs.find(name); - if (itr != _networkInputs.end()) { - const InferenceEngine::Layout &l = itr->second->getLayout(); - const InferenceEngine::Precision &p = itr->second->getPrecision(); - const InferenceEngine::SizeVector &dims = itr->second->getTensorDesc().getDims(); - desc = InferenceEngine::TensorDesc(p, dims, l); - } - - _inputs[name] = make_blob_with_precision(desc); - _inputs[name]->allocate(); - if (pBlob->getTensorDesc() == desc && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = _inputs[name]; - } - } - data = _inputs[name]; - checkBlob(data, name, true); - // check if preprocess required, but still wasn't set - auto preProcessedInput = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair& pair) { - return pair.first == name; - }); - if (preProcessedInput != std::end(_networkInputs)) { - InferenceEngine::InputInfo::Ptr foundInput; - InferenceEngine::DataPtr foundOutput; - if (!findInputAndOutputBlobByName(name, foundInput, foundOutput)) { - IE_THROW() << "Blob with name: " << name << " absents in network inputs"; - } - if (preProcessingRequired(foundInput, data)) { - _preProcData.emplace(name, InferenceEngine::CreatePreprocDataHelper()); - _preProcData[name]->isApplicable(data, _inputs[name]); - _preProcData[name]->setRoiBlob(data); - } - } - } - - if (graph->hasOutputWithName(name)) { - if (_outputs.find(name) == _outputs.end()) { - auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory()); - if (!data) { - InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); - desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); - - // WA: need to avoid exception thrown when we compare blocking desc in SetBlob - // in situation if we push output blobs as 
inputs for next network (in Hetero plugin) - // it may be that output tensor desc will be different from real input tensor desc for next network - // because the optimal descriptor was chosen (e.g. inPlace case for Split node) - auto currBlockDesc = InferenceEngine::BlockingDesc(desc.getBlockingDesc().getBlockDims(), desc.getBlockingDesc().getOrder()); - desc = InferenceEngine::TensorDesc(desc.getPrecision(), desc.getDims(), currBlockDesc); - - data = make_blob_with_precision(desc); - data->allocate(); - } else { - const auto& expectedTensorDesc = pBlobDesc; - - if (expectedTensorDesc.getPrecision() != data->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " - << data->getTensorDesc().getPrecision() << " for input and " << expectedTensorDesc.getPrecision() - << " for output."; - } - - if (expectedTensorDesc.getDims() != data->getTensorDesc().getDims()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different shapes."; - } - - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && expectedTensorDesc.getLayout() != InferenceEngine::Layout::ANY && - expectedTensorDesc.getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name - << " but expect blobs with different blocking descriptors."; - } - } - - _outputs[name] = data; - if (!externalPtr.count(name) && data->getTensorDesc() == pBlobDesc) { - externalPtr[name] = data; - } - } - data = _outputs[name]; - checkBlob(data, name, false); - } - if (!data) { - IE_THROW() << "Cannot find blob with name: " << name; - } - return data; -} - -void LegacyInferRequest::PushInputData() { - for (auto input : _inputs) { - auto inputName = input.first; - if (!_networkInputs[inputName]) { - IE_THROW() << "Input blobs map contains not 
registered during IInferencePlugin::LoadNetwork blob with name " << inputName; - } - - // User can initialize input via setBlob API using tensorDesc with default (ANY) layout. - // Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input. - auto inputBlob = input.second; - if (inputBlob->getTensorDesc().getLayout() == InferenceEngine::ANY) { - inputBlob->getTensorDesc().setLayout(_networkInputs[inputName]->getLayout()); - } - - pushInput(inputName, inputBlob, normToInputSupportedPrec(input)); - } -} - -/* ========================================== InferRequest ========================================== */ -InferRequest::InferRequest(const std::vector>& inputs, - const std::vector>& outputs, - ExecNetwork::Ptr execNetwork) -: InferRequestBase(inputs, outputs, execNetwork) { - for (const std::shared_ptr& in : inputs) { - modelInputsMap[ov::op::util::get_ie_output_name(ngraph::Output(in))] = in; - } - for (const std::shared_ptr& out : outputs) { - modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out; - } - - CreateInferRequest(); -} - -void InferRequest::initBlobs() { - for (const auto& it : modelInputsMap) { - InferRequest::GetBlob(it.first); - } - for (const auto& it : modelOutputsMap) { - InferRequest::GetBlob(it.first); - } -} - -void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlob"); - if (name.empty()) { - IE_THROW(NotFound) << "Failed to set blob with empty name"; - } - - if (!data) - IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'"; - - bool isInput = false; - const auto inputNodeItr = modelInputsMap.find(name); - const auto outputNodeItr = modelOutputsMap.find(name); - - if (inputNodeItr != modelInputsMap.end()) { - if (!inputNodeItr->second) { - IE_THROW() << "Can't set blob with name: " << name << ", because has null pointer to input node"; - } - 
isInput = true; - } else if (outputNodeItr != modelOutputsMap.end()) { - if (!outputNodeItr->second) { - IE_THROW() << "Can't set blob with name: " << name << ", because has null pointer to output node"; - } - isInput = false; +const ov::Output& SyncInferRequest::get_internal_port(const ov::Output& port) const { + auto name = get_port_name(port, m_is_legacy_api); + bool is_input = ov::op::util::is_parameter(port.get_node()); + if (is_input) { + return m_input_ports_map.at(name); } else { - IE_THROW(NotFound) << "Can't set blob with name: " << name << ", because input/output with this name doesn't exist"; + return m_output_ports_map.at(name); } +} - const bool compoundBlobPassed = data->is(); - if (!compoundBlobPassed && data->buffer() == nullptr) - IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'"; - - const auto &blobDesc = data->getTensorDesc(); - - if (isInput) { - const auto netInPrc = InferenceEngine::details::convertPrecision(inputNodeItr->second->get_output_element_type(0)); - if (netInPrc != blobDesc.getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob with precision: " - << blobDesc.getPrecision() << ", if CNNNetwork input blob precision is: " << netInPrc; - } - - const auto shape = inputNodeItr->second->get_output_partial_shape(0); +void SyncInferRequest::set_tensor(const ov::Output& in_port, const ov::SoPtr& in_tensor) { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "set_tensor"); + if (!in_tensor) + OPENVINO_THROW("Failed to set empty tensor for port!"); + auto port = get_internal_port(in_port); + auto tensor = in_tensor; + + // WA: legacy api create blob with ANY layout will not set BlockingDesc, which will lead to tensor.get_shape() + // return empty shape but tensor.get_size() return correct value, and tensor.reshape() cannot update + // BlockingDesc, so to construct new tensor with original tensor's data, which is only for ov legacy api usage. 
+ if (in_port.get_partial_shape().is_static() && in_tensor->get_size() > 0 && in_tensor->get_shape().size() == 0 && + in_tensor->get_size() == ov::shape_size(in_port.get_shape()) && in_port.get_shape().size() > 0) { + tensor = ov::make_tensor(in_tensor->get_element_type(), in_port.get_shape(), in_tensor->data()); + } + auto name = get_port_name(in_port, m_is_legacy_api); + auto tensor_desc = create_tensor_desc(tensor); + bool is_input = ov::op::util::is_parameter(port.get_node()); + if (is_input) { + const auto netInPrc = port.get_element_type(); + if (netInPrc != tensor->get_element_type()) { + IE_THROW(ParameterMismatch) << "Failed to set input tensor with precision: " << tensor->get_element_type() + << ", since the model input tensor precision is: " << netInPrc; + } + + const auto& shape = port.get_partial_shape(); const bool isDynamic = shape.is_dynamic(); - if (!shape.compatible(ov::PartialShape(data->getTensorDesc().getDims()))) { - IE_THROW() << "Can't set input blob with name: " << name - << ", because model input (shape=" << shape - << ") and blob (shape=" << vec2str(data->getTensorDesc().getDims()) << ") are incompatible"; - } - - if (!isDynamic && ngraph::shape_size(shape.to_shape()) != data->size()) { - IE_THROW() << "Can't set input blob with name: " << name << ", because model input size = " << ngraph::shape_size(shape.to_shape()) - << " and blob size = " << data->size() << " are different."; + if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) { + OPENVINO_THROW("Can't set the input tensor with name: ", + name, + ", because the model input (shape=", + shape, + ") and the tensor (shape=", + vec2str(tensor->get_shape()), + ") are incompatible"); + } + + if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) { + OPENVINO_THROW("Can't set input tensor with name: ", + name, + ", because the model input size = ", + ov::shape_size(shape.to_shape()), + " and the tensor size = ", + tensor->get_size(), + " are different."); } 
MemoryDescPtr actualDesc = graph->getInputNodeByName(name)->getBaseMemDescAtOutputPort(0); @@ -732,210 +478,221 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob: // we must define desc for dynamic case // otherwise we got incorrect check on shape compatibility inside isCompatible // because lower and upper bound will be compared - actualDesc = actualDesc->cloneWithNewDims(blobDesc.getLayout() == InferenceEngine::Layout::SCALAR ? InferenceEngine::SizeVector{1} : - blobDesc.getDims()); + OPENVINO_SUPPRESS_DEPRECATED_START + actualDesc = actualDesc->cloneWithNewDims(tensor_desc.getLayout() == InferenceEngine::Layout::SCALAR + ? InferenceEngine::SizeVector{1} + : tensor_desc.getDims()); + OPENVINO_SUPPRESS_DEPRECATED_END } - if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(blobDesc)) && + if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensor_desc)) && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = data; - } else if (externalPtr.find(name) != externalPtr.end()) { - externalPtr.erase(name); + external_ptr[name] = tensor; + } else if (external_ptr.find(name) != external_ptr.end()) { + external_ptr.erase(name); } - _inputs[name] = data; - _batched_inputs.erase(name); } else { - if (compoundBlobPassed) { - IE_THROW(NotImplemented) << "Can't set compound blob: supported only for input pre-processing"; - } - const auto netOutPrc = InferenceEngine::details::convertPrecision(outputNodeItr->second->get_input_element_type(0)); - if (netOutPrc != blobDesc.getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob with precision: " - << blobDesc.getPrecision() << ", if CNNNetwork output blob precision is: " << netOutPrc; + const auto netOutPrc = port.get_element_type(); + if (netOutPrc != tensor->get_element_type()) { + IE_THROW(ParameterMismatch) << "Failed to set output tensor with precision: " << 
tensor->get_element_type() + << ", if model output tensor precision is: " << netOutPrc; } - const auto shape = outputNodeItr->second->get_input_partial_shape(0); + const auto& shape = port.get_partial_shape(); const bool isDynamic = shape.is_dynamic(); - if (!shape.compatible(ov::PartialShape(data->getTensorDesc().getDims()))) { - IE_THROW() << "Can't set output blob with name: " << name - << ", because model output (shape=" << shape - << ") and blob (shape=" << vec2str(data->getTensorDesc().getDims()) << ") are incompatible"; + if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) { + OPENVINO_THROW("Can't set the output tensor with name: ", + name, + ", because the model output tensor (shape=", + shape, + ") and the current tensor (shape=", + vec2str(tensor->get_shape()), + ") are incompatible"); } - if (!isDynamic && ngraph::shape_size(shape.to_shape()) != data->size()) { - IE_THROW() << "Can't set output blob with name: " << name << ", because model output size = " << ngraph::shape_size(shape.to_shape()) - << " and blob size = " << data->size() << " are different."; + if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) { + OPENVINO_THROW("Can't set the output tensor with name: ", + name, + ", because the model output size = ", + ov::shape_size(shape.to_shape()), + " and the currernt tensor size = ", + tensor->get_size(), + " are different."); } - const auto &desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); - if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc)) { - externalPtr[name] = data; - } else if (externalPtr.find(name) != externalPtr.end()) { - externalPtr.erase(name); + const auto& desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); + if (!isDynamic && tensor_desc == MemoryDescUtils::convertToTensorDesc(desc)) { + external_ptr[name] = tensor; + } else if (external_ptr.find(name) != external_ptr.end()) { + 
external_ptr.erase(name); } - _outputs[name] = data; + + m_outputs[name] = tensor; outputControlBlocks.erase(name); // now the memory is under user's control } + ov::ISyncInferRequest::set_tensor(port, tensor); } -void InferRequest::SetBlobsImpl(const std::string& name, const InferenceEngine::BatchedBlob::Ptr& batched_blob) { - _batched_inputs[name] = batched_blob; +void SyncInferRequest::set_tensors_impl(const ov::Output port, const std::vector>& tensors) { + for (const auto& input : get_inputs()) { + if (input == port) { + m_batched_tensors[input.get_tensor_ptr()] = tensors; + return; + } + } + OPENVINO_THROW("Cannot find port to set_tensors!"); } -InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlob"); +void SyncInferRequest::init_tensor(const std::string& name) { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "init_tensor"); if (!graph || !graph->IsReady()) - IE_THROW() << "Graph is not ready!"; + OPENVINO_THROW("Graph is not ready!"); - InferenceEngine::Blob::Ptr data; + OPENVINO_ASSERT(!name.empty(), "Can't prepare tensor for empty name! 
"); - const auto &inMap = graph->inputNodesMap; + ov::SoPtr tensor; + const auto& inMap = graph->inputNodesMap; auto input = inMap.find(name); if (input != inMap.end()) { - if (_inputs.find(name) == _inputs.end()) { - auto inputNode = modelInputsMap.find(name); - if (inputNode != modelInputsMap.end()) { - if (!inputNode->second) { - IE_THROW() << "Can't get blob with name: " << name << ", because has null pointer to input node"; - } - - const auto shape = inputNode->second->get_output_partial_shape(0); - const bool isDynamic = shape.is_dynamic(); - InferenceEngine::SizeVector dims; - if (isDynamic) { - dims = InferenceEngine::SizeVector(shape.rank().get_length(), 0); - } else { - dims = shape.to_shape(); - } - - InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(inputNode->second->get_output_element_type(0)), - dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size())); + auto input_port = m_input_ports_map.find(name); + OPENVINO_ASSERT(input_port != m_input_ports_map.end(), + "Tensor with name: ", + name, + " exists in CPU plugin graph, but absents in network inputs"); + auto& port = input_port->second; + tensor = ov::ISyncInferRequest::get_tensor(port); + + if (!tensor) { + const auto& shape = port.get_partial_shape(); + const bool isDynamic = shape.is_dynamic(); + ov::Shape tensor_shape; + if (isDynamic) { + tensor_shape = ov::Shape(shape.rank().get_length(), 0); + } else { + tensor_shape = shape.to_shape(); + } - _inputs[name] = make_blob_with_precision(desc); - _inputs[name]->allocate(); + tensor = ov::make_tensor(port.get_element_type(), tensor_shape); + ov::ISyncInferRequest::set_tensor(port, tensor); - if (!isDynamic && - desc == MemoryDescUtils::convertToTensorDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { - externalPtr[name] = _inputs[name]; - } - } else { - IE_THROW() << "Blob with name: " << name << 
" exists in CPU plugin graph, but absents in network inputs"; + auto desc = create_tensor_desc(tensor); + if (!isDynamic && + desc == MemoryDescUtils::convertToTensorDesc( + graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && + graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) { + external_ptr[name] = tensor; } } - data = _inputs[name]; } - const auto &outMap = graph->outputNodesMap; + const auto& outMap = graph->outputNodesMap; auto output = outMap.find(name); if (output != outMap.end()) { - if (_outputs.find(name) == _outputs.end()) { - auto outputNode = modelOutputsMap.find(name); - if (modelOutputsMap.find(name) != modelOutputsMap.end()) { - const auto& model_shape = outputNode->second->get_input_partial_shape(0); - const auto& graph_shape = output->second->getInputShapeAtPort(0); - - // WA, due to the transformations and constant folding, shape inference of the resulting model may - // have static shapes, while they are dynamic in the initial representation - const auto& shape = graph_shape.isDynamic() ? model_shape : - (model_shape.is_dynamic() ? 
graph_shape.toPartialShape() : model_shape); - - const bool isDynamic = shape.is_dynamic(); - - if (!data) { - InferenceEngine::SizeVector dims; - if (isDynamic) { - const auto model_prec = InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)); - const auto graph_prec = output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); - OutputControlBlock control_block{model_prec, Shape{shape}}; - - DEBUG_LOG(name, - ", blob ", control_block.blob(), - ", tensor ", control_block.tensor(), - ", memmngr ", control_block.tensor()->get_memory()->getMemoryMngr(), - "memory object ", control_block.tensor()->get_memory().get()); - - data = control_block.blob(); - if (model_prec == graph_prec) outputControlBlocks.emplace(std::make_pair(name, std::move(control_block))); - } else { - dims = shape.to_shape(); - - InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)), - dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size())); - data = make_blob_with_precision(desc); - data->allocate(); - } + if (m_outputs.find(name) == m_outputs.end()) { + auto output_port = m_output_ports_map.find(name); + OPENVINO_ASSERT(m_output_ports_map.find(name) != m_output_ports_map.end(), + "Tensor with name: ", + name, + " exists in CPU plugin graph, but absents in network outputs"); + auto port = output_port->second; + const auto& port_shape = port.get_partial_shape(); + const auto& graph_shape = output->second->getInputShapeAtPort(0); + + // WA, due to the transformations and constant folding, shape inference of the resulting model may + // have static shapes, while they are dynamic in the initial representation + const auto& shape = graph_shape.isDynamic() + ? port_shape + : (port_shape.is_dynamic() ? 
graph_shape.toPartialShape() : port_shape); + + const bool isDynamic = shape.is_dynamic(); + tensor = ov::ISyncInferRequest::get_tensor(port); + + if (!tensor) { + ov::Shape tensor_shape; + if (isDynamic) { + const auto model_prec = InferenceEngine::details::convertPrecision(port.get_element_type()); + const auto graph_prec = + output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); + OutputControlBlock control_block{model_prec, Shape{shape}}; + + DEBUG_LOG(name, + ", tensor ", + control_block.tensor(), + ", memmngr ", + control_block.tensor()->get_memory()->getMemoryMngr(), + "memory object ", + control_block.tensor()->get_memory().get()); + + tensor = control_block.tensor(); + if (model_prec == graph_prec) + outputControlBlocks.emplace(std::make_pair(name, std::move(control_block))); } else { - const auto& blobDims = data->getTensorDesc().getDims(); - // in static shape case is enough information that shapes are incompatible to throw exception - // but in dynamic shape case we also need to handle following corner case: - // on blob initialization stage we create empty blob with dimensions equal 0 - // so if we have blob with all zero dimension we mustn't throw exception - if (!shape.compatible(ov::PartialShape(blobDims)) && - (!isDynamic || static_cast(blobDims.size()) != shape.rank().get_length() || - std::any_of(blobDims.begin(), blobDims.end(), [](const size_t& dims) { - return dims != 0; - }))) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name - << ", but expect blobs with different shapes. 
Input shape: " - << ov::PartialShape(blobDims) << ", output shape: " << shape; - } - - const auto netOutPrc = InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)); - if (netOutPrc != data->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) - << "Network input and output use the same name: " << name << " but expect blobs with different precision: " - << data->getTensorDesc().getPrecision() << " for input and " << netOutPrc - << " for output."; - } + tensor_shape = shape.to_shape(); + tensor = ov::make_tensor(port.get_element_type(), tensor_shape); + } + ov::ISyncInferRequest::set_tensor(port, tensor); + } else { + const auto& blobDims = tensor->get_shape(); + const bool isDynamic = port_shape.is_dynamic(); + // Static shape case is enough information that shapes are incompatible to throw exception + // but in dynamic shape case we also need to handle following corner case: + // on tensor initialization stage we create empty tensor with dimensions equal 0 + // so if we have tensor with all zero dimension we mustn't throw exception + if (!port_shape.compatible(ov::PartialShape(blobDims)) && + (!isDynamic || static_cast(blobDims.size()) != port_shape.rank().get_length() || + std::any_of(blobDims.begin(), blobDims.end(), [](const size_t& dims) { + return dims != 0; + }))) { + IE_THROW(ParameterMismatch) + << "Network input and output use the same name: " << name + << ", but expect tensors with different shapes. 
Input shape: " << ov::PartialShape(blobDims) + << ", output shape: " << port_shape; } - _outputs[name] = data; - if (!isDynamic && !externalPtr.count(name) && - data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) { - externalPtr[name] = data; + const auto netOutPrc = port.get_element_type(); + if (netOutPrc != tensor->get_element_type()) { + IE_THROW(ParameterMismatch) + << "Network input and output use the same name: " << name + << " but expect tensor with different precision: " << tensor->get_element_type() + << " for input and " << netOutPrc << " for output."; + } + } + m_outputs[name] = tensor; + auto desc = create_tensor_desc(tensor); + if (!port_shape.is_dynamic() && !external_ptr.count(name) && + desc == MemoryDescUtils::convertToTensorDesc( + output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) { + external_ptr[name] = tensor; + } + // update tensors in case of multiple output ports with the same name + for (const auto& out : get_outputs()) { + auto port_name = get_port_name(out, m_is_legacy_api); + if ((name == port_name) && tensor && port != out) { + ov::ISyncInferRequest::set_tensor(out, tensor); } - } else { - IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network outputs"; } } - data = _outputs[name]; } - - if (!data) { - IE_THROW() << "Cannot find blob with name: " << name; + if (!tensor) { + OPENVINO_THROW("Cannot find tensor with name: ", name); } - - DEBUG_LOG(name, ", blob ", data, ", ", static_cast(data->buffer())); - return data; + return; } -void InferRequest::checkBlobs() { - for (auto const& input : _inputs) { - checkBlob(input.second, input.first, true); - } - - // won't check dynamic output blobs as they are not allocated. 
- for (auto const& output : _outputs) { - const auto out_node = findOutputByNodeName(output.first); - const auto isDynamic = out_node && out_node->get_output_partial_shape(0).is_dynamic(); - if (!isDynamic) checkBlob(output.second, output.first, false); - } -} - -void InferRequest::PushInputData() { - for (auto input : _inputs) { - auto inputName = input.first; - if (!modelInputsMap[inputName]) { - IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName; +void SyncInferRequest::push_input_data() { + for (auto input : get_inputs()) { + std::string input_name = get_port_name(input, m_is_legacy_api); + if (input_name.empty()) { + OPENVINO_THROW("Input tensor map contains not registered during IPlugin::compile_model tensor with name ", + input_name); } - - pushInput(inputName, input.second, normToInputSupportedPrec(input)); + auto tensor = get_tensor(input); + graph->PushInputData(input_name, tensor); } } -InferRequestBase::OutputControlBlock::OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape) { +SyncInferRequest::OutputControlBlock::OutputControlBlock(const InferenceEngine::Precision& precision, const Shape& shape) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); m_buffers[m_buffIndx] = std::make_shared(); m_proxyMemMngr = std::make_shared(m_buffers[m_buffIndx]); @@ -949,8 +706,8 @@ InferRequestBase::OutputControlBlock::OutputControlBlock(const InferenceEngine:: auto memory = std::make_shared(eng, desc, m_proxyMemMngr); m_tensor = std::make_shared(memory); - m_blob = tensor_to_blob({m_tensor, nullptr}); } } // namespace intel_cpu } // namespace ov + diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index afb8521df9e201..c59bb1754c9c20 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -5,55 +5,48 @@ #pragma once #include "graph.h" -#include -#include -#include -#include 
#include "cpu_tensor.h" +#include "openvino/runtime/iinfer_request.hpp" +#include "openvino/runtime/isync_infer_request.hpp" namespace ov { namespace intel_cpu { -class ExecNetwork; +class CompiledModel; class AsyncInferRequest; -class InferRequestBase : public InferenceEngine::IInferRequestInternal { +class SyncInferRequest : public ov::ISyncInferRequest { public: - virtual ~InferRequestBase(); + SyncInferRequest(std::shared_ptr compiled_model); + virtual ~SyncInferRequest(); - void InferImpl() override; + void infer() override; - std::map GetPerformanceCounts() const override; + std::vector get_profiling_info() const override; - std::vector> QueryState() override; + std::vector> query_state() const override; + + void set_tensor(const ov::Output& port, const ov::SoPtr& tensor) override; + + void set_tensors_impl(const ov::Output port, const std::vector>& tensors) override; + + ov::SoPtr get_tensor(const ov::Output& port) const override; + std::vector> get_tensors(const ov::Output& _port) const override; /** * @brief Sets the pointer to asynchronous inference request that holds this request * @param[in] asyncRequest Pointer to asynchronous inference request */ - void SetAsyncRequest(AsyncInferRequest* asyncRequest); + void set_async_request(AsyncInferRequest* asyncRequest); /** - * @brief If `_asyncRequest` is initialized throw exception with `InferenceEngine::INFER_CANCELLED` status if inference request is canceled + * @brief If `m_asyncRequest` is initialized throw exception with `ov::Cancelled` status if inference request is + * canceled */ - void ThrowIfCanceled() const; -protected: - InferRequestBase(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork_) - : IInferRequestInternal(networkInputs, networkOutputs), execNetwork(execNetwork_) {} + void throw_if_canceled() const; - InferRequestBase(const std::vector>& inputs, - const std::vector>& outputs, - std::shared_ptr execNetwork_) - : 
IInferRequestInternal(inputs, outputs), execNetwork(execNetwork_) {} - - void CreateInferRequest(); - InferenceEngine::Precision normToInputSupportedPrec(const std::pair& input) const; - void pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision dataType); - -protected: +private: class OutputControlBlock { public: using MemMngrPtr = std::shared_ptr; @@ -67,10 +60,6 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { OutputControlBlock(OutputControlBlock&&) = default; OutputControlBlock& operator=(OutputControlBlock&&) = default; - InferenceEngine::Blob::Ptr blob() const { - return m_blob; - } - std::shared_ptr tensor() const { return m_tensor; } @@ -93,74 +82,43 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { void update() { m_proxyMemMngr->setMemMngr(currentMemMngr()); - m_blob->allocate(); // WA: update handle } private: std::shared_ptr m_tensor = nullptr; - InferenceEngine::Blob::Ptr m_blob = nullptr; ProxyMemoryMngrPtr m_proxyMemMngr = nullptr; std::array m_buffers; int m_buffIndx = 0; }; - -protected: - virtual void initBlobs() = 0; - virtual void PushInputData() = 0; - - Graph* graph = nullptr; - std::unordered_map externalPtr; - std::unordered_map outputControlBlocks; -private: - void PushStates(); - void PullStates(); - void redefineMemoryForInputNodes(); - - std::shared_ptr execNetwork; - openvino::itt::handle_t profilingTask; - std::vector> memoryStates; - AsyncInferRequest* _asyncRequest = nullptr; - -protected: - virtual void changeDefaultPtr(); -}; - -class LegacyInferRequest : public InferRequestBase { -public: - LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork); + void create_infer_request(); - void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; - InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + 
void init_tensor(const std::string& name); + void push_input_data(); -private: - void PushInputData() override; - void initBlobs() override; - void changeDefaultPtr() override; -}; + Graph* graph = nullptr; + std::unordered_map> external_ptr; -class InferRequest : public InferRequestBase { -public: - InferRequest(const std::vector>& inputs, - const std::vector>& outputs, - std::shared_ptr execNetwork); + void push_states(); + void pull_states(); + void redefine_memory_for_input_nodes(); - void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; - void SetBlobsImpl(const std::string& name, const InferenceEngine::BatchedBlob::Ptr& batched_blob) override; - InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + void update_external_tensor_ptrs(); + const ov::Output& get_internal_port(const ov::Output& port) const; + bool m_is_legacy_api = false; - void checkBlobs() override; + std::shared_ptr m_compiled_model; + openvino::itt::handle_t m_profiling_task; + std::vector> m_memory_states; + AsyncInferRequest* m_asyncRequest = nullptr; -private: - void PushInputData() override; - void initBlobs() override; + std::unordered_map> m_input_ports_map; + std::unordered_map> m_output_ports_map; + std::unordered_map> m_outputs; - std::unordered_map> modelInputsMap; - std::unordered_map> modelOutputsMap; + void change_default_ptr(); }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/memory_state.cpp b/src/plugins/intel_cpu/src/memory_state.cpp index 7c97b99f21cb0f..b1836d950010d0 100644 --- a/src/plugins/intel_cpu/src/memory_state.cpp +++ b/src/plugins/intel_cpu/src/memory_state.cpp @@ -3,18 +3,17 @@ // #include "memory_state.h" + #include "dnnl_extension_utils.h" -#include "blob_factory.hpp" using namespace InferenceEngine; namespace ov { namespace intel_cpu { -void VariableState::Reset() { - std::memset(state->buffer(), 0, state->byteSize()); +void 
VariableState::reset() { + std::memset(m_state->data(), 0, m_state->get_byte_size()); } -} // namespace intel_cpu -} // namespace ov - +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/memory_state.h b/src/plugins/intel_cpu/src/memory_state.h index 286de6a5353b12..c1dd1bcb7354e6 100644 --- a/src/plugins/intel_cpu/src/memory_state.h +++ b/src/plugins/intel_cpu/src/memory_state.h @@ -4,28 +4,29 @@ #pragma once -#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" -#include "blob_factory.hpp" +#include + #include "cpu_memory.h" -#include "nodes/common/cpu_memcpy.h" +#include "ie_ngraph_utils.hpp" #include "memory_desc/cpu_memory_desc_utils.h" - -#include +#include "nodes/common/cpu_memcpy.h" +#include "openvino/runtime/ivariable_state.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace intel_cpu { -class VariableState : public InferenceEngine::IVariableStateInternal { +class VariableState : public ov::IVariableState { public: - VariableState(std::string name, MemoryPtr storage) - : InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc())); - state->allocate(); - cpu_memcpy(state->buffer(), storage->getData(), storage->getSize()); + VariableState(std::string name, MemoryPtr storage) : ov::IVariableState{name} { + const auto& memDesc = MemoryDescUtils::convertToTensorDesc(storage->getDesc()); + m_state = ov::make_tensor(InferenceEngine::details::convertPrecision(memDesc.getPrecision()), memDesc.getDims()); + cpu_memcpy(m_state->data(), storage->getData(), storage->getSize()); } - void Reset() override; + void reset() override; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index c36815ee048091..a80908451c0281 100644 --- 
a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1247,7 +1247,7 @@ std::vector Node::getInputPrecisions() const { for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { - inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->getDataType()))); + inputPrecisions.emplace_back(parentEdge->getMemoryPtr()->getDesc().getPrecision()); } } return inputPrecisions; @@ -1258,7 +1258,7 @@ std::vector Node::getOutputPrecisions() const { for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); if (childEdge && childEdge->getStatus() == Edge::Status::Validated) { - outputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->getDataType()))); + outputPrecisions.emplace_back(childEdge->getMemoryPtr()->getDesc().getPrecision()); } } return outputPrecisions; diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp index d8322c709e2288..11f24217f49765 100644 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp @@ -223,8 +223,15 @@ const std::tuple & Range::fit(const Precision & prec) { default: IE_THROW() << "Unsupported precision"; } - std::get<0>(_range) = static_cast(std::max(static_cast(std::get<0>(_range)), lbound)); - std::get<1>(_range) = static_cast(std::min(static_cast(std::get<1>(_range)), ubound)); + // If U is integral, its range always less than float, so not need update _range + // Else it will be overflow, for example static_cast double to int64_t: + // int64_t ubound = 9223372036854775807 + // double dd_ubound = static_cast(ubbound) + // static_cast(dd_ubound) will return -9223372036854775808 + if (!std::is_integral::value) { + std::get<0>(_range) = 
static_cast(std::max(static_cast(std::get<0>(_range)), lbound)); + std::get<1>(_range) = static_cast(std::min(static_cast(std::get<1>(_range)), ubound)); + } } else { int64_t lbound; uint64_t ubound; diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp index 6e6e2c9f440c57..daeda2ccb5edd8 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -126,7 +126,19 @@ void Convert::initSupportedPrimitiveDescriptors() { config.outConfs.push_back(dataConfigOut); auto creators = BlockedDescCreator::getCommonCreators(); - auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); + + // As long as convert is placed right before the output, only planar layout makes sense since the output tensor + // is always in a planar layout (ngraph limitation), so there is no reason to convert in any other layout. + bool hasOutputChild = false; + for (auto& childEdge : getChildEdgesAtPort(0)) { + if (Type::Output == childEdge->getChild()->getType()) { + hasOutputChild = true; + break; + } + } + auto range = hasOutputChild + ? 
BlockedDescCreator::makeFilteredRange(creators, insShape.getRank(), {LayoutType::ncsp}) + : BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { config.inConfs[0].setMemDesc(std::make_shared(itr->second->createDesc(insPrecision, insShape))); diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index aae4f68bd14234..a9827a23372925 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2180,6 +2180,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() { } else if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) { if (prc == Precision::U32 || prc == Precision::I64 || prc == Precision::U64) { return Precision(Precision::I32); + } else if (prc == Precision::FP64) { + return Precision(Precision::FP32); } else { IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support " << prc << " precision."; } diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 365bee75a4a42d..a7a54ba21b8f44 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -256,7 +256,7 @@ void Input::cloneBlobIfRequired() { Shape shape(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); const size_t size = shape.getElementsCount(); - DnnlBlockedMemoryDesc memDesc(prec, shape); + CpuBlockedMemoryDesc memDesc(prec, shape); bool needFlushDenormalsToZero = true; if (context->getConfig().DAZOn) { @@ -408,10 +408,6 @@ Input::Input(MemoryDescPtr memDesc, const std::string& name, const std::string& extMemDesc = memDesc; } -void Input::withMeanImage() { - isMeanImage = true; -} - MemoryCPtr Input::getMemoryPtr() const { return memoryPtr; } @@ -470,9 +466,6 @@ void Input::initSupportedPdDefault() { if (getType() == Type::Input || getType() == Type::MemoryInput) { auto precision = getOriginalOutputPrecisionAtPort(0); - if (precision == Precision::U16 || isMeanImage) { - precision = Precision::FP32; - } outPortConfs.push_back({LayoutType::ncsp, precision}); if (!getParentEdges().empty()) { @@ -480,7 +473,6 @@ void Input::initSupportedPdDefault() { } } else if (getType() == Type::Output) { auto precision = getOriginalInputPrecisionAtPort(0); - if (precision == Precision::U16) precision = Precision::FP32; inPortConfs.push_back({LayoutType::ncsp, precision}); } diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index 80d7b42d3a1369..80c0602399f2b0 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -2073,7 +2073,7 @@ void TopK::createPrimitive() { layout = TopKLayoutType::topk_blocked; } - if (inputShapesDefined() && isExecutable()) { + if (!isDynamicNode() && isExecutable()) { if (needPrepareParams()) prepareParams(); updateLastInputDims(); diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b785dd2e755c0b..ae37a2a1bf70b7 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -2,32 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_metric_helpers.hpp" // must 
be included first +#include "ie_metric_helpers.hpp" // must be included first -#include "openvino/runtime/properties.hpp" #include "plugin.h" -#include "transformations/transformation_pipeline.h" -#include "itt.h" -#include "extension_mngr.h" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "extension.h" -#include "serialize.h" -#include "threading/ie_executor_manager.hpp" +#include "extension_mngr.h" -#include "ie_icore.hpp" +#include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" #include "ie_system_conf.h" +#include "itt.h" #include "openvino/runtime/threading/cpu_streams_info.hpp" -#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" - -#include -#include - -#include "performance_heuristics.hpp" #include "openvino/runtime/properties.hpp" -#include "weights_cache.hpp" +#include "openvino/runtime/threading/cpu_streams_info.hpp" +#include "openvino/runtime/threading/executor_manager.hpp" +#include "performance_heuristics.hpp" +#include "serialize.h" +#include "threading/ie_executor_manager.hpp" +#include "transformations/transformation_pipeline.h" +#include "transformations/utils/utils.hpp" #include "utils/denormals.hpp" +#include "weights_cache.hpp" #if defined(__linux__) # include @@ -36,15 +34,14 @@ #endif #include -#include + +using namespace ov::threading; #if defined(OV_CPU_WITH_ACL) #include "nodes/executors/acl/acl_ie_scheduler.hpp" #include "arm_compute/runtime/CPP/CPPScheduler.h" #endif -using namespace InferenceEngine; - #define IE_CPU_PLUGIN_THROW(...) 
IE_THROW(__VA_ARGS__) << "CPU plugin: " namespace ov { @@ -175,7 +172,7 @@ Engine::SchedulerGuard::~SchedulerGuard() { Engine::Engine() : deviceFullName(getDeviceFullName()), specialSetup(new CPUSpecialSetup) { - _pluginName = "CPU"; + set_device_name("CPU"); extensionManager->AddExtension(std::make_shared()); #if defined(OV_CPU_WITH_ACL) scheduler_guard = SchedulerGuard::instance(); @@ -183,26 +180,27 @@ Engine::Engine() : } Engine::~Engine() { - executorManager()->clear("CPU"); - executorManager()->clear("CPUStreamsExecutor"); - executorManager()->clear("CPUCallbackExecutor"); + executor_manager()->clear("CPU"); + executor_manager()->clear("CPUStreamsExecutor"); + executor_manager()->clear("CPUCallbackExecutor"); } -static bool streamsSet(const std::map& config) { - return config.count(PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) || +static bool streamsSet(const ov::AnyMap& config) { + return config.count(InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) || config.count(ov::num_streams.name()); } -void Engine::ApplyPerformanceHints(std::map &config, const std::shared_ptr& ngraphFunc) const { +void Engine::apply_performance_hints(ov::AnyMap& config, const std::shared_ptr& model) const { auto getNumStreamsLatency = [&]() { - return std::pair(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA)); + return std::pair(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), + ov::util::to_string(ov::streams::NUMA)); }; auto getNumStreamsThroughput = [&]() { const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; switch (isa) { - case dnnl::cpu_isa::sse41 : + case dnnl::cpu_isa::sse41: isaSpecificThreshold = 0.5f; break; case dnnl::cpu_isa::avx2: @@ -220,38 +218,38 @@ void Engine::ApplyPerformanceHints(std::map &config, c isaSpecificThreshold = 1.0f; } // the more "capable" the CPU in general, the more streams we may want to keep to keep it utilized - const float memThresholdAssumeLimitedForISA = 
ov::MemBandwidthPressure::LIMITED/isaSpecificThreshold; + const float memThresholdAssumeLimitedForISA = ov::MemBandwidthPressure::LIMITED / isaSpecificThreshold; const float L2_cache_size = dnnl::utils::get_cache_size(2 /*level*/, true /*per core */); - ov::MemBandwidthPressure networkToleranceForLowCache = ov::MemBandwidthPressureTolerance( - ngraphFunc, - L2_cache_size, memThresholdAssumeLimitedForISA); - const auto default_streams = GetNumStreams(engConfig.streamExecutorConfig._threadBindingType, - IStreamsExecutor::Config::StreamMode::DEFAULT, + ov::MemBandwidthPressure networkToleranceForLowCache = + ov::MemBandwidthPressureTolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA); + const auto default_streams = get_streams_num(engConfig.streamExecutorConfig._threadBindingType, + ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT, engConfig.streamExecutorConfig._enable_hyper_thread); auto streams_info = default_streams; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) { // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams - streams_info = GetNumStreams(engConfig.streamExecutorConfig._threadBindingType, - IStreamsExecutor::Config::StreamMode::AGGRESSIVE, + streams_info = get_streams_num(engConfig.streamExecutorConfig._threadBindingType, + ov::threading::IStreamsExecutor::Config::StreamMode::AGGRESSIVE, engConfig.streamExecutorConfig._enable_hyper_thread); } // otherwise (no recognized layers) falling back to the default value } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) { // network is below the ISA-specific threshold - streams_info = GetNumStreams(engConfig.streamExecutorConfig._threadBindingType, - 
IStreamsExecutor::Config::StreamMode::AGGRESSIVE, + streams_info = get_streams_num(engConfig.streamExecutorConfig._threadBindingType, + ov::threading::IStreamsExecutor::Config::StreamMode::AGGRESSIVE, engConfig.streamExecutorConfig._enable_hyper_thread); } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) { // network is below general threshold - streams_info = GetNumStreams(engConfig.streamExecutorConfig._threadBindingType, - IStreamsExecutor::Config::StreamMode::LESSAGGRESSIVE, + streams_info = get_streams_num(engConfig.streamExecutorConfig._threadBindingType, + ov::threading::IStreamsExecutor::Config::StreamMode::LESSAGGRESSIVE, engConfig.streamExecutorConfig._enable_hyper_thread); streams_info.num_streams = std::max(default_streams.num_streams, streams_info.num_streams); } - auto num_requests = config.find(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)); + + auto num_requests = config.find(ov::hint::num_requests.name()); if (num_requests != config.end()) { // arrived with config to the LoadNetwork (and thus higher pri) - auto val = PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second); + auto val = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second.as()); if (val > 0) streams_info.num_streams = std::min(streams_info.num_streams, val); } else if (engConfig.perfHintsConfig.ovPerfHintNumRequests) { // set thru SetConfig to the plugin, 2nd priority @@ -261,22 +259,24 @@ void Engine::ApplyPerformanceHints(std::map &config, c return std::pair(std::to_string(streams_info.num_streams), streams_info); }; + OPENVINO_SUPPRESS_DEPRECATED_START auto getPerfHintName = [&]() { const bool streamsExplicitlySetForModel = streamsSet(config); - // checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig) - if (streamsExplicitlySetForModel || - streamsExplicitlySetForEngine) + // checking streams (to avoid overriding what user 
might explicitly set in the incoming config or previously via + // SetConfig) + if (streamsExplicitlySetForModel || streamsExplicitlySetForEngine) return std::string(); - const auto& perf_hint = config.find(CONFIG_KEY(PERFORMANCE_HINT)); + const auto& perf_hint = config.find(ov::hint::performance_mode.name()); // the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty()) return std::string(); /* performance hints set for network has higher pririty than engine ones. - * This applies for all the configuration parameters */ - const auto perf_hint_name = (perf_hint != config.end()) ? - PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second) : - engConfig.perfHintsConfig.ovPerfHint; + * This applies for all the configuration parameters */ + const auto perf_hint_name = + (perf_hint != config.end()) + ? InferenceEngine::PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second.as()) + : engConfig.perfHintsConfig.ovPerfHint; return perf_hint_name; }; @@ -291,7 +291,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name()); hints_props.insert({latency_name, latency_hints.second}); hints_props.insert({tput_name, std::to_string(tput_hints.second.num_streams)}); - ngraphFunc->set_rt_info(hints_props, "intel_cpu_hints_config"); + model->set_rt_info(hints_props, "intel_cpu_hints_config"); const auto perf_hint_name = getPerfHintName(); if (perf_hint_name == CONFIG_VALUE(LATENCY)) { @@ -308,13 +308,14 @@ void Engine::ApplyPerformanceHints(std::map &config, c std::to_string(tput_hints.second.threads_per_stream_small); config[CONFIG_KEY_INTERNAL(SMALL_CORE_OFFSET)] = std::to_string(tput_hints.second.small_core_offset); } + OPENVINO_SUPPRESS_DEPRECATED_END } -void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr& ngraphFunc) { +void 
Engine::get_performance_streams(Config& config, const std::shared_ptr& model) const{ const auto perf_hint_name = config.perfHintsConfig.ovPerfHint; const int latency_streams = get_default_latency_streams(config.latencyThreadingMode); int streams; - + OPENVINO_SUPPRESS_DEPRECATED_START if (config.streamExecutorConfig._streams_changed) { streams = config.streamExecutorConfig._streams; } else if (perf_hint_name == CONFIG_VALUE(LATENCY)) { @@ -326,25 +327,26 @@ void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr& function, bool imported) { +void Engine::calculate_streams(Config& conf, const std::shared_ptr& model, bool imported) const{ // import config props from caching model if (imported && !is_cpu_map_available()) { - if (function->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) { + if (model->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) { const auto mode_name = conf.perfHintsConfig.ovPerfHint; if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) { - const auto& hints_config = function->get_rt_info("intel_cpu_hints_config"); + const auto& hints_config = model->get_rt_info("intel_cpu_hints_config"); const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name()); const auto it = hints_config.find(hints_param_name); if (it != hints_config.end()) { conf.readProperties({{std::string(ov::num_streams.name()), it->second.as()}}); } else { - IE_THROW() << "Cache file doesn't contain precalculated number of streams for mode " << mode_name; + OPENVINO_THROW("Cache file doesn't contain precalculated number of streams for mode ", mode_name); } } } @@ -352,10 +354,10 @@ void Engine::CalculateStreams(Config& conf, const std::shared_ptrhas_rt_info("intel_cpu_hints_config")) { + if (imported && model->has_rt_info("intel_cpu_hints_config")) { // load model_prefer_threads from cache int cache_model_prefer; - const auto& hints_config = 
function->get_rt_info("intel_cpu_hints_config"); + const auto& hints_config = model->get_rt_info("intel_cpu_hints_config"); const auto it_model_prefer = hints_config.find(model_prefer_name); if (it_model_prefer != hints_config.end()) { try { @@ -367,27 +369,27 @@ void Engine::CalculateStreams(Config& conf, const std::shared_ptrset_rt_info(hints_props, "intel_cpu_hints_config"); + model->set_rt_info(hints_props, "intel_cpu_hints_config"); } } } -StreamCfg Engine::GetNumStreams(InferenceEngine::IStreamsExecutor::ThreadBindingType thread_binding_type, +StreamCfg Engine::get_streams_num(ov::threading::IStreamsExecutor::ThreadBindingType thread_binding_type, int stream_mode, const bool enable_hyper_thread) const { - const int sockets = static_cast(getAvailableNUMANodes().size()); + const int sockets = static_cast(get_available_numa_nodes().size()); const int num_cores = thread_binding_type == IStreamsExecutor::ThreadBindingType::HYBRID_AWARE ? parallel_get_max_threads() - : (sockets == 1 ? (enable_hyper_thread ? parallel_get_max_threads() : getNumberOfCPUCores()) - : getNumberOfCPUCores()); - const int num_cores_phy = getNumberOfCPUCores(); - const int num_big_cores_phy = getNumberOfCPUCores(true); + : (sockets == 1 ? (enable_hyper_thread ? parallel_get_max_threads() : get_number_of_cpu_cores()) + : get_number_of_cpu_cores()); + const int num_cores_phy = get_number_of_cpu_cores(); + const int num_big_cores_phy = get_number_of_cpu_cores(true); const int num_small_cores = num_cores_phy - num_big_cores_phy; const int num_big_cores = num_cores > num_cores_phy ? 
num_big_cores_phy * 2 : num_big_cores_phy; StreamCfg stream_cfg = {0}; @@ -450,7 +452,7 @@ StreamCfg Engine::GetNumStreams(InferenceEngine::IStreamsExecutor::ThreadBinding stream_cfg.num_streams = num_cores_phy / 2; } } else { - IE_THROW() << "Wrong stream mode to get num of streams: " << stream_mode; + OPENVINO_THROW("Wrong stream mode to get num of streams: ", stream_mode); } stream_cfg.num_streams = stream_cfg.num_streams > 0 ? stream_cfg.num_streams @@ -459,21 +461,21 @@ StreamCfg Engine::GetNumStreams(InferenceEngine::IStreamsExecutor::ThreadBinding return stream_cfg; } -static bool shouldEnableLPT(const std::map& modelConfig, const Config& engineConfig) { +static bool shouldEnableLPT(const ov::AnyMap& modelConfig, const Config& engineConfig) { const auto& enableLPT = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE); if (enableLPT == modelConfig.end()) // model config has higher priority return engineConfig.lpTransformsMode == Config::LPTransformsMode::On; - const auto& val = enableLPT->second; - if (val == PluginConfigParams::YES) + const auto& val = enableLPT->second.as(); + if (val == InferenceEngine::PluginConfigParams::YES) return true; - else if (val == PluginConfigParams::NO) + else if (val == InferenceEngine::PluginConfigParams::NO) return false; else - IE_THROW() << "Wrong value for property key LP_TRANSFORMS_MODE. Expected values: YES/NO"; + OPENVINO_THROW("Wrong value for property key LP_TRANSFORMS_MODE. 
Expected values: YES/NO"); } -static ov::element::Type getInferencePrecision(const std::map& modelConfig, +static ov::element::Type getInferencePrecision(const ov::AnyMap& modelConfig, const Config& engineConfig, Config::ModelType modelType) { Config tempConf = engineConfig; @@ -487,42 +489,43 @@ static Config::ModelType getModelType(const std::shared_ptr& model) Config::ModelType::CNN : Config::ModelType::Unknown; } -static Config::SnippetsMode getSnippetsMode(const std::map& modelConfig, const Config& engineConfig) { +static Config::SnippetsMode getSnippetsMode(const ov::AnyMap& modelConfig, const Config& engineConfig) { const auto& snippetsMode = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE); - if (snippetsMode == modelConfig.end()) // not set explicitly - return Config::SnippetsMode::Enable; // enable by default + if (snippetsMode == modelConfig.end()) // not set explicitly + return Config::SnippetsMode::Enable; // enable by default - const auto& val = snippetsMode->second; - if (val == PluginConfigInternalParams::IGNORE_CALLBACK) + const auto& val = snippetsMode->second.as(); + if (val == InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK) return Config::SnippetsMode::IgnoreCallback; - else if (val == PluginConfigInternalParams::DISABLE) + else if (val == InferenceEngine::PluginConfigInternalParams::DISABLE) return Config::SnippetsMode::Disable; - else if (val == PluginConfigInternalParams::ENABLE) + else if (val == InferenceEngine::PluginConfigInternalParams::ENABLE) return Config::SnippetsMode::Enable; else - IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK"; + OPENVINO_THROW("Wrong value for property key SNIPPETS_MODE. 
Expected values: ENABLE/DISABLE/IGNORE_CALLBACK"); } -InferenceEngine::IExecutableNetworkInternal::Ptr -Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map &orig_config) { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl"); +std::shared_ptr +Engine::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const{ + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::compile_model"); CREATE_DEBUG_TIMER(debugLoadTimer); // verification of supported input - for (const auto &ii : network.getInputsInfo()) { - auto input_precision = ii.second->getPrecision(); - - using hash_t = std::hash::type>; - - static const std::unordered_set supported_precisions = { - Precision::U8, Precision::I8, - Precision::U16, Precision::I16, - Precision::U32, Precision::I32, - Precision::U64, Precision::I64, - Precision::BF16, Precision::FP16, - Precision::FP32, Precision::FP64, - Precision::BOOL - }; + for (const auto &ii : model->inputs()) { + auto input_precision = ii.get_element_type(); + static const std::set supported_precisions = {ov::element::Type_t::u8, + ov::element::Type_t::i8, + ov::element::Type_t::u16, + ov::element::Type_t::i16, + ov::element::Type_t::u32, + ov::element::Type_t::i32, + ov::element::Type_t::u64, + ov::element::Type_t::i64, + ov::element::Type_t::bf16, + ov::element::Type_t::f16, + ov::element::Type_t::f32, + ov::element::Type_t::f64, + ov::element::Type_t::boolean}; if (!supported_precisions.count(input_precision)) { IE_CPU_PLUGIN_THROW(NotImplemented) @@ -531,46 +534,61 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std } auto config = orig_config; - - CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); + const std::shared_ptr cloned_model = model->clone(); const bool enableLPT = shouldEnableLPT(config, engConfig); - auto nGraphFunc = clonedNetwork.getFunction(); - Config::ModelType modelType = getModelType(nGraphFunc); + 
Config::ModelType modelType = getModelType(model); ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig, modelType); - const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig); - - DEBUG_LOG(PrintableModel(*nGraphFunc, "org_")); + const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig); + DEBUG_LOG(PrintableModel(*cloned_model, "org_")); // update the props after the perf mode translated to configs // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? Config conf = engConfig; - Transformations transformations(nGraphFunc, enableLPT, inferencePrecision, isLegacyAPI(), snippetsMode, conf); + Transformations transformations(cloned_model, enableLPT, inferencePrecision, is_legacy_api(), snippetsMode, conf); transformations.UpToLpt(); if (!is_cpu_map_available()) { - ApplyPerformanceHints(config, nGraphFunc); + apply_performance_hints(config, cloned_model); } conf.readProperties(config, modelType); - CalculateStreams(conf, nGraphFunc); + calculate_streams(conf, cloned_model); transformations.PostLpt(); transformations.Snippets(); // need to check that all outputs have static shapes // checking that all inputs have static shapes is performed in the common part - if (isLegacyAPI()) { - for (const auto& res : nGraphFunc->get_results()) { + if (is_legacy_api()) { + for (const auto& res : cloned_model->get_results()) { if (res->get_input_partial_shape(0).is_dynamic()) { - IE_THROW() << "CPU plug-in can't load a model with dynamic output shapes via legacy API."; + OPENVINO_THROW("CPU plug-in can't load a model with dynamic output shapes via legacy API."); } } } transformations.CpuSpecificOpSet(); - - DEBUG_LOG(PrintableModel(*nGraphFunc, "cpu_")); + DEBUG_LOG(PrintableModel(*cloned_model, "cpu_")); + + if ((cloned_model->inputs().size() != model->inputs().size()) || + (cloned_model->outputs().size() != model->outputs().size())) { + OPENVINO_THROW("Input/output ports count mismatch between 
the original model and after the transformation! " + "Original model inputs count: ", + model->inputs().size(), + " after the transformations ", + cloned_model->inputs().size(), + ". Original model outputs count:", + model->inputs().size(), + " after the transformations ", + cloned_model->outputs().size()); + } + // Make output ports have the same tensor names with original model + for (size_t idx = 0; idx < cloned_model->outputs().size(); idx++) { + auto new_result = cloned_model->output(idx); + auto orig_result = model->output(idx); + new_result.get_tensor().set_names(orig_result.get_tensor().get_names()); + } // SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE static Xbyak::util::Cpu cpu; @@ -583,52 +601,53 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std denormals_as_zero(false); } } - - return std::make_shared(clonedNetwork, conf, extensionManager, shared_from_this()); + return std::make_shared(cloned_model, shared_from_this(), conf, extensionManager); } -void Engine::SetConfig(const std::map &config) { +void Engine::set_property(const ov::AnyMap &config) { // @todo after Legacy configuration is dropped, use some wrapper class to keep both the property and "ifSetExplicitly" flag streamsExplicitlySetForEngine = streamsSet(config); engConfig.readProperties(config); } -bool Engine::isLegacyAPI() const { - return !IsNewAPI(); +bool Engine::is_legacy_api() const { + return !get_core()->is_new_api(); } -Parameter Engine::GetConfigLegacy(const std::string& name, const std::map& options) const { - Parameter result; +ov::Any Engine::get_property_legacy(const std::string& name, const ov::AnyMap& options) const { + ov::Any result; auto option = engConfig._config.find(name); if (option != engConfig._config.end()) { result = option->second; } else { - IE_CPU_PLUGIN_THROW() << ". 
Unsupported config parameter: " << name; + return get_metric_legacy(name, options); } return result; } -Parameter Engine::GetConfig(const std::string& name, const std::map& options) const { - if (isLegacyAPI()) - return GetConfigLegacy(name, options); +ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) const { + if (is_legacy_api()) + return get_property_legacy(name, options); if (name == ov::optimal_number_of_infer_requests) { const auto streams = engConfig.streamExecutorConfig._streams; - return decltype(ov::optimal_number_of_infer_requests)::value_type(streams); // ov::optimal_number_of_infer_requests has no negative values + return decltype(ov::optimal_number_of_infer_requests)::value_type( + streams); // ov::optimal_number_of_infer_requests has no negative values } else if (name == ov::num_streams) { const auto streams = engConfig.streamExecutorConfig._streams; - return decltype(ov::num_streams)::value_type(streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) + return decltype(ov::num_streams)::value_type( + streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2) } else if (name == ov::affinity) { const auto affinity = engConfig.streamExecutorConfig._threadBindingType; switch (affinity) { - case InferenceEngine::IStreamsExecutor::ThreadBindingType::NONE: + case IStreamsExecutor::ThreadBindingType::NONE: return ov::Affinity::NONE; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::CORES: + case IStreamsExecutor::ThreadBindingType::CORES: return ov::Affinity::CORE; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::NUMA: + case IStreamsExecutor::ThreadBindingType::NUMA: return ov::Affinity::NUMA; - case InferenceEngine::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: + case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: return ov::Affinity::HYBRID_AWARE; } return ov::Affinity::NONE; @@ -660,12 +679,11 @@ Parameter Engine::GetConfig(const std::string& name, 
const std::map& options) const { +ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& options) const { + OPENVINO_SUPPRESS_DEPRECATED_START if (name == METRIC_KEY(SUPPORTED_METRICS)) { std::vector metrics = { METRIC_KEY(AVAILABLE_DEVICES), @@ -717,11 +735,12 @@ Parameter Engine::GetMetricLegacy(const std::string& name, const std::map& options) const { - if (isLegacyAPI()) - return GetMetricLegacy(name, options); +ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& options) const { + if (is_legacy_api()) + return get_metric_legacy(name, options); auto RO_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RO); @@ -798,19 +817,14 @@ Parameter Engine::GetMetric(const std::string& name, const std::mapAddExtension(extension); + return get_metric_legacy(name, options); } -QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map& config) const { +ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& model, const ov::AnyMap& config) const { WeightsSharing::Ptr fake_w_cache; - auto model = network.getFunction(); if (model == nullptr) { - IE_THROW() << "Only ngraph-based models are supported!"; + OPENVINO_THROW("Only ngraph-based models are supported!"); } Config conf = engConfig; @@ -818,65 +832,68 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma conf.readProperties(config, modelType); const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE); - const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/ - || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */; + const bool enableLPT = + (lptProp != config.end() && + lptProp->second.as() == InferenceEngine::PluginConfigParams::YES) /* enabled in the orig_config*/ + || Config::LPTransformsMode::On == 
engConfig.lpTransformsMode /* or already enabled */; const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf); auto context = - std::make_shared(conf, extensionManager, fake_w_cache, false); - - auto supported = GetSupportedNodes(model, - [&](std::shared_ptr& model) { - Transformations transformation(model, enableLPT, conf.inferencePrecision, isLegacyAPI(), snippetsMode, engConfig); - transformation.UpToLpt(); - transformation.PostLpt(); - transformation.Snippets(); - transformation.CpuSpecificOpSet(); - }, - [&](const std::shared_ptr& op) { - std::unique_ptr ptr; - try { - ptr.reset(Node::factory().create(op, context)); - } catch (const InferenceEngine::Exception&) { - return false; - } - return true; - }); - - QueryNetworkResult res; + std::make_shared(conf, nullptr, fake_w_cache, false); + + auto supported = ov::get_supported_nodes( + model, + [&](std::shared_ptr& model) { + Transformations transformation(model, + enableLPT, + conf.inferencePrecision, + is_legacy_api(), + snippetsMode, + engConfig); + transformation.UpToLpt(); + transformation.PostLpt(); + transformation.Snippets(); + transformation.CpuSpecificOpSet(); + }, + [&](const std::shared_ptr& op) { + std::unique_ptr ptr; + try { + ptr.reset(Node::factory().create(op, context)); + } catch (const InferenceEngine::Exception&) { + return false; + } + return true; + }); + + ov::SupportedOpsMap res; for (auto&& layerName : supported) { - res.supportedLayersMap.emplace(layerName, GetName()); + res.emplace(layerName, get_device_name()); } return res; } -InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istream& networkModel, - const std::map& config) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "ImportNetwork"); +std::shared_ptr Engine::import_model(std::istream& networkModel, + const ov::AnyMap& config) const{ + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - CNNNetworkDeserializer deserializer(networkModel, - [this](const 
std::string& model, const Blob::CPtr& weights) { - return GetCore()->ReadNetwork(model, weights, true); + ModelDeserializer deserializer(networkModel, + [this](const std::string& model, const ov::Tensor& weights) { + return get_core()->read_model(model, weights, true); }); - CNNNetwork cnnnetwork; - deserializer >> cnnnetwork; + std::shared_ptr model; + deserializer >> model; - auto function = cnnnetwork.getFunction(); - Config::ModelType modelType = getModelType(function); Config conf = engConfig; + Config::ModelType modelType = getModelType(model); conf.readProperties(config, modelType); - CalculateStreams(conf, function, true); - - auto execNetwork = std::make_shared(cnnnetwork, conf, extensionManager, shared_from_this()); - - execNetwork->setNetworkInputs(cnnnetwork.getInputsInfo()); - execNetwork->setNetworkOutputs(cnnnetwork.getOutputsInfo()); - SetExeNetworkInfo(execNetwork, cnnnetwork.getFunction()); + // import config props from caching model + calculate_streams(conf, model, true); - return execNetwork; + auto compiled_model = std::make_shared(model, shared_from_this(), conf, extensionManager, true); + return compiled_model; } } // namespace intel_cpu } // namespace ov @@ -884,13 +901,13 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr using namespace ov::intel_cpu; #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) -static const Version version = {{2, 1}, CI_BUILD_NUMBER, "openvino_arm_cpu_plugin"}; +static const ov::Version version = {CI_BUILD_NUMBER, "openvino_arm_cpu_plugin"}; #elif defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) -static const Version version = {{2, 1}, CI_BUILD_NUMBER, "openvino_intel_cpu_plugin"}; +static const ov::Version version = {CI_BUILD_NUMBER, "openvino_intel_cpu_plugin"}; #elif defined(OPENVINO_ARCH_RISCV64) -static const Version version = {{2, 1}, CI_BUILD_NUMBER, "openvino_riscv_cpu_plugin"}; +static const ov::Version version = {CI_BUILD_NUMBER, 
"openvino_riscv_cpu_plugin"}; #else #error "Undefined system processor" #endif -IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version) +OV_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version) diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 3e9d616dcec02c..4bb728f91376e0 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -4,56 +4,72 @@ #pragma once -#include "exec_network.h" +#include "compiled_model.h" #include "cpu_streams_calculation.hpp" -#include -#include -#include -#include - namespace ov { namespace intel_cpu { -class Engine : public InferenceEngine::IInferencePlugin { +class Engine : public ov::IPlugin { public: Engine(); ~Engine(); - std::shared_ptr - LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const std::map &config) override; - - void AddExtension(const InferenceEngine::IExtensionPtr& extension) override; - - void SetConfig(const std::map &config) override; - - InferenceEngine::Parameter GetConfig(const std::string& name, const std::map& options) const override; - - InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; + std::shared_ptr compile_model(const std::shared_ptr& model, + const ov::AnyMap& properties) const override; + std::shared_ptr compile_model(const std::shared_ptr& model, + const ov::AnyMap& properties, + const ov::SoPtr& context) const override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented", + "compile_model with RemoteContext is not supported by CPU plugin!"); + }; - InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, - const std::map& config) const override; + void set_property(const ov::AnyMap& properties) override; + ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override; + std::shared_ptr import_model(std::istream& model, const ov::AnyMap& properties) const override; + 
std::shared_ptr import_model(std::istream& model, + const ov::SoPtr& context, + const ov::AnyMap& properties) const override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented", + "import_model with RemoteContext is not supported by CPU plugin!"); + }; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel, - const std::map& config) override; + ov::SupportedOpsMap query_model(const std::shared_ptr& model, + const ov::AnyMap& properties) const override; + ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented", + "create_context is not supported by CPU plugin!"); + }; + ov::SoPtr get_default_context(const ov::AnyMap& remote_properties) const override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented", + "get_default_context is not supported by CPU plugin!"); + }; private: - bool isLegacyAPI() const; - - InferenceEngine::Parameter GetMetricLegacy(const std::string& name, const std::map& options) const; - - InferenceEngine::Parameter GetConfigLegacy(const std::string& name, const std::map& options) const; - - void ApplyPerformanceHints(std::map &config, const std::shared_ptr& ngraphFunc) const; - - void GetPerformanceStreams(Config &config, const std::shared_ptr& ngraphFunc); - - void CalculateStreams(Config& conf, const std::shared_ptr& ngraphFunc, bool imported = false); - - StreamCfg GetNumStreams(InferenceEngine::IStreamsExecutor::ThreadBindingType thread_binding_type, - int stream_mode, - const bool enable_hyper_thread = true) const; + bool is_legacy_api() const; + + ov::Any get_ro_property(const std::string& name, const ov::AnyMap& options) const; + ov::Any get_metric_legacy(const std::string& name, const ov::AnyMap& options) const; + + ov::Any get_property_legacy(const std::string& name, const ov::AnyMap& options) const; + void 
apply_performance_hints(ov::AnyMap &config, const std::shared_ptr& model) const; + void get_performance_streams(Config &config, const std::shared_ptr& model) const; + StreamCfg get_streams_num(ov::threading::IStreamsExecutor::ThreadBindingType thread_binding_type, + int stream_mode, + const bool enable_hyper_thread = true) const; + void calculate_streams(Config& conf, const std::shared_ptr& model, bool imported = false) const; Config engConfig; ExtensionManager::Ptr extensionManager = std::make_shared(); diff --git a/src/plugins/intel_cpu/src/serialize.cpp b/src/plugins/intel_cpu/src/serialize.cpp index 406ecac74eb4ea..2951b7c1b33714 100644 --- a/src/plugins/intel_cpu/src/serialize.cpp +++ b/src/plugins/intel_cpu/src/serialize.cpp @@ -3,70 +3,36 @@ // #include "serialize.h" -#include - #include +#include "openvino/pass/serialize.hpp" +#include "transformations/utils/utils.hpp" + using namespace InferenceEngine; namespace ov { namespace intel_cpu { -namespace { - std::string to_string(InferenceEngine::Layout layout) { - std::stringstream ss; - ss << layout; - return ss.str(); - } - - InferenceEngine::Layout layout_from_string(const std::string & name) { - static const std::unordered_map layouts = { - { "ANY", InferenceEngine::Layout::ANY }, - { "NCHW", InferenceEngine::Layout::NCHW }, - { "NHWC", InferenceEngine::Layout::NHWC }, - { "NCDHW", InferenceEngine::Layout::NCDHW }, - { "NDHWC", InferenceEngine::Layout::NDHWC }, - { "OIHW", InferenceEngine::Layout::OIHW }, - { "C", InferenceEngine::Layout::C }, - { "CHW", InferenceEngine::Layout::CHW }, - { "HWC", InferenceEngine::Layout::HWC }, - { "HW", InferenceEngine::Layout::HW }, - { "NC", InferenceEngine::Layout::NC }, - { "CN", InferenceEngine::Layout::CN }, - { "BLOCKED", InferenceEngine::Layout::BLOCKED } - }; - auto it = layouts.find(name); - if (it != layouts.end()) { - return it->second; - } - IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'"; - } - template - void 
setInfo(pugi::xml_object_range&& nodes, T&& info) { - auto nodes_it = nodes.begin(); - auto info_iter = info.begin(); - for (; nodes_it != nodes.end(); ++nodes_it, ++info_iter) { - auto name_attr = nodes_it->attribute("name"); - auto precision_attr = nodes_it->attribute("precision"); - auto layout_attr = nodes_it->attribute("layout"); - - if (!name_attr || !precision_attr || !layout_attr || info_iter == info.end()) { - IE_THROW(NetworkNotRead) << "The inputs/outputs information is invalid."; - } - - info_iter->second->setName(name_attr.value()); - info_iter->second->setPrecision(Precision::FromStr(precision_attr.value())); - info_iter->second->setLayout(layout_from_string(layout_attr.value())); - } +static void setInfo(pugi::xml_node& root, std::shared_ptr& model) { + pugi::xml_node outputs = root.child("outputs"); + auto nodes_it = outputs.children("out").begin(); + size_t size = model->outputs().size(); + for (size_t i = 0; i < size; ++nodes_it, i++) { + std::string name = nodes_it->attribute("name").value(); + if (name.empty()) + continue; + auto result = model->output(i).get_node_shared_ptr(); + ov::descriptor::set_ov_tensor_legacy_name(result->input_value(0).get_tensor(), name); } -}; // namespace +} -CNNNetworkSerializer::CNNNetworkSerializer(std::ostream & ostream, ExtensionManager::Ptr extensionManager) +ModelSerializer::ModelSerializer(std::ostream & ostream, ExtensionManager::Ptr extensionManager) : _ostream(ostream) , _extensionManager(extensionManager) { } -void CNNNetworkSerializer::operator << (const CNNNetwork & network) { +void ModelSerializer::operator<<(const std::shared_ptr& model) { + OPENVINO_SUPPRESS_DEPRECATED_START auto getCustomOpSets = [this]() { std::map custom_opsets; @@ -81,75 +47,60 @@ void CNNNetworkSerializer::operator << (const CNNNetwork & network) { return custom_opsets; }; - auto serializeInputsAndOutputs = [&](std::ostream & stream) { + auto serializeInfo = [&](std::ostream& stream) { const std::string name = "cnndata"; 
pugi::xml_document xml_doc; pugi::xml_node root = xml_doc.append_child(name.c_str()); - pugi::xml_node inputs = root.append_child("inputs"); pugi::xml_node outputs = root.append_child("outputs"); - - for (const auto & in : network.getInputsInfo()) { - auto in_node = inputs.append_child("in"); - - in_node.append_attribute("name") - .set_value(in.first.c_str()); - in_node.append_attribute("precision") - .set_value(in.second->getPrecision().name()); - in_node.append_attribute("layout") - .set_value(to_string(in.second->getLayout()).c_str()); - } - - for (const auto & out : network.getOutputsInfo()) { + for (const auto& out : model->get_results()) { auto out_node = outputs.append_child("out"); - out_node.append_attribute("name") - .set_value(out.first.c_str()); - out_node.append_attribute("precision") - .set_value(out.second->getPrecision().name()); - out_node.append_attribute("layout") - .set_value(to_string(out.second->getLayout()).c_str()); + const std::string name = ov::descriptor::get_ov_tensor_legacy_name(out->input_value(0).get_tensor()); + out_node.append_attribute("name").set_value(name.c_str()); } - xml_doc.save(stream); }; // Serialize to old representation in case of old API - OPENVINO_SUPPRESS_DEPRECATED_START - ov::pass::StreamSerialize serializer(_ostream, getCustomOpSets(), serializeInputsAndOutputs); + ov::pass::StreamSerialize serializer(_ostream, getCustomOpSets(), serializeInfo); OPENVINO_SUPPRESS_DEPRECATED_END - serializer.run_on_model(std::const_pointer_cast(network.getFunction())); + serializer.run_on_model(std::const_pointer_cast(model->clone())); } -CNNNetworkDeserializer::CNNNetworkDeserializer(std::istream & istream, cnn_network_builder fn) +ModelDeserializer::ModelDeserializer(std::istream & istream, model_builder fn) : _istream(istream) - , _cnn_network_builder(fn) { + , _model_builder(fn) { } -void CNNNetworkDeserializer::operator >> (InferenceEngine::CNNNetwork & network) { +void ModelDeserializer::operator>>(std::shared_ptr& model) { 
using namespace ov::pass; - std::string xmlString, xmlInOutString; - InferenceEngine::Blob::Ptr dataBlob; + std::string xmlString; + ov::Tensor dataBlob; StreamSerialize::DataHeader hdr = {}; _istream.read(reinterpret_cast(&hdr), sizeof hdr); - // read CNNNetwork input/output precisions + // read model input/output precisions _istream.seekg(hdr.custom_data_offset); - xmlInOutString.resize(hdr.custom_data_size); - _istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); + + OPENVINO_SUPPRESS_DEPRECATED_START pugi::xml_document xmlInOutDoc; - auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); - if (res.status != pugi::status_ok) { - IE_THROW(NetworkNotRead) << "The inputs and outputs information is invalid."; + if (hdr.custom_data_size > 0) { + std::string xmlInOutString; + xmlInOutString.resize(hdr.custom_data_size); + _istream.read(const_cast(xmlInOutString.c_str()), hdr.custom_data_size); + auto res = xmlInOutDoc.load_string(xmlInOutString.c_str()); + if (res.status != pugi::status_ok) { + OPENVINO_THROW("NetworkNotRead: The inputs and outputs information is invalid."); + } } + OPENVINO_SUPPRESS_DEPRECATED_END // read blob content _istream.seekg(hdr.consts_offset); if (hdr.consts_size) { - dataBlob = InferenceEngine::make_shared_blob( - InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {hdr.consts_size}, InferenceEngine::Layout::C)); - dataBlob->allocate(); - _istream.read(dataBlob->buffer(), hdr.consts_size); + dataBlob = ov::Tensor(ov::element::u8, ov::Shape({hdr.consts_size})); + _istream.read(static_cast(dataBlob.data(ov::element::u8)), hdr.consts_size); } // read XML content @@ -157,15 +108,11 @@ void CNNNetworkDeserializer::operator >> (InferenceEngine::CNNNetwork & network) xmlString.resize(hdr.model_size); _istream.read(const_cast(xmlString.c_str()), hdr.model_size); - network = _cnn_network_builder(xmlString, std::move(dataBlob)); + model = _model_builder(xmlString, std::move(dataBlob)); - // Set input and output 
precisions + // Set Info pugi::xml_node root = xmlInOutDoc.child("cnndata"); - pugi::xml_node inputs = root.child("inputs"); - pugi::xml_node outputs = root.child("outputs"); - - setInfo(inputs.children("in"), network.getInputsInfo()); - setInfo(outputs.children("out"), network.getOutputsInfo()); + setInfo(root, model); } } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/serialize.h b/src/plugins/intel_cpu/src/serialize.h index 25a9c2e2ad5410..5bbb22661003c7 100644 --- a/src/plugins/intel_cpu/src/serialize.h +++ b/src/plugins/intel_cpu/src/serialize.h @@ -2,40 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include "extension_mngr.h" - -#include #include -#include +#include + +#include "cpp/ie_cnn_network.h" +#include "extension_mngr.h" namespace ov { namespace intel_cpu { -class CNNNetworkSerializer { +class ModelSerializer { public: - CNNNetworkSerializer(std::ostream & ostream, ExtensionManager::Ptr extensionManager); - void operator << (const InferenceEngine::CNNNetwork & network); + ModelSerializer(std::ostream& ostream, ExtensionManager::Ptr extensionManager); + void operator<<(const std::shared_ptr& model); private: - std::ostream & _ostream; + std::ostream& _ostream; ExtensionManager::Ptr _extensionManager; }; -class CNNNetworkDeserializer { +class ModelDeserializer { public: - typedef std::function< - InferenceEngine::CNNNetwork( - const std::string&, - const InferenceEngine::Blob::CPtr&)> cnn_network_builder; - CNNNetworkDeserializer(std::istream & istream, cnn_network_builder fn); - void operator >> (InferenceEngine::CNNNetwork & network); + typedef std::function(const std::string&, const ov::Tensor&)> model_builder; + ModelDeserializer(std::istream& istream, model_builder fn); + void operator>>(std::shared_ptr& model); private: - std::istream & _istream; - cnn_network_builder _cnn_network_builder; + std::istream& _istream; + model_builder _model_builder; }; -// const std::string& model, const Blob::CPtr& weights - } // 
namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp index cd055969b8a2eb..0ad94336caea1d 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/align_matmul_input_ranks.cpp @@ -112,14 +112,22 @@ ov::intel_cpu::AlignMatMulInputRanks::AlignMatMulInputRanks() { // Insert additional squeeze operation to preserve output shape const auto new_out_shape_size = matmul_new->get_output_partial_shape(0).size(); size_t squeeze_axis = 0; - if (input0shape.size() == 1) - squeeze_axis = new_out_shape_size - 2; - else if (input1shape.size() == 1) - squeeze_axis = new_out_shape_size - 1; - std::shared_ptr squeeze_output = std::make_shared( - matmul_new, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {squeeze_axis})); - + std::shared_ptr squeeze_output; + // If output data is scalar && new_out_shape is [1 1 .. 1], squeeze all the axis to produce a scalar + auto& new_output_partial_shape = matmul_new->get_output_partial_shape(0); + const bool can_squeeze_scalar = + new_output_partial_shape.is_static() ? 
ov::shape_size(new_output_partial_shape.to_shape()) == 1 : false; + if (ov::is_scalar(output_shape) && can_squeeze_scalar) { + squeeze_output = std::make_shared(matmul_new); + } else { + if (input0shape.size() == 1) + squeeze_axis = new_out_shape_size - 2; + else if (input1shape.size() == 1) + squeeze_axis = new_out_shape_size - 1; + squeeze_output = std::make_shared( + matmul_new, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {squeeze_axis})); + } new_ops.push_back(squeeze_output); matmul_new->set_friendly_name(matmul->get_friendly_name() + "/MM"); // Set the name of the last node after transformation to initial node name diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp index 3e2158e7a383f7..090237690f50da 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp @@ -48,7 +48,12 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF // after transformation "MoveEltwiseUpThroughDataMov" there can be reshaped sequences that should be eliminated or fused CPU_REGISTER_PASS_COMMON(manager, ov::pass::ReshapeSequenceFusion); CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding); - CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions_map {{ ngraph::element::i64, ngraph::element::i32 }}); + CPU_REGISTER_PASS_COMMON(manager, + ov::pass::ConvertPrecision, + precisions_map{{ngraph::element::i64, ngraph::element::i32}}, + type_to_fuse_map{{}}, + false, + false); auto symbolic_pipeline = CPU_REGISTER_PASS_COMMON(manager, ov::pass::SymbolicOptimizations, false); symbolic_pipeline->get_manager()->register_pass(); CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp 
b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 590af95e01812d..c16cfc2d648768 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -130,32 +130,46 @@ namespace intel_cpu { using const_node_ptr = const std::shared_ptr; -bool Transformations::fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { +bool Transformations::fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { + auto convert = ov::as_type_ptr(node); + if (!convert) + return false; const auto& from = node->get_output_element_type(0); auto it = precisions.find(from); if (it == precisions.end()) return false; const auto& to = it->second; - if (auto convert = ov::as_type_ptr(node)) { - // For Convert node, converting precision from floating point to boolean will lead to mathematical - // error, because here the output precision boolean is replaced by u8. E.g. floating point value 0.01 - // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the - // Convert node for this scenario. - if (convert->input(0).get_element_type().is_real() && - convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) { + + // For Convert node, converting precision from floating point to boolean will lead to mathematical + // error, because here the output precision boolean is replaced by u8. E.g. floating point value 0.01 + // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the + // Convert node for this scenario. 
+ if (convert->input(0).get_element_type().is_real() && + convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) { + const auto& in_prec = node->get_input_element_type(0); + auto item = precisions.find(in_prec); + if (item != precisions.end()) { + // Add convert node for unsupported precision, such as FP64 + auto pre_convert = + std::make_shared(convert->input_value(0).get_node_shared_ptr(), item->second); + auto abs = std::make_shared(pre_convert); + auto ceil = std::make_shared(abs); + auto new_convert = std::make_shared(ceil, to); + new_convert->set_friendly_name(convert->get_friendly_name()); + ov::copy_runtime_info(convert, {pre_convert, abs, ceil, new_convert}); + ov::replace_node(convert, new_convert); + } else { auto abs = std::make_shared(convert->input_value(0).get_node_shared_ptr()); auto ceil = std::make_shared(abs); auto new_convert = std::make_shared(ceil, to); new_convert->set_friendly_name(convert->get_friendly_name()); ov::copy_runtime_info(convert, {abs, ceil, new_convert}); ov::replace_node(convert, new_convert); - return true; - } else { - convert->set_convert_element_type(to); - return true; } + } else { + convert->set_convert_element_type(to); } - return false; + return true; } void Transformations::UpToLpt() { @@ -258,6 +272,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis {ov::element::i4, ov::element::i8}, {ov::element::u4, ov::element::u8} }; + // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? 
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) map.insert({ov::element::bf16, ov::element::f32}); @@ -269,20 +284,25 @@ void Transformations::PreLpt(const std::vector& defaultPrecis #endif return map; }; - static const auto precisions = get_convert_precisions(); + type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}}; #if defined(OV_CPU_ARM_ENABLE_FP16) + // It cannot be static data, because it may be difference for different inferencePrecision + const auto precisions = get_convert_precisions(); if (inferencePrecision == ov::element::f16) { - precisions_map fp_convert_precision_map = { - {ov::element::f32, ov::element::f16} - }; + precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; type_to_fuse_map empty_fuse_map = {}; const bool keep_precision_sensitive_in_fp32 = true; - CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map, - empty_fuse_map, - keep_precision_sensitive_in_fp32); + CPU_REGISTER_PASS_COMMON(manager, + ov::pass::ConvertPrecision, + fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32, + false); } +#else + static const auto precisions = get_convert_precisions(); #endif CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression); CPU_SET_CALLBACK_COMMON(manager, @@ -324,7 +344,8 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // However, if the extension operation produces an output precision that is not natively supported, this may lead to inconsistency during // element type propagation. This transformation is called before the ConvertPrecision pass to align the actual precisions with the list of supported ones. CPU_REGISTER_PASS_COMMON(manager, ov::pass::InsertConvertAfterExtension); - CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse); + // Precision convert is disabled. 
+ CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse, false, false); CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert); CPU_REGISTER_PASS_COMMON(manager, SwapConvertTranspose); @@ -628,7 +649,7 @@ void Transformations::MainSnippets(void) { // [122706] Some 3D MHA Patterns have perf regressions when Transpose op is tokenized tokenization_config.mha_supported_transpose_ranks = { 4 }; - ngraph::pass::Manager snippetsManager; + ov::pass::Manager snippetsManager; snippetsManager.set_per_pass_validation(false); if (snippetsMode != Config::SnippetsMode::IgnoreCallback) CPU_REGISTER_PASS_X64(snippetsManager, SnippetsMarkSkipped, inferencePrecision != ov::element::f32); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h index dc7c734abce382..84be0afe6c1287 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h @@ -15,8 +15,6 @@ #include #include -using namespace InferenceEngine; - #define IE_CPU_PLUGIN_THROW(...) 
IE_THROW(__VA_ARGS__) << "CPU plugin: " namespace ov { diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp index c310996a30891d..4cbf42fcfca93d 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp +++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp @@ -211,7 +211,32 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const { stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } + case Precision::I64: { + auto* blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + break; + } + case Precision::U32: { + auto* blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + break; + } + case Precision::U16: { + auto* blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + break; + } + case Precision::I16: { + auto* blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; + break; + } default: + break; IE_THROW() << "Dumper. 
Unsupported precision"; } } diff --git a/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp b/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp index b9743971e20cb1..851fec5e8b8092 100644 --- a/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp +++ b/src/plugins/intel_cpu/src/utils/ngraph_utils.hpp @@ -5,6 +5,8 @@ #pragma once #include +#include + #include "transformations/rt_info/primitives_priority_attribute.hpp" namespace ov { @@ -47,5 +49,24 @@ inline bool isDynamicNgraphNode(const std::shared_ptr& op) { return ret; } +inline std::string get_port_name(const ov::Output& port, const bool is_legacy_api) { + std::string name; + // Should use tensor name as the port name, but many legacy tests still use legacy name + // plus sometimes it will get empty tensor name. + if (!is_legacy_api) { + // TODO: To apply unified tensor name. + } + if (name.empty()) { + bool is_input = ov::op::util::is_parameter(port.get_node()); + if (is_input) { + name = ov::op::util::get_ie_output_name(port); + } else { + const auto node = port.get_node_shared_ptr(); + name = ov::op::util::get_ie_output_name(node->input_value(0)); + } + } + return name; +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 274d23ce23b527..1568bf51e1fa56 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -95,12 +95,10 @@ std::vector disabledTestPatterns() { R"(.*smoke_HeteroOVGetMetricPropsTest.*OVGetMetricPropsTest.*(AVAILABLE_DEVICES|OPTIMIZATION_CAPABILITIES|RANGE_FOR_ASYNC_INFER_REQUESTS|RANGE_FOR_STREAMS).*)", // supports only '' as device id R"(.*OVClassQueryModelTest.*QueryModelWithDeviceID.*)", - // Issue 67214 R"(smoke_PrePostProcess.*resize_and_convert_layout_i8.*)", // TODO: 67255 
R"(smoke_If.*SimpleIf2OutTest.*)", - // Issue: 69086 // need to add support convert BIN -> FP32 // if we set output precision as BIN, when we create output blob precision looks like UNSPECIFIED @@ -166,23 +164,51 @@ std::vector disabledTestPatterns() { R"(.*smoke_Snippets_ConvertStub/ConvertStub\.CompareWithRefImpl/IS.*_OT=\(bf16\)_#N=2_#S=2_targetDevice=CPU.*)", R"(.*smoke_Snippets_Convert/Convert\.CompareWithRefImpl/IS.*_IT=\(f32\)_OT=\(u8\)_#N=1_#S=1_targetDevice=CPU.*)", R"(.*smoke_Snippets_ConvertManyOnInputs/ConvertManyOnInputs\.CompareWithRefImpl/IS.*_IT=\(f32\.u8\)_OT=\(\)_#N=1_#S=1_targetDevice=CPU.*)", + // New plugin work with tensors, so it means that blob in old API can have different pointers + R"(.*InferRequestIOBBlobTest.*secondCallGetInputDoNotReAllocateData.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetOutputDoNotReAllocateData.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetInputAfterInferSync.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetOutputAfterInferSync.*)", // Issue: 106939 R"(.*ScatterNDUpdateLayerCPUTest.*-1.-1.-1.-2.-2.-2.*)", // New plugin API doesn't support changes of pre-processing - R"(.*(Hetero).*InferRequestPreprocessTest.*SetPreProcessToInputInfo.*)", - R"(.*(Hetero).*InferRequestPreprocessTest.*SetPreProcessToInferRequest.*)", - // TODO: for 22.2 (Issue 68949) + R"(.*InferRequestPreprocessTest.*SetPreProcessToInputInfo.*)", + R"(.*InferRequestPreprocessTest.*SetPreProcessToInferRequest.*)", + // Old API cannot deallocate tensor + R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)", + // Plugin version was changed to ov::Version + R"(.*VersionTest.*pluginCurrentVersionIsCorrect.*)", + // Issue: 120286 + R"(.*smoke_Basic/FuseTransposeAndReorderTest.CompareWithRefs.*)", + // Issue: 113703, 114763 + R"(.*smoke_If/SimpleIfTest.*Cond=0.*)", + // Issue: 114765 + R"(.*smoke_PSROIPoolingAverageLayoutTest/PSROIPoolingLayerCPUTest.*BF16.*)", + 
R"(.*smoke_PSROIPoolingBilinearLayoutTest/PSROIPoolingLayerCPUTest.*BF16.*)", + // TODO: for 22.2 (CVS-68949) R"(.*smoke_AutoBatching_CPU/AutoBatching_Test_DetectionOutput.*)", + // Issue: 120279 + R"(.*OVCompiledGraphImportExportTest.*elementType=(i16|u16|u32|u64|i64).*)", // Issue: 120222 R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=1_axis=3_.*_modelType=f16_trgDev=CPU.*)", R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=7_axis=3_.*_modelType=f16_trgDev=CPU.*)", + R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=1_axis=1_.*_modelType=f16_trgDev=CPU.*)", + R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=7_axis=1_.*_modelType=f16_trgDev=CPU.*)", R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=18_.*_modelType=f16_trgDev=CPU.*)", R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=21_.*_sort=value_modelType=f16_trgDev=CPU.*)", // Issue: 121228 R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.Inference.*bf16.*)", + // Issue: 121363 + R"(.*smoke_Constant/ConstantLayerTest.*_dataPRC=(u4|u16|u32|i4|i16|f64).*)", + R"(.*smoke_Constant_with_negative_values/ConstantLayerTest.*_dataPRC=(u4|u16|u32|i4|i16|f64).*)", + R"(.*smoke_Check/ConstantResultSubgraphTest.CompareWithRefs/SubgraphType.*_IT=(u16|i16|u32|i64|u64).*)", // Issue: 121313 R"(smoke_GroupConvBackpropData.*paddingDefined/GroupConvBackpropLayerTest.Inference.*f16.*)", R"(smoke_GroupConvBackpropData.*paddingDefined/GroupConvBackpropLayerTest.Inference.*f32.*)", + // Issue: 121812 + R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)", + // Issue: 122321 + R"(.*smoke_ConvertCPULayerTest_BOOL.*)", // Issue: 122177 R"(smoke_LSTMSequenceCommon.*LSTMSequenceTest.Inference.*CONVERT_TO_TI.*)", // Issue: 122081 @@ -236,6 +262,15 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); + // Issue: 124309 + 
retVector.emplace_back(R"(.*InferRequestPreprocessConversionTest.*oLT=NHWC.*)"); + retVector.emplace_back(R"(.*smoke_NoReshape/ExecGraphUniqueNodeNames.CheckUniqueNodeNames.*)"); + retVector.emplace_back(R"(.*smoke_BehaviorTests/InferRequestPerfCountersTest.CheckOperationInPerfMap.*)"); + retVector.emplace_back(R"(smoke_BehaviorTests/ExecutableNetworkBaseTest.CheckExecGraphInfo.*)"); + retVector.emplace_back(R"(smoke_BehaviorTests/OVCompiledModelBaseTestOptional.CheckExecGraphInfo.*)"); + retVector.emplace_back(R"(smoke_ExecGraph/ExecGraphRuntimePrecision.CheckRuntimePrecision/Function=FakeQuantizeBinaryConvolution.*)"); + // Issue: 124395 + retVector.emplace_back(R"(smoke_VariableStateBasic/InferRequestVariableStateTest.*)"); #endif #endif diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp index 9bf27c6be07c1a..e07a5bd57bd5a5 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp @@ -3,42 +3,47 @@ // #include "snippets/select.hpp" + #include "common_test_utils/test_constants.hpp" namespace ov { namespace test { namespace snippets { - namespace { //============================Select=======================================// std::vector inShapes_a{{{}, {{1, 5, 5, 35}}}}; std::vector inShapes_b{{{}, {{1}}}}; -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Select, Select, - ::testing::Combine( - ::testing::ValuesIn(inShapes_a), - ::testing::ValuesIn(inShapes_a), - ::testing::ValuesIn(inShapes_b), - ::testing::ValuesIn({ov::element::f32, ov::element::i8}), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - Select::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Select, + Select, + ::testing::Combine(::testing::ValuesIn(inShapes_a), + ::testing::ValuesIn(inShapes_a), 
+ ::testing::ValuesIn(inShapes_b), + ::testing::ValuesIn({ov::element::f32, ov::element::i8}), + // Expected num nodes increases 1, because one additional Convert layer will + // be inserted for precision conversion(issue 115822) + ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + Select::getTestCaseName); // DS -std::vector inShapesDynamic_a{{{1, {1, 5}, -1, 35}, {{1, 5, 5, 35}, {1, 1, 1, 35}, {1, 5, 5, 35}}}}; +std::vector inShapesDynamic_a{ + {{1, {1, 5}, -1, 35}, {{1, 5, 5, 35}, {1, 1, 1, 35}, {1, 5, 5, 35}}}}; std::vector inShapesDynamic_b{{{-1}, {{1}, {1}, {1}}}}; -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Select_Dynamic, Select, - ::testing::Combine( - ::testing::ValuesIn(inShapesDynamic_a), - ::testing::ValuesIn(inShapesDynamic_a), - ::testing::ValuesIn(inShapesDynamic_b), - ::testing::ValuesIn({ov::element::f32, ov::element::i8}), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - Select::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Select_Dynamic, + Select, + ::testing::Combine(::testing::ValuesIn(inShapesDynamic_a), + ::testing::ValuesIn(inShapesDynamic_a), + ::testing::ValuesIn(inShapesDynamic_b), + ::testing::ValuesIn({ov::element::f32, ov::element::i8}), + // Expected num nodes increases 1, because one additional Convert layer will + // be inserted for precision conversion(issue 115822) + ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + Select::getTestCaseName); //============================BroadcastSelect=======================================// std::vector inShapes0{{{}, {{1, 8, 2, 1}}}, {{}, {{1, 1, 1, 1}}}}; @@ -46,37 +51,42 @@ std::vector inShapes1{{{}, {{1, 8, 2, 10}}}, {{}, {{1, 8, std::vector inShapes2{{{}, {{1, 8, 2, 10}}}, {{}, {{1, 1, 1, 1}}}}; std::vector inShapes3{{1, 8, 2, 1}, {1, 8, 2, 10}}; -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastSelect, BroadcastSelect, - 
::testing::Combine( - ::testing::ValuesIn(inShapes0), - ::testing::ValuesIn(inShapes1), - ::testing::ValuesIn(inShapes2), - ::testing::ValuesIn(inShapes3), - ::testing::ValuesIn({ov::element::f32, ov::element::i8}), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU)), +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastSelect, + BroadcastSelect, + ::testing::Combine(::testing::ValuesIn(inShapes0), + ::testing::ValuesIn(inShapes1), + ::testing::ValuesIn(inShapes2), + ::testing::ValuesIn(inShapes3), + ::testing::ValuesIn({ov::element::f32, ov::element::i8}), + // Expected num nodes increases 1, because one additional Convert layer will + // be inserted for precision conversion(issue 115822) + ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), BroadcastSelect::getTestCaseName); // DS std::vector inShapes0_d{{{-1, -1, -1, -1}, {{1, 8, 2, 1}, {1, 1, 1, 1}, {1, 8, 2, 1}}}}; std::vector inShapes1_d{{{1, -1, -1, -1}, {{1, 8, 2, 10}, {1, 8, 2, 10}, {1, 8, 2, 10}}}}; -std::vector inShapes2_d{{{1, {1, 8}, {1, 2}, {1, 10}}, {{1, 8, 2, 10}, {1, 1, 2, 1}, {1, 8, 2, 10}}}}; +std::vector inShapes2_d{ + {{1, {1, 8}, {1, 2}, {1, 10}}, {{1, 8, 2, 10}, {1, 1, 2, 1}, {1, 8, 2, 10}}}}; std::vector inShapes3_d{{1, 8, 2, 1}, {1, 8, 2, 10}}; -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastSelect_Dynamic, BroadcastSelect, - ::testing::Combine( - ::testing::ValuesIn(inShapes0_d), - ::testing::ValuesIn(inShapes1_d), - ::testing::ValuesIn(inShapes2_d), - ::testing::ValuesIn(inShapes3_d), - ::testing::ValuesIn({ov::element::f32, ov::element::i8}), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU)), +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastSelect_Dynamic, + BroadcastSelect, + ::testing::Combine(::testing::ValuesIn(inShapes0_d), + ::testing::ValuesIn(inShapes1_d), + ::testing::ValuesIn(inShapes2_d), + ::testing::ValuesIn(inShapes3_d), + 
::testing::ValuesIn({ov::element::f32, ov::element::i8}), + // Expected num nodes increases 1, because one additional Convert layer will + // be inserted for precision conversion(issue 115822) + ::testing::Values(2), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU)), BroadcastSelect::getTestCaseName); -} // namespace -} // namespace snippets -} // namespace test -} // namespace ov \ No newline at end of file +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fuse_transpose_reorder.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fuse_transpose_reorder.cpp index 24756d0086553d..02609d95da61e2 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fuse_transpose_reorder.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/fuse_transpose_reorder.cpp @@ -45,7 +45,7 @@ void FuseTransposeAndReorderTest::CheckTransposeCount(size_t expectedTransposeCo void FuseTransposeAndReorderTest::SetUp() { targetDevice = ov::test::utils::DEVICE_CPU; - + SKIP_IF_CURRENT_TEST_IS_DISABLED(); std::tie(inputShape, inPrec) = this->GetParam(); CreateGraph(); } diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp index ced8d4a2d3cdd9..a1da0abdfb4381 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp @@ -221,7 +221,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr(); } From ac1fb7b955ffab52f62ca28562e40aec931ff31a Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 7 Nov 2023 17:18:18 +0100 Subject: [PATCH 224/275] Fixing OS list in System Requirements for YUM (#20934) --- .../installing-openvino-yum.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 
8 deletions(-) diff --git a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md index 88730d374995b1..bd7f3eb7800035 100644 --- a/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md +++ b/docs/articles_en/get started/installing-openvino-overview/installing-openvino-linux-header/installing-openvino-yum.md @@ -19,32 +19,33 @@ .. tab-item:: System Requirements :sync: system-requirements - + | Full requirement listing is available in: | :doc:`System Requirements Page ` - + .. note:: - + OpenVINO RPM packages are compatible with and can be run on the following operating systems: + - RHEL 8.2 and higher - Amazon Linux 2022 and 2023 - Rocky Linux 8.7, 8.8 and 9.2 - Alma Linux 8.7, 8.8 and 9.2 - - Oracle Linix 8.7, 8.8 and 9.2 + - Oracle Linux 8.7, 8.8 and 9.2 - Fedora 29 and higher up to 40 - OpenEuler 20.03 and 22.03 - Anolis OS 8.6 and 8.8 - CentOS Stream 8 and 9 - + .. tab-item:: Processor Notes :sync: processor-notes - + | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: | `Product Specifications `__ - + .. 
tab-item:: Software :sync: software - + * `CMake 3.13 or higher, 64-bit `_ * GCC 8.2.0 * `Python 3.8 - 3.11, 64-bit `_ From c6ca7865fb9a0f1e9c7b46ea191f6155e37d0547 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 7 Nov 2023 21:44:09 +0400 Subject: [PATCH 225/275] [TF FE] Fix conversion of TF1 OD models out-of-the-box (#20916) * [TF FE] Fix conversion of TF1 OD models out-of-the-box Signed-off-by: Kazantsev, Roman * Add test While with nested If operation Signed-off-by: Kazantsev, Roman * Update tests/layer_tests/tensorflow_tests/test_tf_While.py --------- Signed-off-by: Kazantsev, Roman --- .../transformations/switch_merge_resolve.cpp | 11 +++ .../const_to_result_remover.cpp | 2 + .../tensorflow_tests/test_tf_While.py | 76 +++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp index af03b24519c29d..35c2cd1b7f23e1 100644 --- a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp +++ b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp @@ -15,6 +15,7 @@ #include "openvino/op/if.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/result.hpp" +#include "openvino/op/util/multi_subgraph_base.hpp" #include "tf_utils.hpp" using namespace ov; @@ -151,6 +152,16 @@ void insert_result_before_merge(const shared_ptr& merge_node, } // namespace bool pass::SwitchMergeResolver::run_on_model(const shared_ptr& m) { + // run this transformation recursively since this is a model pass + for (const auto& op : m->get_ordered_ops()) { + auto multisubgraph_op = as_type_ptr(op); + if (multisubgraph_op) { + for (size_t i = 0; i < multisubgraph_op->get_internal_subgraphs_size(); ++i) { + run_on_model(multisubgraph_op->get_function(static_cast(i))); + } + } + } + // split set of Switch and Merge nodes to clusters // where each cluster of Switch and Merge nodes will represent // the 
single If operation for fusing diff --git a/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp b/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp index 1963bcf47dae22..d16152ca492246 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp @@ -16,6 +16,8 @@ namespace tensorflow { namespace pass { bool ConstToResultRemover::run_on_model(const std::shared_ptr& m) { + // Note: need to perform this transformation only on the main ov::Model graph + // no need to apply it for sub-graphs! ResultVector results_to_remove; // look for isolated UnsupportedConst->Result sub-graphs to remove // also, find isolated Constant->Result sub-graphs to remove diff --git a/tests/layer_tests/tensorflow_tests/test_tf_While.py b/tests/layer_tests/tensorflow_tests/test_tf_While.py index 2a112700f30ad5..d4aaedf86854e6 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_While.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_While.py @@ -50,6 +50,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=False), + dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=True), dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=False), dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True) ] @@ -109,6 +110,7 @@ def body(x, y): test_data_basic = [ dict(y_shape=[2, 3], lower_control_flow=False), + dict(y_shape=[2, 3], lower_control_flow=True), dict(y_shape=[2, 1, 4], lower_control_flow=False), dict(y_shape=[2, 1, 4], lower_control_flow=True) ] @@ -122,3 +124,77 @@ def test_while_basic(self, params, ie_device, precision, ir_version, temp_dir, self._test(*self.create_while_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) + + +class 
TestWhileWithNestedIf(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info, "Test error: inputs_info must contain `x`" + assert 'y' in inputs_info, "Test error: inputs_info must contain `y`" + x_shape = inputs_info['x'] + y_shape = inputs_info['y'] + inputs_data = {} + inputs_data['x'] = np.random.randint(1, 10, x_shape).astype(np.int32) + inputs_data['y'] = np.random.randint(-50, 50, y_shape).astype(np.int32) + return inputs_data + + def create_while_with_nested_if_net(self, y_shape, data_type, lower_control_flow): + from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + def while_function(x, y): + @tf.function + def cond(x, y): + return tf.less(x, 10) + + @tf.function + def body(x, y): + # create If operation inside While body + # use different logic for updating y based on x + def if_op(cond, y): + def then_branch(): + y_new = tf.multiply(y, tf.constant(2, dtype=data_type)) + return y_new + + def else_branch(): + y_new = tf.subtract(y, tf.constant(55, dtype=data_type)) + return y_new + + if_op = tf.cond(cond, then_branch, else_branch) + output = tf.identity(if_op, name='if_op') + return output + + y_new = tf.add(y, tf.constant(2, dtype=data_type)) + cond = tf.less(x, 5) + y_new = if_op(cond, y_new) + x_new = tf.add(x, 1) + return x_new, y_new + + return tf.while_loop(cond, body, [x, y]) + + tf_while_graph = tf.function(while_function) + x = np.random.randint(9, 10, []).astype(data_type) + y = np.random.randint(-50, 50, y_shape).astype(data_type) + concrete_func = tf_while_graph.get_concrete_function(x, y) + + # lower_control_flow defines representation of While operation + # in case of lower_control_flow=True it is decomposed into LoopCond, NextIteration and TensorArray operations + frozen_func = convert_variables_to_constants_v2(concrete_func, + lower_control_flow=lower_control_flow) + + graph_def = frozen_func.graph.as_graph_def(add_shapes=True) + return graph_def, None + + 
test_data_basic = [ + dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=False), + dict(y_shape=[2, 3], data_type=np.int32, lower_control_flow=True), + dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=False), + dict(y_shape=[2, 1, 4], data_type=np.int32, lower_control_flow=True) + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + @pytest.mark.skipif(platform == 'darwin', reason="Ticket - 122182") + def test_while_with_nested_if_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_while_with_nested_if_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) From c42a88a190e1bda0c8b210c906e0b54bef6c2e09 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Wed, 8 Nov 2023 08:11:08 +0900 Subject: [PATCH 226/275] Support dynamic tensor_iterator (#20869) * [GPU] Support dynamic tensoriterator with -1 num_iteration - remove redundant codes * [GPU] Refactoring methods for pre_process / post_process for body_network * Add unit test for dynamic tensoriterator wo trip_count_id * Follow-up code review * Set inner network in loading of model cache * Fix legacy loop unit tests --- .../include/intel_gpu/primitives/loop.hpp | 23 + .../intel_gpu/src/graph/CMakeLists.txt | 1 + .../intel_gpu/src/graph/impls/common/loop.cpp | 156 ++----- .../intel_gpu/src/graph/include/loop_inst.h | 212 +++------ src/plugins/intel_gpu/src/graph/loop.cpp | 419 +++++++++++++----- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 3 +- .../tests/unit/test_cases/loop_gpu_test.cpp | 124 +++++- 7 files changed, 549 insertions(+), 389 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp index 282147cc9e9d3d..899a4b6ce4c235 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/loop.hpp @@ -114,6 +114,29 @@ struct loop : public primitive_base { ib >> end; ib >> stride; } + + std::string to_string() const { + std::stringstream ss; + ss << "io_primitive_map " << std::endl; + ss << "* external_id : " << external_id.to_string() << std::endl; + ss << "* internal_id : " << internal_id.to_string() << std::endl; + ss << "* axis : " << axis << std::endl; + ss << "* start : " << start << std::endl; + ss << "* end : " << end << std::endl; + ss << "* stride : " << stride << std::endl; + return ss.str(); + } + + std::string to_short_string() const { + std::stringstream ss; + ss << "io_primitive_map[e:" << external_id.to_string(); + ss << "," << internal_id.to_string(); + ss << "," << axis; + ss << "," << start; + ss << "," << end; + ss << "," << stride << "]"; + return ss.str(); + } }; struct backedge_mapping { diff --git a/src/plugins/intel_gpu/src/graph/CMakeLists.txt b/src/plugins/intel_gpu/src/graph/CMakeLists.txt index 731b580718ea84..080804a1af49c9 100644 --- a/src/plugins/intel_gpu/src/graph/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/graph/CMakeLists.txt @@ -37,6 +37,7 @@ target_link_libraries(${TARGET_NAME} PUBLIC OpenCL::OpenCL openvino::shape_infer target_link_libraries(${TARGET_NAME} PRIVATE openvino_intel_gpu_kernels openvino_intel_gpu_runtime openvino::itt + openvino::reference openvino::runtime::dev openvino::runtime) diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index b774b72dd506ec..119a186b71a8b7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -110,91 +110,12 @@ struct loop_impl : typed_primitive_impl { _back_edges = node.get_back_edges(); } - void set_memory_in_body_network(cldnn::network::ptr body_network, - const std::shared_ptr& inst, memory::ptr mem) 
const { - if (inst->is_input()) { - body_network->set_input_data(inst->id(), mem); - } else if (inst->is_output()) { - body_network->set_output_memory(inst->id(), mem); - } else { - inst->set_output_memory(mem, false); - } - } - - std::vector handle_buffers_for_next_iteration(const loop_inst::backedge_memory_mapping& mapping, - network::ptr body_network, int64_t iter, bool is_dynamic) const { - std::vector event_vec; - OPENVINO_ASSERT(iter >= 0, "iteration should not be negative : ", iter); - if (mapping.type == loop_inst::backedge_memory_mapping::CONCAT_OUTPUT) { - if (iter == 0) { - set_memory_in_body_network(body_network, mapping.to_primitive, mapping.initial_mem); - } else if (iter > 0) { - if (is_dynamic) { - auto from_id = mapping.from_primitive->id(); - if (body_network->has_event(from_id)) { - auto ev = body_network->get_primitive_event(from_id); - if (ev) ev->wait(); - } - // In dynamic model, just copy data from inner body output to inner body input in back_edges. - memory::ptr mem1 = mapping.to_primitive->output_memory_ptr(); - memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); - auto ev = mem1->copy_from(body_network->get_stream(), *(mem2)); - if (ev) event_vec = {ev}; - } else { - auto mem = mapping.concat_mem_mapping->get_sliced_mems().at(iter - 1); - set_memory_in_body_network(body_network, mapping.to_primitive, mem); - } - } - } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE_SHARED) { - if (iter == 0) { - if (mapping.from_mem != nullptr) { - auto ev = mapping.from_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); - if (ev) event_vec = {ev}; - } - } else { - // In dynamic model, output memory is not defined before execution. - // After body network execution, replace input memory from initial_mem(external input memory) to output memory. 
- if (mapping.from_mem == nullptr) { - mapping.from_mem = mapping.from_primitive->output_memory_ptr(); - OPENVINO_ASSERT(mapping.from_mem != nullptr, "from_mem should not be null"); - set_memory_in_body_network(body_network, mapping.to_primitive, mapping.from_mem); - } - } - } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE) { - memory::ptr mem1 = mapping.to_primitive->output_memory_ptr(); - if (iter == 0) { - auto ev = mem1->copy_from(body_network->get_stream(), *(mapping.initial_mem)); - if (ev) event_vec = {ev}; - } else { - if (is_dynamic) { - // In dynamic model, do not set memory buffer between input and output in inner body network. - // Just copy data from input buffer memory to output buffer memory. - auto from_id = mapping.from_primitive->id(); - if (body_network->has_event(from_id)) { - auto ev = body_network->get_primitive_event(from_id); - if (ev) ev->wait(); - } - memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); - auto ev = mem1->copy_from(body_network->get_stream(), *(mem2)); - if (ev) event_vec = {ev}; - } else { - // In static model, swap memory buffer between output and input in inner body network - memory::ptr mem2 = mapping.from_primitive->output_memory_ptr(); - set_memory_in_body_network(body_network, mapping.to_primitive, std::move(mem2)); - set_memory_in_body_network(body_network, mapping.from_primitive, std::move(mem1)); - } - } - } - return event_vec; - } - event::ptr execute_impl(const std::vector& events, loop_inst& instance) override { const auto& impl_params = instance.get_impl_params(); const auto& primitive = impl_params->typed_desc(); auto& outer_network = instance.get_network(); auto& stream = outer_network.get_stream(); - const auto max_num_iterations = primitive->max_num_iterations; auto body_network = instance.get_body_network(); int64_t current_iteration_idx = 0; @@ -202,6 +123,9 @@ struct loop_impl : typed_primitive_impl { OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation 
should have num_iteration_id"); + auto num_iterations = instance.get_num_iterations(); + GPU_DEBUG_LOG << "num_iterations : " << num_iterations << std::endl; + ////////////////////////////////////////// // memory pointers for outer network ////////////////////////////////////////// @@ -211,8 +135,16 @@ struct loop_impl : typed_primitive_impl { memory::ptr trip_count_mem = outer_network.get_primitive(primitive->trip_count_id)->output_memory_ptr(); trip_count = read_scalar_value(std::move(trip_count_mem), stream); } else { - trip_count = max_num_iterations; + OPENVINO_ASSERT(!primitive->body_execution_condition_id.empty() + || num_iterations > 0 || primitive->max_num_iterations > 0, + "num_iterations should be positive when trip_count_id is not existed"); + // If trip_count_id does not exist, the original ngraph operation is TensorIterator. + // If num_iterations is negative, it means that TensorIterator has no concat input / output memory. + // When it has no body_execution_condition_id, num_iterations, or primitive->max_num_iterations, + // TensorIterator has no ending condition, so it cannot terminate the inner body execution loop. + trip_count = num_iterations > 0 ? num_iterations : primitive->max_num_iterations; } + GPU_DEBUG_LOG << "trip_count : " << trip_count << std::endl; // read initial execution condition from outer network int64_t execution_condition = 1; @@ -220,6 +152,7 @@ struct loop_impl : typed_primitive_impl { memory::ptr first_execution_condition_mem = outer_network.get_primitive(primitive->first_execution_condition_id)->output_memory_ptr(); execution_condition = read_scalar_value(first_execution_condition_mem, stream); } + GPU_DEBUG_LOG << "execution_condition: " << execution_condition << std::endl; // When execution_condition is false or trip_count is zero, return execute_impl without any body_network execution.
if (!execution_condition || trip_count == 0) { @@ -257,17 +190,16 @@ struct loop_impl : typed_primitive_impl { } if (!instance.preproc_memories_done) { - instance.preprocess_output_memory(trip_count); - instance.preprocess_input_memory(trip_count); + instance.preprocess_output_memory(num_iterations); + instance.preprocess_input_memory(num_iterations); instance.preprocess_backedge_memory(); instance.preproc_memories_done = true; } const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings; - const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings; const auto& backedge_memory_mappings = instance.backedge_memory_mappings; - // If there are concatenated_output_mem_mappings or backedge_memory_mappings we need to wait for + // If there are concatenated_input_mem_mappings or backedge_memory_mappings we need to wait for // previous tasks before accessing memory in get_sliced_mem() and setup_iteration() functions if (!concatenated_input_mem_mappings.empty() || !backedge_memory_mappings.empty()) { for (auto& e : events) { @@ -278,36 +210,18 @@ struct loop_impl : typed_primitive_impl { // Set sliced input data for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { const auto& concatenated_input = concatenated_input_mem_mappings.at(i); + concatenated_input->slice_mem(num_iterations); memory::ptr mem = concatenated_input->get_sliced_mem(0); OPENVINO_ASSERT(mem != nullptr, instance.id(), "sliced input memory of loop is not allocated properly"); - body_network->set_input_data(concatenated_input->sliced_data_prim->id(), mem); + body_network->set_input_data(concatenated_input->get_sliced_data_prim_id(), mem); } std::vector all_events; std::vector loop_carried_dep(events.begin(), events.end()); - while (((trip_count <= 0) || (current_iteration_idx < trip_count)) && execution_condition) { - // Copy & Set sliced input memory - for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { - const auto& 
concatenated_input = concatenated_input_mem_mappings.at(i); - memory::ptr mem = concatenated_input->get_sliced_mem(current_iteration_idx); - OPENVINO_ASSERT(mem != nullptr, instance.id(), " sliced input memory of loop is not allocated properly"); - concatenated_input->sliced_data_prim->set_output_memory(mem); - } - - // Set backedges and output memory - for (auto& backedge_memory_mapping : backedge_memory_mappings) { - auto event_vec = handle_buffers_for_next_iteration(backedge_memory_mapping, body_network, current_iteration_idx, is_dynamic); - for (auto ev : event_vec) { - loop_carried_dep.push_back(ev); - } - } - - if (!is_dynamic) { - // Set sliced output memory for static shape model - // because body network generate output memory during the body network execution in dynamic model - for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { - concat_output_mem_mapping->setup_sliced_output_memory(current_iteration_idx); - } + while (((trip_count < 0) || (current_iteration_idx < trip_count)) && execution_condition) { + auto prev_events = instance.preprocess_memory_for_body_network(current_iteration_idx); + for (auto& ev : prev_events) { + loop_carried_dep.push_back(ev); } // execute body network @@ -335,22 +249,10 @@ struct loop_impl : typed_primitive_impl { // After execution of body network, sliced_data_prim will has output memory buffer // current memory buffer move to sliced_mems and new memory buffer will be allocated in sliced_data_prim if (is_dynamic) { - for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { - auto sliced_data_prim = concat_output_mem_mapping->sliced_data_prim; - auto output_mem_ptr = sliced_data_prim->output_memory_ptr(); - - auto sliced_id = sliced_data_prim->id(); - if (body_network->has_event(sliced_id)) { - auto ev = body_network->get_primitive_event(sliced_id); - if (ev) ev->wait(); - } - memory::ptr new_sliced_mem = 
concat_output_mem_mapping->get_or_create_sliced_mem(current_iteration_idx, - output_mem_ptr->get_layout()); - auto ev = new_sliced_mem->copy_from(body_network->get_stream(), *output_mem_ptr); - if (ev) { - loop_carried_dep.push_back(ev); - all_events.push_back(ev); - } + auto post_events = instance.postprocess_memory_for_body_network(current_iteration_idx); + for (auto& ev : post_events) { + loop_carried_dep.push_back(ev); + all_events.push_back(ev); } } @@ -364,7 +266,7 @@ struct loop_impl : typed_primitive_impl { execution_condition = read_scalar_value(body_execution_condition_mem, body_network->get_stream()); } GPU_DEBUG_IF(!execution_condition) { - GPU_DEBUG_LOG << "body_exec_condition is false at "<< current_iteration_idx << " iterations" << std::endl; + GPU_DEBUG_LOG << "body_exec_condition is false at "<< current_iteration_idx << " iteration idx" << std::endl; } current_iteration_idx++; @@ -378,12 +280,12 @@ struct loop_impl : typed_primitive_impl { // update num_iterations (actual number of iterations) memory::ptr num_actual_iterations_mem = outer_network.get_primitive(primitive->num_iteration_id)->output_memory_ptr(); write_scalar_value(num_actual_iterations_mem, stream, current_iteration_idx); - GPU_DEBUG_LOG << "current_iteration(" << primitive->num_iteration_id << ", " + GPU_DEBUG_LOG << "current_iteration_idx(" << primitive->num_iteration_id << ", " << num_actual_iterations_mem << ") : " << current_iteration_idx << std::endl; if (is_dynamic) instance.update_output_layout(); - instance.postprocess_output_memory(is_dynamic); + instance.postprocess_output_memory(is_dynamic, current_iteration_idx); ev->set(); return ev; diff --git a/src/plugins/intel_gpu/src/graph/include/loop_inst.h b/src/plugins/intel_gpu/src/graph/include/loop_inst.h index b41f58accc65e0..ba39207199476c 100644 --- a/src/plugins/intel_gpu/src/graph/include/loop_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/loop_inst.h @@ -107,137 +107,50 @@ class typed_primitive_inst : public 
typed_primitive_inst_base { struct concatenated_memory_mapping { using ptr = std::shared_ptr; using cptr = std::shared_ptr; - concatenated_memory_mapping(int64_t axis, - memory::ptr concatenated_mem, - std::vector sliced_mems, // To change shared ptr vector + concatenated_memory_mapping(memory::ptr concatenated_mem, + std::vector sliced_mems, stream& stream, engine& engine, - int64_t iteration_elements = 0, - int64_t stride = 0, - int64_t initial_offset = 0) : - axis(axis), + std::shared_ptr concat_data_prim, + std::shared_ptr sliced_data_prim, + const cldnn::loop::io_primitive_map& io_prim_map) : concatenated_mem(concatenated_mem), sliced_mems(sliced_mems), stream(stream), engine(engine), - iteration_elements(iteration_elements), - stride(stride), - initial_offset(initial_offset) { - calculate_concatenated_mem(); - } + concat_data_prim(std::move(concat_data_prim)), + sliced_data_prim(std::move(sliced_data_prim)), + io_prim_map(io_prim_map) {} concatenated_memory_mapping(const concatenated_memory_mapping& o) : - axis(o.axis), - concat_data_prim(o.concat_data_prim), - sliced_data_prim(o.sliced_data_prim), - concatenated_mem(o.concatenated_mem), sliced_mems(o.sliced_mems), stream(o.stream), engine(o.engine), - iteration_elements(o.iteration_elements), - stride(o.stride), - initial_offset(o.initial_offset), - - bytes_per_element(o.bytes_per_element), - batch_size(o.batch_size), - bytes_batch_stride(o.bytes_batch_stride), - bytes_iteration(o.bytes_iteration), - bytes_iteration_stride(o.bytes_iteration_stride), - bytes_iteration_initial_offset(o.bytes_iteration_initial_offset) {} - - - static int64_t get_batch_size(layout mem_layout, int64_t axis) { - if (axis < 0) { - throw std::runtime_error("axis should be positive integer or zero"); - } - - if (mem_layout.is_dynamic()) { - return -1; - } - - int64_t batch_size = 1; - for (int64_t i = 0; i < axis; ++i) { - batch_size *= mem_layout.get_tensor().raw[i]; - } - for (int64_t i = axis-1; i >= 2; --i) { - batch_size *= 
mem_layout.get_tensor().raw[i]; - } - return batch_size; - } - - void calculate_concatenated_mem() const { - if (!sliced_mems.empty() && concatenated_mem != nullptr) { - auto& sliced_layout = sliced_mems.front()->get_layout(); - const int64_t num_elements_batch = get_batch_size(sliced_layout, axis); - iteration_elements = sliced_layout.count() / num_elements_batch; - bytes_per_element = data_type_traits::size_of(concatenated_mem->get_layout().data_type); - batch_size = get_batch_size(concatenated_mem->get_layout(), axis); - bytes_batch_stride = (static_cast(concatenated_mem->get_layout().count()) / batch_size) * bytes_per_element; - bytes_iteration = iteration_elements * bytes_per_element; - bytes_iteration_stride = stride * bytes_iteration; - bytes_iteration_initial_offset = initial_offset * bytes_iteration; - } - } + concat_data_prim(o.concat_data_prim), + sliced_data_prim(o.sliced_data_prim), + io_prim_map(o.io_prim_map) {} void update_concatenated_mem(memory::ptr mem) { - if (concatenated_mem != nullptr && concatenated_mem->get_layout() == mem->get_layout()) { - concatenated_mem = mem; - } else { - concatenated_mem = mem; - calculate_concatenated_mem(); - } + concatenated_mem = mem; } - void restore_concatenated_mem() const { - OPENVINO_ASSERT(concatenated_mem != nullptr, "concatenated_mem should not be nullptr"); - mem_lock concat_mem_lock{ concatenated_mem, stream }; - int64_t iteration_offset = bytes_iteration_initial_offset; - for (const auto& sliced_mem : sliced_mems) { - // To support multi-batch, just repeat memcpy for each batch - for (int64_t batch = 0; batch < batch_size; ++batch) { - const int64_t src_offset = batch * bytes_iteration; - const int64_t dst_offset = batch * bytes_batch_stride + iteration_offset; - mem_lock sliced_mem_lock{ sliced_mem, stream }; - uint8_t* src = sliced_mem_lock.data() + src_offset; - uint8_t* dst = concat_mem_lock.data() + dst_offset; - std::copy(src, src + bytes_iteration, dst); - } - iteration_offset += 
bytes_iteration_stride; - } - } + void slice_mem(const int64_t num_iteration) const; + void concat_mem(const int64_t curent_iterations) const; // Get sliced mem for the iteration idx and copy data from external input to sliced mem // In the case of dynamic model, concatenated_mem is always non nullptr. memory::ptr get_sliced_mem(int64_t iteration) const { - OPENVINO_ASSERT(!sliced_mems.empty(), "For input data, sliced_mems should not be empty"); - mem_lock from_lock{ concatenated_mem, stream }; - int64_t batch_offset = 0; - auto sliced_mem = get_or_create_sliced_mem(iteration, sliced_mems.front()->get_layout()); - const int64_t iteration_offset = bytes_iteration_initial_offset + - bytes_iteration_stride * iteration; - // To support multi-batch, just repeat memcpy for each batch - for (int64_t batch = 0; batch < batch_size; ++batch) { - const int64_t src_offset = batch_offset + iteration_offset; - const int64_t dst_offset = batch * bytes_iteration; - mem_lock to_lock{ sliced_mem, stream }; - const auto src = from_lock.begin() + src_offset; - const auto dst = to_lock.begin() + dst_offset; - std::copy(src, src + bytes_iteration, dst); - batch_offset += bytes_batch_stride; - } - return sliced_mem; + OPENVINO_ASSERT(static_cast(iteration) < sliced_mems.size(), "invalid itertion(", iteration, + ") for sliced_mes(", sliced_mems.size(), ")"); + return sliced_mems.at(iteration);; } memory::ptr get_or_create_sliced_mem(int64_t idx, const layout& mem_layout) const { - bool recalc_data = !sliced_mems.empty(); while (sliced_mems.size() <= static_cast(idx)) { memory::ptr sliced_mem = engine.allocate_memory(mem_layout, 0); sliced_mems.push_back(sliced_mem); } - if (recalc_data) { - calculate_concatenated_mem(); - } return sliced_mems.at(idx); } @@ -252,78 +165,48 @@ class typed_primitive_inst : public typed_primitive_inst_base { std::vector& get_sliced_mems() const { return sliced_mems; } void reset_data_for_shape_changed() { - bytes_per_element = 0; - batch_size = 0; - 
bytes_batch_stride = 0; - bytes_iteration = 0; - bytes_iteration_stride = 0; - bytes_iteration_initial_offset = 0; if (concatenated_mem) concatenated_mem = nullptr; - iteration_elements = 0; sliced_mems.clear(); } + const input_info& get_external_id() { + return io_prim_map.external_id; + } + std::string to_string() const { std::stringstream ss; ss << "concatenated_memory_mapping [" << std::endl; - ss << "* axis : " << axis << std::endl; - ss << "* bytes_per_element : " << bytes_per_element << std::endl; - ss << "* batch_size : " << batch_size << std::endl; - if (concatenated_mem != nullptr && concatenated_mem->get_layout().is_static()) { - ss << "* bytes_batch_stride : " << bytes_batch_stride << " = (static_cast(" - << concatenated_mem->get_layout().count() << ") / batch_size:" << batch_size << ") * bytes_per_element:" << bytes_per_element << std::endl; - } else { - ss << "* bytes_batch_stride : " << bytes_batch_stride << std::endl; - } - ss << "* bytes_iteration : " << bytes_iteration << " = (iteration_elements:" - << iteration_elements << " * bytes_per_element:" << bytes_per_element << ")" << std::endl; - ss << "* bytes_iteration_stride : " << bytes_iteration_stride << std::endl; - ss << "* bytes_iteration_initial_offset : " << bytes_iteration_initial_offset << std::endl; ss << "* concat_data_prim : " << ((concat_data_prim != nullptr)? concat_data_prim->id() : "nullptr") << std::endl; ss << "* sliced_data_prim : " << ((sliced_data_prim != nullptr)? 
sliced_data_prim->id() : "nullptr") << std::endl; - if (concatenated_mem) { - ss << "* concatenated_mem : " << concatenated_mem->get_layout().to_short_string() << std::endl; - } else { - ss << "* concatenated_mem : nullptr" << std::endl; - } - ss << "* iteration_elements : " << iteration_elements << std::endl; - ss << "* stride : " << stride << std::endl; - ss << "* initial_offset : " << initial_offset << std::endl; - ss << "* input_info : " << concat_data_id.to_string() << std::endl; + ss << "* concatenated_mem : " + << ((concatenated_mem != nullptr)? concatenated_mem->get_layout().to_short_string() : "nullptr") << std::endl; ss << "* sliced_mems :{ "; for (auto mem : sliced_mems) { ss << mem->get_layout().to_short_string() << ","; } + ss << "* io_prim_map : " << io_prim_map.to_string() << std::endl; ss << "}]" << std::endl; return ss.str(); } - const int64_t axis; - std::shared_ptr concat_data_prim; - std::shared_ptr sliced_data_prim; - cldnn::input_info concat_data_id; + std::shared_ptr get_sliced_data_prim() { + OPENVINO_ASSERT(sliced_data_prim != nullptr, "sliced_data_prim should not be nullptr"); + return sliced_data_prim; + } + + primitive_id get_sliced_data_prim_id() { + OPENVINO_ASSERT(sliced_data_prim != nullptr, "sliced_data_prim should not be nullptr"); + return sliced_data_prim->id(); + } private: mutable memory::ptr concatenated_mem; mutable std::vector sliced_mems; cldnn::stream& stream; cldnn::engine& engine; - mutable int64_t iteration_elements = 0; - const int64_t stride = 0; - const int64_t initial_offset = 0; - - // element size - mutable int64_t bytes_per_element; - // number of higher level of dimension of slicing axis - mutable int64_t batch_size; - // stride of batch in concatenated memory - mutable int64_t bytes_batch_stride; - // byte size of each iteration per batch in a sliced memory - mutable int64_t bytes_iteration; - // byte size of each iteration (bytes_iteration * batch_size) in a sliced memory - mutable int64_t 
bytes_iteration_stride; - // byte offset of 1st iteration in a batch in a sliced memory - mutable int64_t bytes_iteration_initial_offset; + std::shared_ptr concat_data_prim; + std::shared_ptr sliced_data_prim; + const cldnn::loop::io_primitive_map& io_prim_map; }; struct backedge_memory_mapping { @@ -420,18 +303,17 @@ class typed_primitive_inst : public typed_primitive_inst_base { public: typed_primitive_inst(network& network, const loop_node& node); network::ptr get_body_network() const { return body_network; } - void preprocess_input_memory(const int64_t trip_count); - void preprocess_output_memory(const int64_t trip_count); + void preprocess_input_memory(const int64_t num_iteration); + void preprocess_output_memory(const int64_t num_iteration); void preprocess_backedge_memory(); void update_mapped_memory(); void update_input_mapped_memory(); void update_output_mapped_memory(); void update_backedge_mapped_memory(); - void postprocess_output_memory(bool is_dynamic); - concatenated_memory_mapping::ptr create_concat_memory_map(const input_info& id, - const cldnn::loop::io_primitive_map& io_prim_map, + void postprocess_output_memory(bool is_dynamic, int64_t current_iteration); + concatenated_memory_mapping::ptr create_concat_memory_map(const cldnn::loop::io_primitive_map& io_prim_map, memory::ptr mem_ptr, - const int64_t trip_count); + const int64_t num_iteration); event::ptr set_output_memory(memory::ptr mem, bool check = true, size_t idx = 0) override; void reset_memory(); @@ -442,11 +324,23 @@ class typed_primitive_inst : public typed_primitive_inst_base { void update_shape() override { primitive_inst::update_shape(); } void update_output_layout(); + // num_iteration is used for slicing input memory + int64_t get_num_iterations(); + + std::vector preprocess_memory_for_body_network(int64_t current_iteration_idx); + std::vector postprocess_memory_for_body_network(int64_t current_iteration_idx); + private: network::ptr body_network; memory::ptr 
get_external_memory(const primitive_id& external_id, size_t mem_idx = 0) const; layout get_external_output_layout(const primitive_id& external_id, size_t mem_idx = 0) const; std::shared_ptr get_sliced_mem(const primitive_id& internal_id) const; + int64_t calculate_num_iterations(const cldnn::loop::io_primitive_map& io_prim_map, ov::PartialShape& pshape); + std::vector handle_buffers_for_next_iteration(const backedge_memory_mapping& mapping, + network::ptr body_network, int64_t iter); + void set_memory_in_body_network(cldnn::network::ptr body_network, const std::shared_ptr& inst, + memory::ptr mem); + std::vector _input_primitive_maps; std::vector _output_primitive_maps; std::vector _back_edges; diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index f3f29862cc5be9..e988c96799b931 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -13,6 +13,8 @@ #include #include #include +#include "openvino/reference/concat.hpp" +#include "openvino/reference/split.hpp" namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(loop) @@ -40,61 +42,6 @@ std::map loop_node::get_memory_deps() const { return memory_deps; } -static size_t convert_to_raw_axis(size_t axis, size_t ndim) { - // convert between bfyx, bfzyx, bfzyxw and tensor.size.raw - if (axis >= ndim) { - throw std::runtime_error("axis should be less than ndim"); - } - - if (axis < 2) { - return axis; - } - return (ndim - 1) - (axis - 2); -} - -static bool check_if_axis_is_set_properly(loop_node const & node) { - const auto& input_primitive_maps = node.get_input_primitive_maps(); - - std::vector> input_with_axis_iteration; - for (const auto& input : input_primitive_maps) { - if (input.axis >= 0) { - input_with_axis_iteration.push_back(std::cref(input)); - } - } - - // check all iteration axis has the same size - const std::vector>& dependencies = node.get_dependencies(); - int32_t iteration_size = -1; - for (const auto& pm : 
input_with_axis_iteration) { - auto found = std::find_if(dependencies.begin(), dependencies.end(), - [&pm](const std::pair& dep){ return dep.first->id() == pm.get().external_id.pid; }); - assert(found != dependencies.end()); - const layout input_layout = (*found).first->get_output_layout(); - const auto shape = input_layout.get_tensor().sizes(input_layout.format); - const size_t iteration_axis = convert_to_raw_axis(pm.get().axis, static_cast(shape.size())); - if (iteration_size < 0) { - iteration_size = shape[iteration_axis]; - } else { - if (iteration_size != shape[iteration_axis]) { - return false; - } - } - } - - // check if size of iteration axis is 1 - for (const auto& input_ref : input_with_axis_iteration) { - const loop::io_primitive_map& input = input_ref.get(); - auto dep = std::find_if(dependencies.begin(), dependencies.end(), - [&input](const std::pair& dep) { return input.external_id.pid == dep.first->id(); }); - - // if corresponding external id is not found - if (dep == dependencies.end()) { - return false; - } - } - return true; -} - layout loop_inst::calc_output_layout(loop_node const& /*node*/, kernel_impl_params const& impl_param) { auto prim = impl_param.typed_desc(); @@ -293,7 +240,7 @@ void loop_inst::update_input_mapped_memory() { bool is_concatenated_input = (input_map->axis >= 0); if (is_concatenated_input) { for (auto& mem_mapping : concatenated_input_mem_mappings) { - if (mem_mapping->sliced_data_prim->id() == input_map->internal_id.pid) { + if (mem_mapping->get_sliced_data_prim_id() == input_map->internal_id.pid) { mem_mapping->update_concatenated_mem(memory); break; } @@ -320,7 +267,7 @@ void loop_inst::update_output_mapped_memory() { body_network->get_primitive(internal_id)->set_output_memory(to_mem, true, internal_mem_idx); } else { for (auto& mem_mapping : concatenated_output_mem_mappings) { - if (mem_mapping->sliced_data_prim->id() == internal_id) { + if (mem_mapping->get_sliced_data_prim_id() == internal_id) { 
mem_mapping->update_concatenated_mem(to_mem); break; } @@ -398,49 +345,45 @@ event::ptr loop_inst::set_output_memory(memory::ptr mem, bool check, size_t idx) return ev; } -loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(const input_info& internal_id, - const cldnn::loop::io_primitive_map& io_prim_map, - memory::ptr mem_ptr, - const int64_t trip_count) { +loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(const cldnn::loop::io_primitive_map& io_prim_map, + memory::ptr mem_ptr, + const int64_t num_iterations) { + const auto& external_id = io_prim_map.external_id; + const auto& internal_id = io_prim_map.internal_id; auto& engine = body_network->get_engine(); auto& stream = body_network->get_stream(); auto prim = body_network->get_primitive(internal_id.pid); - const int64_t start = io_prim_map.start < 0? trip_count - 1: io_prim_map.start; std::vector sliced_mems; - int64_t num_elements_iteration = 0; // if memory is nullptr, that means memory is not allocated yet because current network is dynamic shape model. // In dynamic model, we can't calculate num_element_iteration, start, and sliced_layout. // will recalculate that parameters in backedge preprocessing map after first execution. 
if (mem_ptr != nullptr) { - layout sliced_layout = prim->output_memory(internal_id.idx).get_layout(); + auto& out_mem = prim->output_memory(internal_id.idx); + layout sliced_layout = out_mem.get_layout(); // When trip_count is -1, allocate first sliced_mem and allocate sliced memory if additional sliced mem is required - if (trip_count < 0) { - memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); + if (num_iterations < 0) { + memory::ptr sliced_mem = engine.allocate_memory(sliced_layout); sliced_mems.push_back(sliced_mem); } else { - sliced_mems.reserve(trip_count); - for (int j=0; j < trip_count; ++j) { - memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); + sliced_mems.reserve(num_iterations); + for (int j=0; j < num_iterations; ++j) { + memory::ptr sliced_mem = engine.allocate_memory(sliced_layout); sliced_mems.push_back(sliced_mem); } } - - const int64_t num_elements_batch = concatenated_memory_mapping::get_batch_size( - sliced_layout, io_prim_map.axis); - num_elements_iteration = sliced_layout.count() / num_elements_batch; } - auto concat_memory_mapping = std::make_shared( - io_prim_map.axis, mem_ptr, sliced_mems, stream, - engine, num_elements_iteration, io_prim_map.stride, start); - concat_memory_mapping->sliced_data_prim = body_network->get_primitive(internal_id.pid); - return concat_memory_mapping; + auto sliced_data_prim = body_network->get_primitive(internal_id.pid); + auto concat_data_prim = get_network().get_primitive(external_id.pid); + auto concat_data_id = external_id; + return std::make_shared(mem_ptr, sliced_mems, stream, engine, + concat_data_prim, sliced_data_prim, io_prim_map); } -void loop_inst::preprocess_output_memory(const int64_t trip_count) { +void loop_inst::preprocess_output_memory(const int64_t num_iterations) { if (concatenated_output_mem_mappings.empty()) concatenated_output_mem_mappings.reserve(_output_primitive_maps.size()); for (size_t i = 0; i < _output_primitive_maps.size(); ++i) { @@ -459,12 +402,10 
@@ void loop_inst::preprocess_output_memory(const int64_t trip_count) { } else { auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), [&](loop_inst::concatenated_memory_mapping::ptr concat_mem_map) -> bool { - return concat_mem_map->sliced_data_prim->id() == internal_id.pid; + return concat_mem_map->get_sliced_data_prim_id() == internal_id.pid; }); if (iter == concatenated_output_mem_mappings.end()) { - auto memory_mapping_info = create_concat_memory_map(internal_id, output_mapping, memory, trip_count); - memory_mapping_info->concat_data_prim = get_network().get_primitive(external_id.pid); - memory_mapping_info->concat_data_id = external_id; + auto memory_mapping_info = create_concat_memory_map(output_mapping, memory, num_iterations); concatenated_output_mem_mappings.push_back(memory_mapping_info); GPU_DEBUG_LOG << i << ") generate concat output memory mapping: " << memory_mapping_info->to_string() << std::endl; } @@ -475,7 +416,7 @@ void loop_inst::preprocess_output_memory(const int64_t trip_count) { } } -void loop_inst::preprocess_input_memory(const int64_t trip_count) { +void loop_inst::preprocess_input_memory(const int64_t num_iterations) { for (size_t memory_num = 0; memory_num < inputs_memory_count(); memory_num++) { const primitive_id& input_external_id = dependencies().at(memory_num).first->id(); auto input_map_ptrs = find_io_primitive_maps(_input_primitive_maps, @@ -499,13 +440,7 @@ void loop_inst::preprocess_input_memory(const int64_t trip_count) { GPU_DEBUG_LOG << i << ") input mapping - external " << external_id.to_string() << std::endl; GPU_DEBUG_LOG << i << ") input mapping - internal " << internal_id.to_string() << std::endl; - if (input_map->axis >= 0) { - OPENVINO_ASSERT(trip_count > 0, "In preprocessing concat input mapping, trip_count should be positive"); - OPENVINO_ASSERT(memory != nullptr, "In preprocessing concat input mapping, concat memory should be allocated"); - auto 
memory_mapping_info = create_concat_memory_map(internal_id, *input_map, memory, trip_count); - concatenated_input_mem_mappings.push_back(memory_mapping_info); - GPU_DEBUG_LOG << i << ") generate concat input memory mapping: " << memory_mapping_info->to_string() << std::endl; - } else { + if (input_map->axis < 0) { auto input_inst = body_network->get_primitive(internal_id.pid); if (memory->get_layout() != input_inst->get_output_layout()) { input_inst->set_output_layout(memory->get_layout()); @@ -514,6 +449,11 @@ void loop_inst::preprocess_input_memory(const int64_t trip_count) { << " to " << memory->get_layout().to_short_string() << std::endl; } body_network->set_input_data(internal_id.pid, memory); + } else { + OPENVINO_ASSERT(memory != nullptr, "In preprocessing concat input mapping, concat memory should be allocated"); + auto memory_mapping_info = create_concat_memory_map(*input_map, memory, num_iterations); + concatenated_input_mem_mappings.push_back(memory_mapping_info); + GPU_DEBUG_LOG << i << ") generate concat input memory mapping: " << memory_mapping_info->to_string() << std::endl; } } } @@ -605,12 +545,12 @@ void loop_inst::preprocess_backedge_memory() { std::shared_ptr loop_inst::get_sliced_mem(const primitive_id& internal_id) const { for (const auto& mem_mapping : concatenated_input_mem_mappings) { - if (mem_mapping->sliced_data_prim->id() == internal_id) { + if (mem_mapping->get_sliced_data_prim_id() == internal_id) { return mem_mapping; } } for (const auto& mem_mapping : concatenated_output_mem_mappings) { - if (mem_mapping->sliced_data_prim->id() == internal_id) { + if (mem_mapping->get_sliced_data_prim_id() == internal_id) { return mem_mapping; } } @@ -625,7 +565,10 @@ void loop_inst::validate_backedges(loop_node const & node) const { for (const auto& back_edge : back_edges) { for (const auto& mapping : input_primitive_maps) { OPENVINO_ASSERT((mapping.internal_id.pid != back_edge.to || mapping.axis < 0), - node.id(), ": input with iteration axis 
should not have backedges"); + node.id(), ": input with iteration axis should not have backedges external_id: ", + mapping.external_id.to_string(), ", internal_id: ", mapping.internal_id.to_string(), + ", back_edge.to: ", back_edge.to, ", back_edge.from ", back_edge.from, + ", mapping.axis: ", std::to_string(mapping.axis)); } } } @@ -653,8 +596,6 @@ loop_inst::typed_primitive_inst(network & network, loop_node const & node) const primitive_id& num_iterations_id = node.get_num_iterations_id(); OPENVINO_ASSERT(node.get_program().get_node(num_iterations_id).is_type(), node.id(), ": num_iterations is not mutable_data"); - OPENVINO_ASSERT(check_if_axis_is_set_properly(node), node.id(), ": axis is not set properly"); - set_inner_networks({body_network}); validate_backedges(node); validate_mappings(node); @@ -694,9 +635,11 @@ void loop_inst::load(BinaryInputBuffer& ib) { ib >> _condition_id; ib >> _num_iterations_id; body_network = std::make_shared(ib, get_network().get_stream_ptr(), get_network().get_engine(), get_network().is_primary_stream(), 0); + // set inner network to the new loaded _impl_params from cache. 
+ set_inner_networks({body_network}); } -void loop_inst::postprocess_output_memory(bool is_dynamic) { +void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_iteration) { if (is_dynamic) { std::vector external_outputs; external_outputs.resize(outputs_memory_count()); @@ -733,7 +676,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic) { auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), [&](std::shared_ptr &concat_output){ - return concat_output->concat_data_id == external_id; + return concat_output->get_external_id() == external_id; }); if (iter != concatenated_output_mem_mappings.end()) { (*iter)->update_concatenated_mem(concat_mem); @@ -748,7 +691,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic) { for (size_t i = 0; i < concatenated_output_mem_mappings.size(); ++i) { const auto& concat_output = concatenated_output_mem_mappings.at(i); - concat_output->restore_concatenated_mem(); + concat_output->concat_mem(current_iteration); } } @@ -793,4 +736,282 @@ void loop_inst::update_output_layout() { } } } + +void loop_inst::concatenated_memory_mapping::slice_mem(const int64_t num_iterations) const { + size_t num_iters = static_cast(num_iterations); + OPENVINO_ASSERT(num_iters > 0 && num_iters == sliced_mems.size(), "num_iterations(", num_iters, + ") should be same with sliced_mems.size(", sliced_mems.size(), ")"); + OPENVINO_ASSERT(concatenated_mem != nullptr, "concatenated_mem should not be nullptr"); + + auto elem_size = ov::element::Type(concatenated_mem->get_layout().data_type).size(); + auto concat_mem_shape = concatenated_mem->get_layout().get_shape(); + auto sliced_mem_shape = sliced_mems.front()->get_layout().get_shape(); + const auto stride = io_prim_map.stride; + const auto axis = io_prim_map.axis; + const auto step = std::abs(stride); + OPENVINO_ASSERT((static_cast(step) == sliced_mem_shape[axis]) + && (concat_mem_shape[axis] >= num_iterations * 
sliced_mem_shape[axis]), + "slice_mem: concat_mem_shape[axis(", axis, "),step(", step, ")](", + concat_mem_shape.to_string(), ") != num_iterations(", + num_iterations, ") * sliced_mem_shape[axis](", sliced_mem_shape.to_string(), ")"); + std::vector pointers_to_data(num_iters); + for (size_t i = 0; i < num_iters; i++) { + auto mem = sliced_mems[i]; + pointers_to_data[stride > 0 ? i : (num_iters - i - 1)] = reinterpret_cast(mem->lock(stream)); + } + char* concat_data = reinterpret_cast(concatenated_mem->lock(stream, cldnn::mem_lock_type::read)); + ov::reference::split(concat_data, concat_mem_shape, elem_size, axis, num_iters, pointers_to_data.data()); + + for (size_t i = 0; i < num_iters; i++) { + sliced_mems[i]->unlock(stream); + } + concatenated_mem->unlock(stream); + GPU_DEBUG_LOG << "slice memory [" << io_prim_map.to_short_string() << "] from concat_mem[" + << concatenated_mem->get_layout().to_short_string() + << "], current_iteration: " << num_iterations << ", stride: " << stride + << " to sliced_mems[" << sliced_mems.front()->get_layout().to_short_string() << "]" << std::endl; +} + +void loop_inst::concatenated_memory_mapping::concat_mem(const int64_t curent_iterations) const { + size_t curr_iters = static_cast(curent_iterations); + OPENVINO_ASSERT(sliced_mems.size() >= curr_iters, "curent_iterations(", curr_iters, + ") should be less than the number of sliced_mems(", sliced_mems.size(), ")"); + OPENVINO_ASSERT(concatenated_mem != nullptr, "concatenated_mem should not be nullptr"); + + auto elem_size = ov::element::Type(concatenated_mem->get_layout().data_type).size(); + auto concat_mem_shape = concatenated_mem->get_layout().get_shape(); + auto sliced_mem_shape = sliced_mems.front()->get_layout().get_shape(); + const auto stride = io_prim_map.stride; + const auto axis = io_prim_map.axis; + const auto step = std::abs(stride); + OPENVINO_ASSERT((static_cast(step) == sliced_mem_shape[axis]) + && (concat_mem_shape[axis] >= curent_iterations * 
sliced_mem_shape[axis]), + "concat_mem: concat_mem_shape[axis(", axis, "),step(", step, ")](", + concat_mem_shape.to_string(), ") != curent_iterations(", + curent_iterations, ") * sliced_mem_shape[axis](", sliced_mem_shape.to_string(), ")"); + std::vector shapes_to_concat(curr_iters, sliced_mem_shape); + std::vector pointers_to_data(curr_iters); + for (size_t i = 0; i < curr_iters; i++) { + auto mem = sliced_mems[i]; + pointers_to_data[stride > 0 ? i : (curr_iters - i - 1)] = reinterpret_cast(mem->lock(stream)); + } + + char* concat_data = reinterpret_cast(concatenated_mem->lock(stream)); + ov::reference::concat(pointers_to_data, concat_data, shapes_to_concat, concat_mem_shape, axis, elem_size); + + for (size_t i = 0; i < curr_iters; i++) { + sliced_mems[i]->unlock(stream); + } + concatenated_mem->unlock(stream); + GPU_DEBUG_LOG << "concatenate memory [" << io_prim_map.to_short_string() << "] from sliced_mems[" + << sliced_mems.front()->get_layout().to_short_string() << "], current_iteration: " + << curent_iterations << ", stride: " << stride << " to concat_mem[" + << concatenated_mem->get_layout().to_short_string() << "]" << std::endl; +} + +int64_t loop_inst::calculate_num_iterations(const cldnn::loop::io_primitive_map& io_prim_map, + ov::PartialShape& pshape) { + OPENVINO_ASSERT(io_prim_map.stride != 0, "stride should not be zero"); + const auto space = pshape[io_prim_map.axis].get_length(); + const auto start = (io_prim_map.start < 0? (space + 1) : 0) + io_prim_map.start; + const auto end = (io_prim_map.end < 0? (space + 1) : 0) + io_prim_map.end; + const auto step = std::abs(io_prim_map.stride); + const auto src = io_prim_map.stride < 0 ? end : start; + const auto dst = io_prim_map.stride < 0 ? 
start : end; + const auto len = dst - src; + OPENVINO_ASSERT(src >= 0 && dst > src && dst <= space && len >= static_cast(step), + "invalid values in an iteration component start:", + io_prim_map.start, ", end: ", io_prim_map.end, ", stride:", + io_prim_map.stride, ", axis: ", io_prim_map.axis, ", dst: ", + dst, ", src: ", src, ", space: ", space, ", len: ", + len, ", step: ", step, ", pshape: ", pshape.to_string()); + OPENVINO_ASSERT(len % step == 0, "Each iteration should have same size: length(", len, ") % step(", step, ")"); + int64_t num_iterations = static_cast(len / step); + { + GPU_DEBUG_LOG << "Caculate num_iterations ..." << std::endl; + GPU_DEBUG_LOG << "* io_prim_map.{start:" << io_prim_map.start << ", end:" << io_prim_map.end + << ", stride: " << io_prim_map.stride << ", axis: " << io_prim_map.axis << "}" << std::endl; + GPU_DEBUG_LOG << "* pshape : " << pshape.to_string() << std::endl; + GPU_DEBUG_LOG << "* space : " << space << std::endl; + GPU_DEBUG_LOG << "* start : " << start << std::endl; + GPU_DEBUG_LOG << "* end : " << end << std::endl; + GPU_DEBUG_LOG << "* step : " << step << std::endl; + GPU_DEBUG_LOG << "* src : " << src << std::endl; + GPU_DEBUG_LOG << "* dst : " << dst << std::endl; + GPU_DEBUG_LOG << "* len : " << len << std::endl; + GPU_DEBUG_LOG << "* num_iterations : " << num_iterations << std::endl; + } + return num_iterations; +} + +int64_t loop_inst::get_num_iterations() { + int64_t num_iterations = -1; + bool is_default_num_iter = true; + for (auto& input_map : _input_primitive_maps) { + if (input_map.axis == -1) + continue; + const auto& external_id = input_map.external_id; + auto exteranl_input_inst = get_network().get_primitive(external_id.pid); + auto concat_shape = exteranl_input_inst->get_output_layout(external_id.idx).get_partial_shape(); + + if (concat_shape[input_map.axis].get_length() == 0) + continue; + + const auto current_num_iterations = calculate_num_iterations(input_map, concat_shape); + if (is_default_num_iter) { + 
is_default_num_iter = false; + num_iterations = current_num_iterations; + } + OPENVINO_ASSERT(num_iterations == current_num_iterations, + "iteration num shuld be same between ", num_iterations, " and ", current_num_iterations); + } + + for (auto& output_map : _output_primitive_maps) { + if (output_map.axis == -1) + continue; + + const auto& external_id = output_map.external_id; + auto exteranl_output_inst = get_network().get_primitive(external_id.pid); + auto concat_shape = exteranl_output_inst->get_output_layout(external_id.idx).get_partial_shape(); + + if (concat_shape[output_map.axis].is_dynamic() || concat_shape[output_map.axis].get_length() == 0) + continue; + + const auto current_num_iterations = calculate_num_iterations(output_map, concat_shape); + if (is_default_num_iter) { + is_default_num_iter = false; + num_iterations = current_num_iterations; + } + OPENVINO_ASSERT(num_iterations == current_num_iterations, + "iteration num shuld be same between ", num_iterations, " and ", current_num_iterations); + } + return num_iterations; +} + +void loop_inst::set_memory_in_body_network(cldnn::network::ptr body_network, + const std::shared_ptr& inst, memory::ptr mem) { + if (inst->is_input()) { + body_network->set_input_data(inst->id(), mem); + } else if (inst->is_output()) { + body_network->set_output_memory(inst->id(), mem); + } else { + inst->set_output_memory(mem, false); + } +} + +std::vector loop_inst::handle_buffers_for_next_iteration(const loop_inst::backedge_memory_mapping& mapping, + network::ptr body_network, int64_t iter) { + std::vector event_vec; + OPENVINO_ASSERT(iter >= 0, "iteration should not be negative : ", iter); + if (mapping.type == loop_inst::backedge_memory_mapping::CONCAT_OUTPUT) { + if (iter == 0) { + set_memory_in_body_network(body_network, mapping.to_primitive, mapping.initial_mem); + } else if (iter > 0) { + if (is_dynamic()) { + auto from_id = mapping.from_primitive->id(); + if (body_network->has_event(from_id)) { + auto ev = 
body_network->get_primitive_event(from_id); + if (ev) ev->wait(); + } + // In dynamic model, just copy data from inner body output to inner body input in back_edges. + memory::ptr to_mem = mapping.to_primitive->output_memory_ptr(); + memory::ptr from_mem = mapping.from_primitive->output_memory_ptr(); + auto ev = to_mem->copy_from(body_network->get_stream(), *(from_mem)); + if (ev) event_vec = {ev}; + } else { + auto mem = mapping.concat_mem_mapping->get_sliced_mems().at(iter - 1); + set_memory_in_body_network(body_network, mapping.to_primitive, mem); + } + } + } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE_SHARED) { + if (iter == 0) { + if (mapping.from_mem != nullptr) { + auto ev = mapping.from_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); + if (ev) event_vec = {ev}; + } + } else { + // In dynamic model, output memory is not defined before execution. + // After body network execution, replace input memory from initial_mem(external input memory) to output memory. + if (mapping.from_mem == nullptr) { + mapping.from_mem = mapping.from_primitive->output_memory_ptr(); + OPENVINO_ASSERT(mapping.from_mem != nullptr, "from_mem should not be null"); + set_memory_in_body_network(body_network, mapping.to_primitive, mapping.from_mem); + } + } + } else if (mapping.type == loop_inst::backedge_memory_mapping::SINGLE) { + memory::ptr to_mem = mapping.to_primitive->output_memory_ptr(); + if (iter == 0) { + auto ev = to_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); + if (ev) event_vec = {ev}; + } else { + if (is_dynamic()) { + // In dynamic model, do not set memory buffer between input and output in inner body network. + // Just copy data from input buffer memory to output buffer memory. 
+ auto from_id = mapping.from_primitive->id(); + if (body_network->has_event(from_id)) { + auto ev = body_network->get_primitive_event(from_id); + if (ev) ev->wait(); + } + memory::ptr from_mem = mapping.from_primitive->output_memory_ptr(); + auto ev = to_mem->copy_from(body_network->get_stream(), *(from_mem)); + if (ev) event_vec = {ev}; + } else { + // In static model, swap memory buffer between output and input in inner body network + memory::ptr from_mem = mapping.from_primitive->output_memory_ptr(); + set_memory_in_body_network(body_network, mapping.to_primitive, std::move(from_mem)); + set_memory_in_body_network(body_network, mapping.from_primitive, std::move(to_mem)); + } + } + } + return event_vec; +} + +std::vector loop_inst::preprocess_memory_for_body_network(int64_t current_iteration_idx) { + std::vector events; + // Copy & Set sliced input memory + for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { + const auto& concatenated_input = concatenated_input_mem_mappings.at(i); + memory::ptr mem = concatenated_input->get_sliced_mem(current_iteration_idx); + OPENVINO_ASSERT(mem != nullptr, id(), " sliced input memory of loop is not allocated properly"); + concatenated_input->get_sliced_data_prim()->set_output_memory(mem); + } + + // Set backedges and output memory + for (auto& backedge_memory_mapping : backedge_memory_mappings) { + auto event_vec = handle_buffers_for_next_iteration(backedge_memory_mapping, body_network, current_iteration_idx); + for (auto ev : event_vec) { + events.push_back(ev); + } + } + + if (!is_dynamic()) { + // Set sliced output memory for static shape model + // because body network generate output memory during the body network execution in dynamic model + for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { + concat_output_mem_mapping->setup_sliced_output_memory(current_iteration_idx); + } + } + return events; +} + +std::vector loop_inst::postprocess_memory_for_body_network(int64_t 
current_iteration_idx) { + std::vector events; + for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { + auto sliced_data_prim = concat_output_mem_mapping->get_sliced_data_prim(); + auto output_mem_ptr = sliced_data_prim->output_memory_ptr(); + + auto sliced_id = sliced_data_prim->id(); + if (body_network->has_event(sliced_id)) { + auto ev = body_network->get_primitive_event(sliced_id); + if (ev) ev->wait(); + } + memory::ptr new_sliced_mem = concat_output_mem_mapping->get_or_create_sliced_mem(current_iteration_idx, + output_mem_ptr->get_layout()); + auto ev = new_sliced_mem->copy_from(body_network->get_stream(), *output_mem_ptr); + if (ev) { + events.push_back(ev); + } + } + return events; +} } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 0a2a971a46be78..db90ee9a50dc83 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -199,7 +199,6 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptris_dynamic(); int64_t num_iterations = op->get_num_iterations(); - OPENVINO_ASSERT((is_dynamic || num_iterations > 0), "loop's num_iteration should be positive on static shape model"); auto num_outputs = is_dynamic? 
op->get_output_size() : 1; auto ov_model = op->get_function(); @@ -280,7 +279,7 @@ static void CreateCommonLoopOp(ProgramBuilder& p, const std::shared_ptr output_primitive_maps, - std::vector back_edges) { + std::vector back_edges, + bool allow_new_shape_infer = false) { std::vector output_names_vec; for (auto out_map : output_primitive_maps) { output_names_vec.push_back(out_map.internal_id.pid); @@ -48,6 +49,7 @@ static program::ptr build_program(engine& engine, config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(output_names_vec)); config.set_property(ov::intel_gpu::max_dynamic_batch(1)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(allow_new_shape_infer)); return program::build_program(engine, body_topology, config, false, false, true); } @@ -296,7 +298,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) // set inner loop body ///////////////////////////////// topology inner_loop_body( - input_layout("inner_input", input_mem->get_layout()), + input_layout("inner_input", { { 1, 1, 1, 4 }, data_types::f32, format::bfyx }), data("inner_eltwise_operand", inner_operand_mem), eltwise("inner_eltwise", input_info("inner_input"), input_info("inner_eltwise_operand"), eltwise_mode::sum) ); @@ -428,3 +430,121 @@ TEST(loop_gpu, basic_concat_cached) { TEST(loop_gpu, basic_concat_nested_cached) { test_loop_gpu_basic_concat_nested(true); } + +static void test_loop_gpu_wo_trip_count(bool is_caching_test) { + auto& engine = get_test_engine(); + + auto e_input_layout = cldnn::layout{ { 1, 1, 5, 4 }, data_types::f32, format::bfyx }; + auto b_input_layout = cldnn::layout{ { 1, 1, 1, 4}, data_types::f32, format::bfyx }; + auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx }; + + auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y + auto e_initial_condition_mem = engine.allocate_memory(const_layout); + auto e_num_iteration_mem = engine.allocate_memory(const_layout); + 
auto b_exit_value_mem = engine.allocate_memory(const_layout); + auto b_index_inc_mem = engine.allocate_memory(const_layout); + + std::vector input_data{ + 1.0f, 2.0f, -15.f, 3.0f, + 4.0f, -15.f, 5.0f, 6.0f, + -15.f, 7.0f, -15.f, 0.0f, + 0.0f, -15.f, 0.5f, -0.5f, + -15.f, 8.0f, 1.5f, 5.2f + }; + + const int64_t exit_value = 3; + + // initialize input buffers + set_values(e_input_mem, input_data); + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + + primitive_id body_current_iteration_id = "b_index"; + primitive_id body_execution_condition_id = "b_cond_exit_value"; + + cldnn::topology body( + input_layout(body_current_iteration_id, const_layout), + input_layout("b_add_data", b_input_layout), + input_layout("b_mul_data", b_input_layout), + data("b_exit_value", b_exit_value_mem), + data("b_index_inc", b_index_inc_mem), + eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum), + reorder("b_index_cast", input_info("b_index_update"), + cldnn::format::any, data_types::f32, {}, cldnn::reorder_mean_mode::subtract, cldnn::padding(), true), + eltwise(body_execution_condition_id, input_info("b_index"), input_info("b_exit_value"), eltwise_mode::lt), + eltwise("b_add", input_info("b_add_data"), input_info("b_index_cast"), eltwise_mode::sum), + eltwise("b_mul", input_info("b_mul_data"), input_info("b_index_cast"), eltwise_mode::prod) + ); + + primitive_id trip_count_id = ""; + primitive_id actual_iteration_count_id = "actual_iteration_count"; + primitive_id initial_condition_id = "initial_condition"; + int64_t num_iterations = -1; + + std::vector input_primitive_maps { + loop::io_primitive_map("input", "b_add_data", 2), + loop::io_primitive_map("input", "b_mul_data", 2), + loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) }; + std::vector output_primitive_maps { + loop::io_primitive_map(cldnn::input_info("loop", 0), 
cldnn::input_info("b_add", 0), 2), + loop::io_primitive_map(cldnn::input_info("loop", 1), cldnn::input_info("b_mul", 0), 2) }; + std::vector back_edges { + loop::backedge_mapping("b_index_update", body_current_iteration_id) }; + + auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); + + cldnn::topology topology( + input_layout("input", e_input_layout), + input_layout(initial_condition_id, e_initial_condition_mem->get_layout()), + mutable_data(actual_iteration_count_id, e_num_iteration_mem), + loop("loop", { input_info(actual_iteration_count_id), input_info("input") }, body_program, + trip_count_id, initial_condition_id, actual_iteration_count_id, + input_primitive_maps, output_primitive_maps, back_edges, + num_iterations, body_current_iteration_id, body_execution_condition_id, 2), + eltwise("out_sum", input_info("loop", 0), input_info("loop", 1), eltwise_mode::sum) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + network->set_input_data("input", e_input_mem); + network->set_input_data(initial_condition_id, e_initial_condition_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), 1); + + auto expected_num_iterations = (exit_value + 1); + + auto num_iter_mem = network->get_output_memory(actual_iteration_count_id); + if (num_iter_mem != nullptr) { + mem_lock num_iter_ptr{ num_iter_mem, get_test_stream() }; + ASSERT_EQ(num_iter_ptr.data()[0], expected_num_iterations); + } + + std::vector expected(input_data.size()); + for (size_t j = 0; j < input_data.size(); j++) { + auto val = static_cast(j / 4) + 1; + expected[j] = static_cast(input_data[j] + val) + static_cast(input_data[j] * val); + } + + auto output_mem = outputs.begin()->second.get_memory(); + auto output_layout = 
output_mem->get_layout(); + + ASSERT_EQ(output_layout.batch(), 1); + ASSERT_EQ(output_layout.feature(), 1); + ASSERT_EQ(output_layout.spatial(0), 4); + ASSERT_EQ(output_layout.spatial(1), expected_num_iterations); + // value check + { + mem_lock output_ptr{ output_mem, get_test_stream() }; + for (size_t i = 0, iend = output_layout.count(); i < iend; ++i) { + ASSERT_FLOAT_EQ(output_ptr[i], expected.at(i)); + } + } +} + +TEST(loop_gpu, support_dynamic_tensoriterator) { + test_loop_gpu_wo_trip_count(false); +} From 0f260c2ccd6279fd46ee6118ea3af7779c611c17 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Wed, 8 Nov 2023 10:17:57 +0400 Subject: [PATCH 227/275] [DOC]: Added INT4 weight compression description (#20812) * Added INT4 information into weight compression doc * Added GPTQ info. Fixed comments * Fixed list * Fixed issues. Updated Gen.AI doc * Applied comments * Added additional infor about GPTQ support * Fixed typos * Update docs/articles_en/openvino_workflow/gen_ai.md Co-authored-by: Nico Galoppo * Update docs/articles_en/openvino_workflow/gen_ai.md Co-authored-by: Nico Galoppo * Update docs/optimization_guide/nncf/code/weight_compression_openvino.py Co-authored-by: Nico Galoppo * Applied changes * Update docs/articles_en/openvino_workflow/gen_ai.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/gen_ai.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/gen_ai.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md Co-authored-by: Tatiana Savina * Update docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md Co-authored-by: Tatiana Savina * Update 
docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md Co-authored-by: Tatiana Savina * Added table with results * One more comment --------- Co-authored-by: Nico Galoppo Co-authored-by: Tatiana Savina --- docs/articles_en/openvino_workflow/gen_ai.md | 22 ++++ .../weight_compression.md | 103 +++++++++++++++++- .../nncf/code/weight_compression_openvino.py | 9 +- 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/openvino_workflow/gen_ai.md b/docs/articles_en/openvino_workflow/gen_ai.md index 4ecb55fcc2427c..40567d2daa353d 100644 --- a/docs/articles_en/openvino_workflow/gen_ai.md +++ b/docs/articles_en/openvino_workflow/gen_ai.md @@ -115,6 +115,28 @@ Optimum-Intel API also provides out-of-the-box model optimization through weight Weight compression is applied by default to models larger than one billion parameters and is also available for CLI interface as the ``--int8`` option. +.. note:: + + 8-bit weight compression is enabled by default for models larger than 1 billion parameters. + +`NNCF `__ also provides 4-bit weight compression, which is supported by OpenVINO. It can be applied to Optimum objects as follows: + +.. code-block:: python + + from nncf import compress_weights, CompressWeightsMode + + model = OVModelForCausalLM.from_pretrained(model_id, export=True, load_in_8bit=False) + model.model = compress_weights(model.model, mode=CompressWeightsMode.INT4_SYM, group_size=128, ratio=0.8) + + +The optimized model can be saved as usual with a call to ``save_pretrained()``. For more details on compression options, refer to the :doc:`weight compression guide `. + +.. note:: + + OpenVINO also supports 4-bit models from Hugging Face `Transformers `__ library optimized + with `GPTQ `__. In this case, there is no need for an additional model optimization step because model conversion will automatically preserve the INT4 optimization results, allowing model inference to benefit from it. 
+ + Below are some examples of using Optimum-Intel for model conversion and inference: * `Stable Diffusion v2.1 using Optimum-Intel OpenVINO `__ diff --git a/docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md b/docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md index fb29a6d49b767f..fd9599a31f7ea7 100644 --- a/docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md +++ b/docs/articles_en/openvino_workflow/model_optimization_guide/weight_compression.md @@ -10,12 +10,14 @@ Weight compression aims to reduce the memory footprint of a model. It can also l - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; - improving the inference performance of the models by reducing the latency of the memory access when computing the operations with weights, for example, Linear layers. -Currently, `Neural Network Compression Framework (NNCF) `__ provides 8-bit weight quantization as a compression method primarily designed to optimize LLMs. The main difference between weights compression and full model quantization (post-training quantization) is that activations remain floating-point in the case of weights compression which leads to a better accuracy. Weight compression for LLMs provides a solid inference performance improvement which is on par with the performance of the full model quantization. In addition, weight compression is data-free and does not require a calibration dataset, making it easy to use. +Currently, `Neural Network Compression Framework (NNCF) `__ provides weight quantization to 8 and 4-bit integer data types as a compression method primarily designed to optimize LLMs. The main difference between weights compression and full model quantization is that activations remain floating-point in the case of weight compression, resulting in better accuracy. 
Weight compression for LLMs provides a solid inference performance improvement which is on par with the performance of the full model quantization. In addition, weight compression is data-free and does not require a calibration dataset, making it easy to use. Compress Model Weights ###################### -The code snippet below shows how to compress the weights of the model represented in OpenVINO IR using NNCF: +- **8-bit weight quantization** - this method is aimed at accurate optimization of the model, which usually leads to significant performance improvements for Transformer-based models. Models with 8-bit compressed weights are performant on the vast majority of supported CPU and GPU platforms. + +The code snippet below shows how to do 8-bit quantization of the model weights represented in OpenVINO IR using NNCF: .. tab-set:: @@ -28,6 +30,103 @@ The code snippet below shows how to compress the weights of the model represente Now, the model is ready for compilation and inference. It can be also saved into a compressed format, resulting in a smaller binary file. +- **4-bit weight quantization** - this method stands for an INT4-INT8 mixed-precision weight quantization, where INT4 is considered as the primary precision and INT8 is the backup one. It usually results in a smaller model size and lower inference latency, although the accuracy degradation could be higher, depending on the model. The method has several parameters that can provide different performance-accuracy trade-offs after optimization: + + * ``mode`` - there are two modes to choose from: ``INT4_SYM`` - stands for INT4 symmetric weight quantization and results in faster inference and smaller model size, and ``INT4_ASYM`` - INT4 asymmetric weight quantization with variable zero-point for more accurate results. + + * ``group_size`` - controls the size of the group of weights that share the same quantization parameters. 
Smaller group size results in a more accurate optimized model but with a larger footprint and slower inference. The following group sizes are recommended: ``128``, ``64``, ``32`` (``128`` is the default value) + + * ``ratio`` - controls the ratio between INT4 and INT8 compressed layers in the model. For example, 0.8 means that 80% of layers will be compressed to INT4, while the rest will be compressed to INT8 precision. + +The example below shows 4-bit weight quantization applied on top of OpenVINO IR: + +.. tab-set:: + + .. tab-item:: OpenVINO + :sync: openvino + + .. doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py + :language: python + :fragment: [compression_4bit] + +.. note:: + + OpenVINO also supports 4-bit models from Hugging Face `Transformers `__ library optimized + with `GPTQ `__. In this case, there is no need for an additional model optimization step because model conversion will automatically preserve the INT4 optimization results, allowing model inference to benefit from it. + + +The table below shows examples of Text Generation models with different optimization settings: + +..
list-table:: + :widths: 40 55 25 25 + :header-rows: 1 + + * - Model + - Optimization + - Perplexity + - Model Size (GB) + * - databricks/dolly-v2-3b + - FP32 + - 5.01 + - 10.3 + * - databricks/dolly-v2-3b + - INT8 + - 5.07 + - 2.6 + * - databricks/dolly-v2-3b + - INT4_ASYM,group_size=32,ratio=0.5 + - 5.28 + - 2.2 + * - facebook/opt-6.7b + - FP32 + - 4.25 + - 24.8 + * - facebook/opt-6.7b + - INT8 + - 4.27 + - 6.2 + * - facebook/opt-6.7b + - INT4_ASYM,group_size=64,ratio=0.8 + - 4.32 + - 4.1 + * - meta-llama/Llama-2-7b-chat-hf + - FP32 + - 3.28 + - 25.1 + * - meta-llama/Llama-2-7b-chat-hf + - INT8 + - 3.29 + - 6.3 + * - meta-llama/Llama-2-7b-chat-hf + - INT4_ASYM,group_size=128,ratio=0.8 + - 3.41 + - 4.0 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - FP32 + - 4.15 + - 25.6 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - INT8 + - 4.17 + - 6.4 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - INT4_ASYM,group_size=128,ratio=1.0 + - 4.17 + - 3.6 + * - meta-llama/Llama-2-13b-chat-hf + - FP32 + - 2.92 + - 48.5 + * - meta-llama/Llama-2-13b-chat-hf + - INT8 + - 2.91 + - 12.1 + * - meta-llama/Llama-2-13b-chat-hf + - INT4_SYM,group_size=64,ratio=0.8 + - 2.98 + - 8.0 + + Additional Resources #################### diff --git a/docs/optimization_guide/nncf/code/weight_compression_openvino.py b/docs/optimization_guide/nncf/code/weight_compression_openvino.py index c9ab67efd5aa32..d66fb28f4243c0 100644 --- a/docs/optimization_guide/nncf/code/weight_compression_openvino.py +++ b/docs/optimization_guide/nncf/code/weight_compression_openvino.py @@ -3,4 +3,11 @@ ... model = compress_weights(model) # model is openvino.Model object -#! [compression_8bit] \ No newline at end of file +#! [compression_8bit] + +#! [compression_4bit] +from nncf import compress_weights, CompressWeightsMode + +... +model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM, group_size=128, ratio=0.8) # model is openvino.Model object +#!
[compression_4bit] \ No newline at end of file From d6cc3d70585d3f6b7b3aff0dc109e1e97ba3fdbc Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Nov 2023 12:45:22 +0400 Subject: [PATCH 228/275] Disable warnings about API 1.0 in GNA, Python API 1.0 (#20933) --- src/bindings/python/src/compatibility/openvino/CMakeLists.txt | 3 +++ src/plugins/intel_gna/CMakeLists.txt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/bindings/python/src/compatibility/openvino/CMakeLists.txt b/src/bindings/python/src/compatibility/openvino/CMakeLists.txt index e6f7f2aec3b347..aa2e7093d41b1b 100644 --- a/src/bindings/python/src/compatibility/openvino/CMakeLists.txt +++ b/src/bindings/python/src/compatibility/openvino/CMakeLists.txt @@ -13,6 +13,9 @@ if(NOT DEFINED OpenVINO_SOURCE_DIR) PATHS "${InferenceEngineDeveloperPackage_DIR}") endif() +# Python API 1.0 will be removed before 2024.0 +ov_disable_deprecated_warnings() + if(UNIX) # cython generated files requires public visibility. Force visibility required. 
set(CMAKE_CXX_VISIBILITY_PRESET default) diff --git a/src/plugins/intel_gna/CMakeLists.txt b/src/plugins/intel_gna/CMakeLists.txt index 107ea8a5a0cdc9..ecb94ece6b3db2 100644 --- a/src/plugins/intel_gna/CMakeLists.txt +++ b/src/plugins/intel_gna/CMakeLists.txt @@ -6,6 +6,9 @@ if (NOT ENABLE_INTEL_GNA) return() endif() +# GNA plugin will be removed before 2024.0 together with API 1.0 +ov_disable_deprecated_warnings() + add_subdirectory(legacy) set(TARGET_NAME "openvino_intel_gna_plugin") From 9e7243d67c1d2ecbec19c2d5b25fd925e1bb86f0 Mon Sep 17 00:00:00 2001 From: Liu <137922263+YaritaiKoto@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:10:11 +0800 Subject: [PATCH 229/275] fix typo (#20906) Co-authored-by: Michal Lukaszewski --- .../src/compatibility/openvino/inference_engine/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/src/compatibility/openvino/inference_engine/__init__.py b/src/bindings/python/src/compatibility/openvino/inference_engine/__init__.py index 24aca47b2edb71..b7ece2fcbbd817 100644 --- a/src/bindings/python/src/compatibility/openvino/inference_engine/__init__.py +++ b/src/bindings/python/src/compatibility/openvino/inference_engine/__init__.py @@ -32,7 +32,7 @@ from .ie_api import * warnings.warn( - message="OpenVINO Inference Engine Python API is deprecated and will be removed in 2024.0 release." + message="OpenVINO Inference Engine Python API is deprecated and will be removed in 2024.0 release. 
" "For instructions on transitioning to the new API, please refer to " "https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html", category=FutureWarning, From 87cef53088f5c56f19c9a5eb63f978d51d49a4b8 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Wed, 8 Nov 2023 09:42:44 +0000 Subject: [PATCH 230/275] [GPU] Refactor (#20938) * maxmin * mvn * normalize_l2 and fix mvn * prior_box_clustered * prior_box * pad * roi_align * scatter_update * select * shape_of * shuffle_channels * space_to_batch * space_to_depth * split * squeeze_unsqueeze * tile * transpose * variadic_split * scatter_nd_update --- .../single_layer_tests/minimum_maximum.cpp | 34 ++-- .../single_layer_tests/mvn.cpp | 74 +++++---- .../single_layer_tests/normalize_l2.cpp | 17 +- .../single_layer_tests/pad.cpp | 93 +++++------ .../single_layer_tests/prior_box.cpp | 55 +++---- .../prior_box_clustered.cpp | 22 +-- .../single_layer_tests/roi_align.cpp | 63 ++++---- .../single_layer_tests/scatter_nd_update.cpp | 50 ++++-- .../single_layer_tests/scatter_update.cpp | 60 +++++-- .../single_layer_tests/select.cpp | 31 ++-- .../single_layer_tests/shape_of.cpp | 26 ++- .../single_layer_tests/shuffle_channels.cpp | 43 ++--- .../single_layer_tests/space_to_batch.cpp | 152 ++++++------------ .../single_layer_tests/space_to_depth.cpp | 35 ++-- .../single_layer_tests/split.cpp | 30 ++-- .../single_layer_tests/squeeze_unsqueeze.cpp | 71 ++++---- .../single_layer_tests/tile.cpp | 23 +-- .../single_layer_tests/transpose.cpp | 71 +++----- .../single_layer_tests/variadic_split.cpp | 44 +++-- 19 files changed, 449 insertions(+), 545 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp index 74b9d475016a84..4a3c1ccac6b422 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp +++ 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/minimum_maximum.cpp @@ -2,15 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "single_layer_tests/minimum_maximum.hpp" +#include "single_op_tests/minimum_maximum.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::MaxMinLayerTest; -const std::vector>> inShapes = { +const std::vector> inShapes = { {{2}, {1}}, {{1, 1, 1, 3}, {1}}, {{1, 2, 4}, {1}}, @@ -20,31 +18,27 @@ const std::vector>> inShapes = { {{8, 1, 6, 1}, {7, 1, 5}}, }; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, }; -const std::vector opType = { - ngraph::helpers::MinMaxOpType::MINIMUM, - ngraph::helpers::MinMaxOpType::MAXIMUM, +const std::vector opType = { + ov::test::utils::MinMaxOpType::MINIMUM, + ov::test::utils::MinMaxOpType::MAXIMUM, }; -const std::vector inputType = { - ngraph::helpers::InputLayerType::CONSTANT, - ngraph::helpers::InputLayerType::PARAMETER, +const std::vector second_inputType = { + ov::test::utils::InputLayerType::CONSTANT, + ov::test::utils::InputLayerType::PARAMETER, }; INSTANTIATE_TEST_SUITE_P(smoke_maximum, MaxMinLayerTest, ::testing::Combine( - ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapes)), ::testing::ValuesIn(opType), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputType), + ::testing::ValuesIn(second_inputType), ::testing::Values(ov::test::utils::DEVICE_GPU)), MaxMinLayerTest::getTestCaseName); diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mvn.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mvn.cpp index 56384a0a798a95..329ac165b7e9dd 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mvn.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/mvn.cpp @@ -4,26 +4,28 @@ #include -#include "single_layer_tests/mvn.hpp" +#include "single_op_tests/mvn.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +namespace { +using ov::test::Mvn1LayerTest; +using ov::test::Mvn6LayerTest; const std::vector emptyReductionAxes = {{}}; -const std::vector> inputShapes = { - {1, 32, 17}, - {1, 37, 9}, - {1, 16, 5, 8}, - {2, 19, 5, 10}, - {7, 32, 2, 8}, - {5, 8, 3, 5}, - {4, 41, 6, 9}, - {1, 32, 8, 1, 6}, - {1, 9, 1, 15, 9}, - {6, 64, 6, 1, 18}, - {2, 31, 2, 9, 1}, - {10, 16, 5, 10, 6} +const std::vector> inputShapes = { + {{1, 32, 17}}, + {{1, 37, 9}}, + {{1, 16, 5, 8}}, + {{2, 19, 5, 10}}, + {{7, 32, 2, 8}}, + {{5, 8, 3, 5}}, + {{4, 41, 6, 9}}, + {{1, 32, 8, 1, 6}}, + {{1, 9, 1, 15, 9}}, + {{6, 64, 6, 1, 18}}, + {{2, 31, 2, 9, 1}}, + {{10, 16, 5, 10, 6}} }; const std::vector acrossChannels = { @@ -42,8 +44,8 @@ const std::vector epsilon = { INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, Mvn1LayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes)), + ::testing::Values(ov::element::f32), ::testing::ValuesIn(emptyReductionAxes), ::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(normalizeVariance), @@ -51,14 +53,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, ::testing::Values(ov::test::utils::DEVICE_GPU)), Mvn1LayerTest::getTestCaseName); -std::vector dataPrecisions = { - InferenceEngine::Precision::FP32, - 
InferenceEngine::Precision::FP16 +std::vector dataPrecisions = { + ov::element::f32, + ov::element::f16 }; -std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64 +std::vector idxPrecisions = { + ov::element::i32, + ov::element::i64 }; const std::vector epsMode = { @@ -72,7 +74,8 @@ const std::vector epsilonF = { INSTANTIATE_TEST_SUITE_P(smoke_MVN_5D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 10, 5, 7, 8}, {1, 3, 8, 9, 49}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 10, 5, 7, 8}}, {{1, 3, 8, 9, 49}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{1, 2, 3, 4}, {2, 3, 4}, {-3, -2, -1}, {-1, -4, -2, -3}, {-1}}), @@ -84,7 +87,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_MVN_5D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_MVN_4D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 10, 5, 17}, {1, 3, 8, 9}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 10, 5, 17}}, {{1, 3, 8, 9}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{1, 2, 3}, {2, 3}, {-2, -1}, {-2, -1, -3}, {-1}}), @@ -96,7 +100,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_MVN_4D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_MVN_3D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 32, 17}, {1, 37, 9}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 32, 17}}, {{1, 37, 9}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{1, 2}, {2}, {-1}, {-1, -2}}), @@ -108,7 +113,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_MVN_3D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_MVN_2D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{3, 5}, {2, 55}}), + 
::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{3, 5}}, {{2, 55}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{1}}), @@ -120,7 +126,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_MVN_2D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_1D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{3}, {9}, {55}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{3}}, {{9}}, {{55}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{}}), @@ -132,7 +139,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_1D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_3D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 32, 17}, {1, 37, 9}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 32, 17}}, {{1, 37, 9}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{0, 1, 2}, {0}, {1}}), @@ -144,7 +152,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_3D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_4D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 16, 5, 8}, {2, 19, 5, 10}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 16, 5, 8}}, {{2, 19, 5, 10}}})), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{0, 1, 2, 3}, {0, 1, 2}, {0, 3}, {0}, {1}, {2}, {3}}), @@ -156,7 +165,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_4D, Mvn6LayerTest, INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_6D, Mvn6LayerTest, ::testing::Combine( - ::testing::ValuesIn(std::vector>{{1, 3, 5, 4, 2, 6}}), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{1, 3, 5, 4, 2, 6}}})), 
::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::ValuesIn(std::vector>{{0, 1, 5}, {0, 1, 2, 3}, {0, 1, 2}, {0, 3}, {0}, {3}}), @@ -165,3 +175,5 @@ INSTANTIATE_TEST_SUITE_P(smoke_Decomposition_6D, Mvn6LayerTest, ::testing::ValuesIn(epsMode), ::testing::Values(ov::test::utils::DEVICE_GPU)), Mvn6LayerTest::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/normalize_l2.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/normalize_l2.cpp index a741bded90bdfb..e8e90841be99bb 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/normalize_l2.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/normalize_l2.cpp @@ -2,16 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/normalize_l2.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/normalize_l2.hpp" namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +using ov::test::NormalizeL2LayerTest; + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; const std::vector> axes = { @@ -30,7 +28,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_NormalizeL2, testing::Combine(testing::ValuesIn(axes), testing::ValuesIn(eps), testing::ValuesIn(epsMode), - testing::Values(std::vector{1, 3, 10, 5}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{1, 3, 10, 5}})), testing::ValuesIn(netPrecisions), testing::Values(ov::test::utils::DEVICE_GPU)), NormalizeL2LayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp index e207e8911e8247..b048a3f9655a54 100644 --- 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp @@ -1,27 +1,26 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - -#include - -#include "single_layer_tests/pad.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/pad.hpp" namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +using ov::test::PadLayerTest; +using ov::test::Pad12LayerTest; +using ov::op::PadMode; + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; const std::vector> padsBegin2D = {{0, 0}, {1, 1}, {2, 0}, {0, 3}}; const std::vector> padsEnd2D = {{0, 0}, {1, 1}, {0, 1}, {3, 2}}; const std::vector argPadValue = {0.f, 1.f, 2.f, -1.f}; -const std::vector padMode = { - ngraph::helpers::PadMode::EDGE, - ngraph::helpers::PadMode::REFLECT, - ngraph::helpers::PadMode::SYMMETRIC +const std::vector padMode = { + PadMode::EDGE, + PadMode::REFLECT, + PadMode::SYMMETRIC }; INSTANTIATE_TEST_SUITE_P(smoke_Pad2DConst, @@ -29,12 +28,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_Pad2DConst, testing::Combine(testing::ValuesIn(padsBegin2D), testing::ValuesIn(padsEnd2D), testing::ValuesIn(argPadValue), - testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::Values(PadMode::CONSTANT), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{13, 5}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{13, 5}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); @@ -45,10 +42,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Pad2D, testing::Values(0), testing::ValuesIn(padMode), testing::ValuesIn(netPrecisions), - 
testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{13, 5}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{13, 5}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); @@ -60,12 +55,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_Pad4DConst, testing::Combine(testing::ValuesIn(padsBegin4D), testing::ValuesIn(padsEnd4D), testing::ValuesIn(argPadValue), - testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::Values(PadMode::CONSTANT), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{3, 5, 10, 11}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); @@ -73,13 +66,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Pad4D, PadLayerTest, testing::Combine(testing::ValuesIn(padsBegin4D), testing::ValuesIn(padsEnd4D), - testing::Values(0), + testing::Values(0.0f), testing::ValuesIn(padMode), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{3, 5, 10, 11}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); @@ -87,30 +78,26 @@ const std::vector> padsBegin2DMixed = {{0, 0}, {1, 1}, {-2, const std::vector> padsEnd2DMixed = {{0, 0}, {1, 1}, {0, 1}, {-3, -2}, {2, -1}}; INSTANTIATE_TEST_SUITE_P(smoke_Pad2DConst, - PadLayerTest12, + Pad12LayerTest, 
testing::Combine(testing::ValuesIn(padsEnd2DMixed), testing::ValuesIn(padsEnd2D), testing::ValuesIn(argPadValue), - testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::Values(PadMode::CONSTANT), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{13, 5}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{13, 5}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Pad2D, - PadLayerTest12, + Pad12LayerTest, testing::Combine(testing::ValuesIn(padsBegin2DMixed), testing::ValuesIn(padsEnd2DMixed), - testing::Values(-333), + testing::Values(-333.0f), testing::ValuesIn(padMode), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{13, 5}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{13, 5}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); @@ -118,30 +105,26 @@ const std::vector> padsBegin4DMixed = {{0, 0, 0, 0}, {0, 3, const std::vector> padsEnd4DMixed = {{0, 0, 0, 0}, {0, 3, 0, 0}, {1, 0, 0, 0}, {0, 0, 0, 2}, {1, -3, 0, 0}, {0, 3, 0, -1}}; INSTANTIATE_TEST_SUITE_P(smoke_Pad4DConst, - PadLayerTest12, + Pad12LayerTest, testing::Combine(testing::ValuesIn(padsBegin4DMixed), testing::ValuesIn(padsEnd4DMixed), testing::ValuesIn(argPadValue), - testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::Values(PadMode::CONSTANT), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - 
testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{3, 5, 10, 11}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Pad4D, - PadLayerTest12, + Pad12LayerTest, testing::Combine(testing::ValuesIn(padsBegin4DMixed), testing::ValuesIn(padsEnd4DMixed), testing::Values(-333), testing::ValuesIn(padMode), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::static_shapes_to_test_representation( + std::vector{{3, 5, 10, 11}})), testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box.cpp index 1495fdc2c11887..2cada1a2e733d4 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box.cpp @@ -2,16 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/prior_box.hpp" - -#include - +#include "single_op_tests/prior_box.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +namespace { +using ov::test::PriorBoxLayerTest; -const std::vector netPrecisions = {InferenceEngine::Precision::I32, - InferenceEngine::Precision::U16}; +const std::vector netPrecisions = {ov::element::i32, + ov::element::u16}; const std::vector> min_sizes = {{256.0f}}; @@ -41,33 +39,27 @@ const std::vector> variances = {{}}; const std::vector min_max_aspect_ratios_order = {false, true}; -const std::vector inputShape = {300, 300}; 
-const std::vector imageShape = {32, 32}; +std::vector input_shapes_static = {{32, 32}, {300, 300}}; const auto scaleSizesParams = ::testing::Combine(::testing::ValuesIn(min_sizes), - ::testing::ValuesIn(max_sizes), - ::testing::ValuesIn(aspect_ratios), - ::testing::ValuesIn(densities), - ::testing::ValuesIn(fixed_ratios), - ::testing::ValuesIn(fixed_sizes), - ::testing::ValuesIn(clips), - ::testing::ValuesIn(flips), - ::testing::ValuesIn(steps), - ::testing::ValuesIn(offsets), - ::testing::ValuesIn(variances), - ::testing::Values(true), - ::testing::ValuesIn(min_max_aspect_ratios_order)); + ::testing::ValuesIn(max_sizes), + ::testing::ValuesIn(aspect_ratios), + ::testing::ValuesIn(densities), + ::testing::ValuesIn(fixed_ratios), + ::testing::ValuesIn(fixed_sizes), + ::testing::ValuesIn(clips), + ::testing::ValuesIn(flips), + ::testing::ValuesIn(steps), + ::testing::ValuesIn(offsets), + ::testing::ValuesIn(variances), + ::testing::Values(true), + ::testing::ValuesIn(min_max_aspect_ratios_order)); INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_PriorBox8_Scale, PriorBoxLayerTest, ::testing::Combine(scaleSizesParams, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::I32), - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(inputShape), - ::testing::Values(imageShape), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_GPU)), PriorBoxLayerTest::getTestCaseName); @@ -89,11 +81,8 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_PriorBox8_NoScale, PriorBoxLayerTest, ::testing::Combine(scaleSizesParams, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::I32), - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - 
::testing::Values(inputShape), - ::testing::Values(imageShape), + ::testing::Values(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::Values(ov::test::utils::DEVICE_GPU)), PriorBoxLayerTest::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp index 87658dfd9940f7..8b98c39795301b 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp @@ -2,18 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "single_layer_tests/prior_box_clustered.hpp" +#include "single_op_tests/prior_box_clustered.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::helpers; - namespace { +using ov::test::PriorBoxClusteredLayerTest; + // Common params -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; const std::vector> widths = { @@ -65,12 +63,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_PriorBoxClustered_Basic, PriorBoxClusteredLayerTe ::testing::Combine( layerSpeficParams, ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 4, 4 })), - ::testing::Values(std::vector({ 50, 50 })), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{4, 4}, {50, 50}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), 
PriorBoxClusteredLayerTest::getTestCaseName ); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp index 97c758df7f79c0..b721d0643a88ab 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/roi_align.cpp @@ -1,16 +1,15 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/roi_align.hpp" - -#include - +#include "single_op_tests/roi_align.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +namespace { +using ov::test::ROIAlignLayerTest; +using ov::test::ROIAlignV9LayerTest; -const std::vector netPRCs = { - InferenceEngine::Precision::FP32 +const std::vector netPRCs = { + ov::element::f32 // There is no possibility to test ROIAlign in fp16 precision, // because on edge cases where in fp32 version ROI value is // a little bit smaller than the nearest integer value, @@ -19,14 +18,15 @@ const std::vector netPRCs = { // in fp32 and fp16 precisions. // In real AI applications this problem is solved by precision-aware training. 
- // InferenceEngine::Precision::FP16 + // ov::element::f16 }; INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_average, ROIAlignLayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{3, 8, 16, 16}, - {2, 1, 16, 16}, - {2, 1, 8, 16}}), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{3, 8, 16, 16}}, + {{2, 1, 16, 16}}, + {{2, 1, 8, 16}}})), ::testing::Values(std::vector{2, 4}), ::testing::Values(2), ::testing::Values(2), @@ -39,9 +39,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_average, INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max, ROIAlignLayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{2, 8, 20, 20}, - {2, 1, 20, 20}, - {2, 1, 10, 20}}), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{2, 8, 20, 20}}, + {{2, 1, 20, 20}}, + {{2, 1, 10, 20}}})), ::testing::Values(std::vector{2, 4}), ::testing::Values(2), ::testing::Values(2), @@ -54,11 +55,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max, INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_asym, ROIAlignV9LayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, - {2, 8, 20, 20}, - {2, 1, 20, 20}, - {2, 1, 10, 20}}), - ::testing::Values(std::vector{2, 4}), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{2, 1, 8, 8}}, + {{2, 8, 20, 20}}, + {{2, 1, 20, 20}}, + {{2, 1, 10, 20}}})), + ::testing::Values(ov::Shape{2, 4}), ::testing::Values(2), ::testing::Values(2), ::testing::ValuesIn(std::vector{1, 0.625}), @@ -71,11 +73,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_asym, INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_hpfn, ROIAlignV9LayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, - {2, 8, 20, 20}, - {2, 1, 20, 20}, - {2, 1, 10, 20}}), - ::testing::Values(std::vector{2, 4}), + 
::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{2, 1, 8, 8}}, + {{2, 8, 20, 20}}, + {{2, 1, 20, 20}}, + {{2, 1, 10, 20}}})), + ::testing::Values(ov::Shape{2, 4}), ::testing::Values(2), ::testing::Values(2), ::testing::ValuesIn(std::vector{1, 0.625}), @@ -88,11 +91,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_avg_hpfn, INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max_hp, ROIAlignV9LayerTest, - ::testing::Combine(::testing::ValuesIn(std::vector>{{2, 1, 8, 8}, - {2, 8, 20, 20}, - {2, 1, 20, 20}, - {2, 1, 10, 20}}), - ::testing::Values(std::vector{2, 4}), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation( + std::vector>{{{2, 1, 8, 8}}, + {{2, 8, 20, 20}}, + {{2, 1, 20, 20}}, + {{2, 1, 10, 20}}})), + ::testing::Values(ov::Shape{2, 4}), ::testing::Values(2), ::testing::Values(2), ::testing::ValuesIn(std::vector{1, 0.625}), @@ -102,3 +106,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_TestsROIAlign_max_hp, ::testing::ValuesIn(netPRCs), ::testing::Values(ov::test::utils::DEVICE_GPU)), ROIAlignV9LayerTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_nd_update.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_nd_update.cpp index 278acb4a593cc1..39c1ed945b5a3a 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_nd_update.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_nd_update.cpp @@ -2,16 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include - -#include "single_layer_tests/scatter_ND_update.hpp" +#include "single_op_tests/scatter_ND_update.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset3; - namespace { +using ov::test::ScatterNDUpdateLayerTest; // map> // 
updateShape is gotten from inputShape and indicesShape @@ -30,22 +25,47 @@ std::map, std::map, std::vector> {{1, 2, 4}, {2, 3, 1, 8, 7, 5, 6, 5}}, {{2, 5}, {2, 3, 1, 8, 6, 9, 7, 5, 6, 5}}, {{2, 6}, {2, 3, 1, 8, 6, 5, 9, 7, 5, 6, 5, 7}}}} }; +std::vector combineShapes( + const std::map, std::map, std::vector>>& input_shapes) { + std::vector resVec; + for (auto& input_shape : input_shapes) { + for (auto& item : input_shape.second) { + auto indices_shape = item.first; + size_t indices_rank = indices_shape.size(); + std::vector update_shape; + for (size_t i = 0; i < indices_rank - 1; i++) { + update_shape.push_back(indices_shape[i]); + } + auto src_shape = input_shape.first; + for (size_t j = indices_shape[indices_rank - 1]; j < src_shape.size(); j++) { + update_shape.push_back(src_shape[j]); + } + std::vector in_shapes{src_shape, update_shape}; + resVec.push_back( + ov::test::scatterNDUpdateSpecParams{ + ov::test::static_shapes_to_test_representation(in_shapes), + ov::Shape{indices_shape}, + item.second}); + } + } + return resVec; +} -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32, }; -const std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector idxPrecisions = { + ov::element::i32, + ov::element::i64, }; INSTANTIATE_TEST_SUITE_P( smoke_ScatterNDUpdate, ScatterNDUpdateLayerTest, - ::testing::Combine(::testing::ValuesIn(ScatterNDUpdateLayerTest::combineShapes(sliceSelectInShape)), + ::testing::Combine(::testing::ValuesIn(combineShapes(sliceSelectInShape)), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), ::testing::Values(ov::test::utils::DEVICE_GPU)), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp index d54fe57c30c5cc..0c17b4b5eee9a7 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/scatter_update.cpp @@ -1,26 +1,21 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - -#include -#include - -#include "single_layer_tests/scatter_update.hpp" +#include "single_op_tests/scatter_update.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset3; - namespace { -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, +using ov::test::ScatterUpdateLayerTest; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i32, }; -const std::vector idxPrecisions = { - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, +const std::vector idxPrecisions = { + ov::element::i32, + ov::element::i64, }; // map> @@ -36,10 +31,43 @@ const std::vector> idxValue = { {0, 2, 4, 6, 1, 3, 5, 7} }; +std::vector combine_shapes( + const std::map, std::map, std::vector>>& input_shapes) { + std::vector res_vec; + for (auto& input_shape : input_shapes) { + auto src_shape = input_shape.first; + auto srcRank = src_shape.size(); + for (auto& item : input_shape.second) { + auto indices_shape = item.first; + auto indices_rank = indices_shape.size(); + for (auto& axis : item.second) { + auto axisP = axis < 0 ? 
axis + srcRank : axis; + std::vector update_shape; + for (size_t rs = 0; rs < srcRank; rs++) { + if (rs != axisP) { + update_shape.push_back(src_shape[rs]); + } else { + for (size_t ri = 0; ri < indices_rank; ri++) { + update_shape.push_back(indices_shape[ri]); + } + } + } + std::vector in_shapes{src_shape, update_shape}; + res_vec.push_back( + ov::test::axisUpdateShapeInShape{ + ov::test::static_shapes_to_test_representation(in_shapes), + ov::Shape{indices_shape}, + axis}); + } + } + } + return res_vec; +} + INSTANTIATE_TEST_SUITE_P( smoke_ScatterUpdate, ScatterUpdateLayerTest, - ::testing::Combine(::testing::ValuesIn(ScatterUpdateLayerTest::combineShapes(axesShapeInShape)), + ::testing::Combine(::testing::ValuesIn(combine_shapes(axesShapeInShape)), ::testing::ValuesIn(idxValue), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(idxPrecisions), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp index 210d180a64dbe9..0e2978ced32504 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/select.cpp @@ -1,23 +1,20 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - -#include - -#include "single_layer_tests/select.hpp" +#include "single_op_tests/select.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::SelectLayerTest; -const std::vector inputPrecision = { - InferenceEngine::Precision::U8, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32 +const std::vector inputPrecision = { + ov::element::u8, + ov::element::f16, + ov::element::f32, + ov::element::i16, + ov::element::i32 }; -const std::vector>> 
noneShapes = { +const std::vector> noneShapes = { {{1}, {1}, {1}}, {{8}, {8}, {8}}, {{4, 5}, {4, 5}, {4, 5}}, @@ -25,7 +22,7 @@ const std::vector>> noneShapes = { {{2, 3, 4, 5}, {2, 3, 4, 5}, {2, 3, 4, 5}} }; -const std::vector>> numpyShapes = { +const std::vector> numpyShapes = { {{1}, {16}, {1}}, {{1}, {1}, {16}}, {{1}, {8}, {8}}, @@ -52,16 +49,16 @@ const std::vector>> numpyShapes = { INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsSelect_none, SelectLayerTest, - ::testing::Combine(::testing::ValuesIn(noneShapes), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(noneShapes)), ::testing::ValuesIn(inputPrecision), - ::testing::Values(ngraph::op::AutoBroadcastType::NONE), + ::testing::Values(ov::op::AutoBroadcastType::NONE), ::testing::Values(ov::test::utils::DEVICE_GPU)), SelectLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsSelect_numpy, SelectLayerTest, - ::testing::Combine(::testing::ValuesIn(numpyShapes), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(numpyShapes)), ::testing::ValuesIn(inputPrecision), - ::testing::Values(ngraph::op::AutoBroadcastType::NUMPY), + ::testing::Values(ov::op::AutoBroadcastType::NUMPY), ::testing::Values(ov::test::utils::DEVICE_GPU)), SelectLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shape_of.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shape_of.cpp index 4efba7c864339e..ad47393e054148 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shape_of.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shape_of.cpp @@ -2,30 +2,28 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/shape_of.hpp" +#include "single_op_tests/shape_of.hpp" #include "common_test_utils/test_constants.hpp" -using namespace 
LayerTestsDefinitions; - namespace { -const std::vector model_precisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32 +using ov::test::ShapeOfLayerTest; + +const std::vector model_precisions = { + ov::element::f32, + ov::element::i32 }; -const std::vector input_shapes = { - std::vector({1, 2, 3, 4, 5}), - std::vector({1, 2, 3, 4}), - std::vector({1, 2}) +const std::vector> input_shapes = { + {{1, 2, 3, 4, 5}}, + {{1, 2, 3, 4}}, + {{1, 2}} }; INSTANTIATE_TEST_SUITE_P(smoke_Check, ShapeOfLayerTest, ::testing::Combine( ::testing::ValuesIn(model_precisions), - ::testing::Values(InferenceEngine::Precision::I64), - ::testing::ValuesIn(input_shapes), + ::testing::Values(ov::element::i64), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes)), ::testing::Values(ov::test::utils::DEVICE_GPU)), ShapeOfLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shuffle_channels.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shuffle_channels.cpp index fdef75740848fd..28e8b8b1daa4b9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shuffle_channels.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/shuffle_channels.cpp @@ -4,18 +4,18 @@ #include -#include "single_layer_tests/shuffle_channels.hpp" +#include "single_op_tests/shuffle_channels.hpp" -using namespace LayerTestsDefinitions; +using ov::test::ShuffleChannelsLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::U8, +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::u8, }; -const std::vector> inputShapes = { - {3, 4, 9, 5}, {2, 16, 24, 15}, {1, 32, 12, 25} +const std::vector> inputShapes = { + {{3, 4, 9, 5}}, {{2, 16, 24, 15}}, {{1, 
32, 12, 25}} }; const std::vector> shuffleParameters = { @@ -28,11 +28,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, ::testing::Combine(::testing::ValuesIn(shuffleParameters), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes)), ::testing::Values(ov::test::utils::DEVICE_GPU)), ShuffleChannelsLayerTest::getTestCaseName); @@ -41,11 +37,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ShuffleChannels3D, ShuffleChannelsLayerTest, ::testing::Combine(::testing::Values(std::tuple(1, 3)), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({18, 30, 36})), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{18, 30, 36}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), ShuffleChannelsLayerTest::getTestCaseName); @@ -53,11 +46,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_ShuffleChannels2D, ShuffleChannelsLayerTest, ::testing::Combine(::testing::Values(std::tuple(1, 3)), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({18, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{18, 30}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), ShuffleChannelsLayerTest::getTestCaseName); @@ -65,10 
+55,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ShuffleChannels1D, ShuffleChannelsLayerTest, ::testing::Combine(::testing::Values(std::tuple(0, 3)), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30})), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{30}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), ShuffleChannelsLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_batch.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_batch.cpp index a7a759535f2381..a2cf46dc472b45 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_batch.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_batch.cpp @@ -2,176 +2,126 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/space_to_batch.hpp" +#include "single_op_tests/space_to_batch.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::SpaceToBatchLayerTest; +using ov::test::spaceToBatchParamsTuple; auto stb_only_test_cases = []() { return std::vector{ spaceToBatchParamsTuple({1, 2, 2}, {0, 0, 0}, {0, 0, 0}, - {1, 2, 2}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 2, 2}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 1, 2, 2}, - 
InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 2, 2}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 3, 2, 2}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 3, 2, 2}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 1, 4, 4}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 4, 4}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 2}, {0, 0, 0, 0}, - {2, 1, 2, 4}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{2, 1, 2, 4}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 3, 2, 2}, {0, 0, 1, 0, 3}, {0, 0, 2, 0, 0}, - {1, 1, 3, 2, 1}, - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 3, 2, 1}})), + ov::element::f32, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - 
{1, 1, 2, 2}, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 2, 2}})), + ov::element::i8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 3, 2, 2}, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 3, 2, 2}})), + ov::element::i8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 1, 4, 4}, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 4, 4}})), + ov::element::i8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 2}, {0, 0, 0, 0}, - {2, 1, 2, 4}, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{2, 1, 2, 4}})), + ov::element::i8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 3, 2, 2}, {0, 0, 1, 0, 3}, {0, 0, 2, 0, 0}, - {1, 1, 3, 2, 1}, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 3, 2, 1}})), + ov::element::i8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - 
{1, 1, 2, 2}, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 2, 2}})), + ov::element::u8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 3, 2, 2}, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 3, 2, 2}})), + ov::element::u8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, - {1, 1, 4, 4}, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 4, 4}})), + ov::element::u8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 2}, {0, 0, 0, 0}, - {2, 1, 2, 4}, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{2, 1, 2, 4}})), + ov::element::u8, ov::test::utils::DEVICE_GPU), spaceToBatchParamsTuple({1, 1, 3, 2, 2}, {0, 0, 1, 0, 3}, {0, 0, 2, 0, 0}, - {1, 1, 3, 2, 1}, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Precision::UNSPECIFIED, - InferenceEngine::Layout::ANY, - InferenceEngine::Layout::ANY, + ov::test::static_shapes_to_test_representation( + std::vector({{1, 1, 3, 2, 1}})), + ov::element::u8, ov::test::utils::DEVICE_GPU), }; }; diff --git 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_depth.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_depth.cpp index aee64379a6796c..d95dd31f6ef731 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_depth.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/space_to_depth.cpp @@ -2,46 +2,43 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include - -#include "single_layer_tests/space_to_depth.hpp" +#include "single_op_tests/space_to_depth.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace ngraph::opset3; - namespace { -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::U8, - InferenceEngine::Precision::I16, +using ov::test::SpaceToDepthLayerTest; +using ov::op::v0::SpaceToDepth; + +const std::vector inputPrecisions = { + ov::element::f32, + ov::element::u8, + ov::element::i16, }; const std::vector modes = { SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST}; -const std::vector> inputShapesBS2 = { - {1, 1, 2, 2}, {1, 1, 4, 4}, {1, 1, 6, 6}, {2, 8, 6, 6}, {2, 4, 10, 8}, - {1, 1, 2, 2, 2}, {1, 1, 4, 4, 4}, {1, 1, 6, 6, 6}, {2, 8, 6, 6, 6}, {2, 4, 10, 8, 12}}; +const std::vector> inputShapesBS2 = { + {{1, 1, 2, 2}}, {{1, 1, 4, 4}}, {{1, 1, 6, 6}}, {{2, 8, 6, 6}}, {{2, 4, 10, 8}}, + {{1, 1, 2, 2, 2}}, {{1, 1, 4, 4, 4}}, {{1, 1, 6, 6, 6}}, {{2, 8, 6, 6, 6}}, {{2, 4, 10, 8, 12}}}; INSTANTIATE_TEST_SUITE_P(SpaceToDepthBS2, SpaceToDepthLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapesBS2), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesBS2)), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(modes), ::testing::Values(2), 
::testing::Values(ov::test::utils::DEVICE_GPU)), SpaceToDepthLayerTest::getTestCaseName); -const std::vector> inputShapesBS3 = { - {1, 1, 3, 3}, {1, 1, 6, 6}, {1, 1, 9, 9}, {2, 4, 9, 9}, {2, 3, 15, 12}, - {1, 1, 3, 3, 3}, {1, 1, 6, 6, 6}, {1, 1, 9, 9, 9}, {2, 4, 9, 9, 9}, {2, 3, 15, 12, 18}}; +const std::vector> inputShapesBS3 = { + {{1, 1, 3, 3}}, {{1, 1, 6, 6}}, {{1, 1, 9, 9}}, {{2, 4, 9, 9}}, {{2, 3, 15, 12}}, + {{1, 1, 3, 3, 3}}, {{1, 1, 6, 6, 6}}, {{1, 1, 9, 9, 9}}, {{2, 4, 9, 9, 9}}, {{2, 3, 15, 12, 18}}}; INSTANTIATE_TEST_SUITE_P(SpaceToDepthBS3, SpaceToDepthLayerTest, - ::testing::Combine(::testing::ValuesIn(inputShapesBS3), + ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapesBS3)), ::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(modes), ::testing::Values(3), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/split.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/split.cpp index fd55e5fc6551ee..96d11a46349e6d 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/split.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/split.cpp @@ -1,20 +1,16 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - -#include - -#include "single_layer_tests/split.hpp" +#include "single_op_tests/split.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::SplitLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64 +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16, + ov::element::i64 }; INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, SplitLayerTest, @@ -22,11 +18,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, SplitLayerTest, 
::testing::Values(1), ::testing::Values(0, 1, 2, 3), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{30, 30, 30, 30}}))), ::testing::Values(std::vector({})), ::testing::Values(ov::test::utils::DEVICE_GPU)), SplitLayerTest::getTestCaseName); @@ -36,11 +29,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest, ::testing::Values(5), ::testing::Values(0), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation( + std::vector({{30, 30, 30, 30}}))), ::testing::Values(std::vector({0, 3})), ::testing::Values(ov::test::utils::DEVICE_GPU)), SplitLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp index 92f0dcdb7a6f90..4d09bc71d650b2 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/squeeze_unsqueeze.cpp @@ -2,63 +2,62 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/squeeze_unsqueeze.hpp" +#include "single_op_tests/squeeze_unsqueeze.hpp" #include "common_test_utils/test_constants.hpp" -using namespace 
LayerTestsDefinitions; - namespace { -std::map, std::vector>> axesVectors = { - {{1, 1, 1, 1}, {{-1}, {0}, {1}, {2}, {3}, {0, 1}, {0, 2}, {0, 3}, {1, 2}, {2, 3}, {0, 1, 2}, {0, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}}}, - {{1, 2, 3, 4}, {{0}}}, - {{2, 1, 3, 4}, {{1}}}, - {{1}, {{-1}, {0}}}, - {{1, 2}, {{0}}}, - {{2, 1}, {{1}, {-1}}}, +using ov::test::SqueezeUnsqueezeLayerTest; +using ov::test::utils::SqueezeOpType; + +std::map, std::vector>> axesVectors = { + {{{1, 1, 1, 1}}, {{-1}, {0}, {1}, {2}, {3}, {0, 1}, {0, 2}, {0, 3}, {1, 2}, {2, 3}, {0, 1, 2}, {0, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}}}, + {{{1, 2, 3, 4}}, {{0}}}, + {{{2, 1, 3, 4}}, {{1}}}, + {{{1}}, {{-1}, {0}}}, + {{{1, 2}}, {{0}}}, + {{{2, 1}}, {{1}, {-1}}}, +}; + +std::map, std::vector>> emptyAxesVectors = { + {{{1, 1, 1, 1}}, {{}}}, + {{{1, 2, 3, 4}}, {{}}}, + {{{2, 1, 3, 4}}, {{}}}, + {{{1}}, {{}}}, + {{{1, 2}}, {{}}}, + {{{2, 1}}, {{}}}, }; -std::map, std::vector>> emptyAxesVectors = { - {{1, 1, 1, 1}, {{}}}, - {{1, 2, 3, 4}, {{}}}, - {{2, 1, 3, 4}, {{}}}, - {{1}, {{}}}, - {{1, 2}, {{}}}, - {{2, 1}, {{}}}, +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 }; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector opTypes = { + SqueezeOpType::SQUEEZE, + SqueezeOpType::UNSQUEEZE }; -const std::vector opTypes = { - ngraph::helpers::SqueezeOpType::SQUEEZE, - ngraph::helpers::SqueezeOpType::UNSQUEEZE +auto prepare_cases = [](const std::vector, std::vector>>& raw_axes) { + std::vector, std::vector>> cases; + for (const auto& raw_case : raw_axes) + cases.emplace_back(ov::test::static_shapes_to_test_representation(raw_case.first), + raw_case.second); + return cases; }; INSTANTIATE_TEST_SUITE_P(smoke_Basic, SqueezeUnsqueezeLayerTest, ::testing::Combine( - ::testing::ValuesIn(ov::test::utils::combineParams(axesVectors)), + ::testing::ValuesIn(prepare_cases(ov::test::utils::combineParams(axesVectors))), 
::testing::ValuesIn(opTypes), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), SqueezeUnsqueezeLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Basic_emptyAxes, SqueezeUnsqueezeLayerTest, ::testing::Combine( - ::testing::ValuesIn(ov::test::utils::combineParams(emptyAxesVectors)), - ::testing::Values(ngraph::helpers::SqueezeOpType::SQUEEZE), + ::testing::ValuesIn(prepare_cases(ov::test::utils::combineParams(emptyAxesVectors))), + ::testing::Values(SqueezeOpType::SQUEEZE), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(ov::test::utils::DEVICE_GPU)), SqueezeUnsqueezeLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp index 7b5b72856e758a..45677eb87b99e9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/tile.cpp @@ -2,16 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/tile.hpp" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/tile.hpp" namespace { +using ov::test::TileLayerTest; -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32 +const std::vector netPrecisions = { + ov::element::f32 }; const std::vector> repeats = { @@ -27,11 +24,7 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Tile, TileLayerTest, ::testing::Combine( ::testing::ValuesIn(repeats), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({2, 3, 4})), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{2, 3, 4}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), TileLayerTest::getTestCaseName); @@ -39,11 +32,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Tile6d, TileLayerTest, ::testing::Combine( ::testing::Values(std::vector({1, 1, 1, 2, 1, 2})), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 3, 1, 3, 1})), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{1, 4, 3, 1, 3, 1}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), TileLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp index 4675dad43de93b..981e77d2f5fe25 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp @@ -2,29 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "single_layer_tests/transpose.hpp" +#include "single_op_tests/transpose.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::TransposeLayerTest; -const 
std::vector netPrecisions = { - InferenceEngine::Precision::FP32, +const std::vector netPrecisions = { + ov::element::f32, }; /** * 4D permute tests */ -const std::vector> inputShapes = { - std::vector{1, 3, 100, 100}, +const std::vector> inputShapes = { + {{1, 3, 100, 100}}, // use permute_8x8_4x4 kernel - std::vector{2, 8, 64, 64}, - std::vector{2, 5, 64, 64}, - std::vector{2, 8, 64, 5}, - std::vector{2, 5, 64, 5}, + {{2, 8, 64, 64}}, + {{2, 5, 64, 64}}, + {{2, 8, 64, 5}}, + {{2, 5, 64, 5}}, }; const std::vector> inputOrder = { @@ -39,22 +36,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Transpose, TransposeLayerTest, testing::Combine(testing::ValuesIn(inputOrder), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes)), testing::Values(ov::test::utils::DEVICE_GPU)), TransposeLayerTest::getTestCaseName); /** * 5D permute tests */ -const std::vector> inputShapes5D = { - std::vector{2, 3, 4, 12, 64}, - std::vector{2, 5, 11, 32, 32}, - std::vector{2, 8, 64, 32, 5}, - std::vector{2, 5, 64, 32, 5}, +const std::vector> inputShapes5D = { + {{2, 3, 4, 12, 64}}, + {{2, 5, 11, 32, 32}}, + {{2, 8, 64, 32, 5}}, + {{2, 5, 64, 32, 5}}, }; const std::vector> inputOrder5D = { @@ -71,22 +64,18 @@ INSTANTIATE_TEST_SUITE_P(smoke_Transpose_5D, TransposeLayerTest, testing::Combine(testing::ValuesIn(inputOrder5D), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes5D), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes5D)), 
testing::Values(ov::test::utils::DEVICE_GPU)), TransposeLayerTest::getTestCaseName); /** * 6D permute tests */ -const std::vector> inputShapes6D = { - std::vector{2, 8, 5, 13, 11, 16}, - std::vector{2, 11, 6, 2, 15, 10}, - std::vector{2, 13, 1, 3, 14, 32}, - std::vector{2, 14, 3, 4, 4, 22}, +const std::vector> inputShapes6D = { + {{2, 8, 5, 13, 11, 16}}, + {{2, 11, 6, 2, 15, 10}}, + {{2, 13, 1, 3, 14, 32}}, + {{2, 14, 3, 4, 4, 22}}, }; const std::vector> inputOrder6D = { @@ -101,19 +90,15 @@ INSTANTIATE_TEST_SUITE_P(smoke_Transpose_6D, TransposeLayerTest, testing::Combine(testing::ValuesIn(inputOrder6D), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes6D), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes6D)), testing::Values(ov::test::utils::DEVICE_GPU)), TransposeLayerTest::getTestCaseName); /** * 8D permute tests */ -const std::vector> inputShapes8D = { - std::vector{1, 2, 3, 4, 5, 6, 7, 8}, +const std::vector> inputShapes8D = { + {{1, 2, 3, 4, 5, 6, 7, 8}}, }; const std::vector> inputOrder8D = { @@ -124,11 +109,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Transpose_8D, TransposeLayerTest, testing::Combine(testing::ValuesIn(inputOrder8D), testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes8D), + testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes8D)), testing::Values(ov::test::utils::DEVICE_GPU)), TransposeLayerTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp index 739373173615aa..dc873c25af6643 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp @@ -1,40 +1,32 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - -#include - -#include "single_layer_tests/variadic_split.hpp" +#include "single_op_tests/variadic_split.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; - namespace { +using ov::test::VariadicSplitLayerTest; - const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 - }; +const std::vector netPrecisions = { + ov::element::f32, + ov::element::f16 +}; - // Sum of elements numSplits = inputShapes[Axis] - const std::vector> numSplits = { - {1, 16, 5, 8}, - {2, 19, 5, 4}, - {7, 13, 2, 8}, - {5, 8, 12, 5}, - {4, 11, 6, 9} - }; +// Sum of elements numSplits = inputShapes[Axis] +const std::vector> numSplits = { + {1, 16, 5, 8}, + {2, 19, 5, 4}, + {7, 13, 2, 8}, + {5, 8, 12, 5}, + {4, 11, 6, 9} +}; - INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, VariadicSplitLayerTest, - ::testing::Combine( +INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, VariadicSplitLayerTest, + ::testing::Combine( ::testing::ValuesIn(numSplits), ::testing::Values(0, 1, 2, 3), ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(ov::test::static_shapes_to_test_representation(std::vector({{30, 30, 30, 30}}))), ::testing::Values(ov::test::utils::DEVICE_GPU)), - 
VariadicSplitLayerTest::getTestCaseName); + VariadicSplitLayerTest::getTestCaseName); } // namespace From 6210deba495922eb4e58d9be5bc7957b9c0b5767 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 8 Nov 2023 10:52:21 +0100 Subject: [PATCH 231/275] [core]Migrate FakeQuantize operator to new API (#20895) * Migrate FakeQuantize operator to new API * Minor refactor in FakeQuantize reference re-use existing functions in `get_inner_stride` --- .../include/openvino/op/fake_quantize.hpp | 4 +- .../openvino/reference/fake_quantize.hpp | 20 +- src/core/src/op/fake_quantize.cpp | 195 +++++++++--------- 3 files changed, 102 insertions(+), 117 deletions(-) diff --git a/src/core/include/openvino/op/fake_quantize.hpp b/src/core/include/openvino/op/fake_quantize.hpp index 9e2491aeffa562..d40cb174f2bbe2 100644 --- a/src/core/include/openvino/op/fake_quantize.hpp +++ b/src/core/include/openvino/op/fake_quantize.hpp @@ -67,9 +67,7 @@ class OPENVINO_API FakeQuantize : public Op { m_auto_broadcast = auto_broadcast; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override { return false; diff --git a/src/core/reference/include/openvino/reference/fake_quantize.hpp b/src/core/reference/include/openvino/reference/fake_quantize.hpp index 2fb30a4a5c492b..a731290c2516b1 100644 --- a/src/core/reference/include/openvino/reference/fake_quantize.hpp +++ b/src/core/reference/include/openvino/reference/fake_quantize.hpp @@ -318,19 +318,15 @@ std::tuple get_inner_stride(size_t num_output_elements, return (last == 1 && dim > 1) || (last > 1 && dim == 1); }); if (it == shape.rend()) { - const size_t num_elements = shape_size(shape); - return std::tuple{ - num_elements, - last == 
1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)}; + const auto num_elements = shape_size(shape); + return {num_elements, + last == 1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)}; } - const size_t idx = std::distance(it, shape.rbegin()) + static_cast(shape.size()); - const size_t inner_stride = - std::accumulate(shape.begin() + idx, shape.end(), static_cast(1), std::multiplies()); - const size_t output_inner_stride = std::accumulate(output_shape.begin() + output_shape.size() - shape.size() + idx, - output_shape.end(), - static_cast(1), - std::multiplies()); - return std::tuple{inner_stride, std::min(current_output_inner_stride, output_inner_stride)}; + const auto idx = std::distance(it, shape.rbegin()) + static_cast(shape.size()); + const auto inner_stride = shape_size(shape.begin() + idx, shape.end()); + const auto output_inner_stride = + shape_size(output_shape.begin() + (output_shape.size() - shape.size() + idx), output_shape.end()); + return {inner_stride, std::min(current_output_inner_stride, output_inner_stride)}; } template diff --git a/src/core/src/op/fake_quantize.cpp b/src/core/src/op/fake_quantize.cpp index f558d090723e32..9b7ba0e991ae11 100644 --- a/src/core/src/op/fake_quantize.cpp +++ b/src/core/src/op/fake_quantize.cpp @@ -2,51 +2,81 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/fake_quantize.hpp" - -#include +#include "openvino/op/fake_quantize.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/attribute_visitor.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/convert.hpp" -#include "ngraph/op/select.hpp" -#include "ngraph/shape.hpp" -#include "ngraph/type/element_type.hpp" #include "openvino/reference/fake_quantize.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace fake_quantize { + +struct Evaluate : element::NoAction { + using element::NoAction::visit; + + template > + 
static result_type visit(const Tensor& arg0, + const Tensor& arg1, + const Tensor& arg2, + const Tensor& arg3, + const Tensor& arg4, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const Shape& shape2, + const Shape& shape3, + const Shape& shape4, + const size_t levels, + const AutoBroadcastSpec& broadcast_spec) { + reference::fake_quantize(arg0.data(), + arg1.data(), + arg2.data(), + arg3.data(), + arg4.data(), + out.data(), + shape0, + shape1, + shape2, + shape3, + shape4, + levels, + broadcast_spec); + return true; + } +}; +} // namespace fake_quantize +namespace v0 { -op::FakeQuantize::FakeQuantize() : Op(), m_levels() {} +FakeQuantize::FakeQuantize() : Op(), m_levels() {} -op::FakeQuantize::FakeQuantize(const Output& data, - const Output& input_low, - const Output& input_high, - const Output& output_low, - const Output& output_high, - size_t levels, - const AutoBroadcastSpec& auto_broadcast) +FakeQuantize::FakeQuantize(const Output& data, + const Output& input_low, + const Output& input_high, + const Output& output_low, + const Output& output_high, + size_t levels, + const AutoBroadcastSpec& auto_broadcast) : Op({data, input_low, input_high, output_low, output_high}), m_levels(levels), m_auto_broadcast(auto_broadcast) { constructor_validate_and_infer_types(); } -void op::FakeQuantize::validate_and_infer_types() { +void FakeQuantize::validate_and_infer_types() { OV_OP_SCOPE(v0_FakeQuantize_validate_and_infer_types); - ov::PartialShape data_pshape = get_input_partial_shape(0); + auto data_pshape = get_input_partial_shape(0); for (auto i = 1; i <= 4; i++) { if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE) { NODE_VALIDATION_CHECK(this, - ov::PartialShape::merge_into(data_pshape, get_input_partial_shape(i)), + PartialShape::merge_into(data_pshape, get_input_partial_shape(i)), "Argument shapes are inconsistent."); } else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY || m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD) { 
NODE_VALIDATION_CHECK( this, - ov::PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast), + PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast), "Argument shapes are inconsistent."); } else { NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification"); @@ -55,103 +85,64 @@ void op::FakeQuantize::validate_and_infer_types() { set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); } -bool ngraph::op::v0::FakeQuantize::visit_attributes(AttributeVisitor& visitor) { +bool FakeQuantize::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_FakeQuantize_visit_attributes); visitor.on_attribute("levels", m_levels); visitor.on_attribute("auto_broadcast", m_auto_broadcast); return true; } -shared_ptr op::FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_FakeQuantize_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), // X - new_args.at(1), // input_low - new_args.at(2), // input_high - new_args.at(3), // output_low - new_args.at(4), // output_high - m_levels, - m_auto_broadcast); + return std::make_shared(new_args.at(0), // X + new_args.at(1), // input_low + new_args.at(2), // input_high + new_args.at(3), // output_low + new_args.at(4), // output_high + m_levels, + m_auto_broadcast); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace fakequantizeop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& arg2, - const HostTensorPtr& arg3, - const HostTensorPtr& arg4, - const HostTensorPtr& out, - const ngraph::op::FakeQuantize* parent) { +bool FakeQuantize::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_FakeQuantize_evaluate); - using T = typename element_type_traits::value_type; - 
out->set_shape(arg0->get_shape()); - out->set_element_type(arg0->get_element_type()); - ov::reference::fake_quantize(arg0->get_data_ptr(), - arg1->get_data_ptr(), - arg2->get_data_ptr(), - arg3->get_data_ptr(), - arg4->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - arg2->get_shape(), - arg3->get_shape(), - arg4->get_shape(), - parent->get_levels(), - parent->get_auto_broadcast()); - return true; -} + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 5); -bool evaluate_fakequantize(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& arg2, - const HostTensorPtr& arg3, - const HostTensorPtr& arg4, - const HostTensorPtr& out, - const ngraph::op::FakeQuantize* parent) { - bool rc = true; - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent); - OPENVINO_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent); - OPENVINO_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent); - OPENVINO_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent); - OPENVINO_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent); - OPENVINO_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace fakequantizeop + const auto& shape0 = inputs[0].get_shape(); + outputs[0].set_shape(shape0); -bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_FakeQuantize_evaluate); - return fakequantizeop::evaluate_fakequantize(inputs[0], - inputs[1], - inputs[2], - inputs[3], - inputs[4], - outputs[0], - this); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + inputs[2], + inputs[3], + inputs[4], + 
outputs[0], + shape0, + inputs[1].get_shape(), + inputs[2].get_shape(), + inputs[3].get_shape(), + inputs[4].get_shape(), + get_levels(), + get_auto_broadcast()); } -bool ngraph::op::FakeQuantize::has_evaluate() const { +bool FakeQuantize::has_evaluate() const { OV_OP_SCOPE(v0_FakeQuantize_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From b8eea7bf8478b87eca0b3f844de694a5164b841a Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 8 Nov 2023 10:53:20 +0100 Subject: [PATCH 232/275] [core]Migrate Multiply operator to new API (#20853) * Migrate Multiply operator to new API * Add comment explain use of custom multiply * Update custom multiply comment Co-authored-by: Tomasz Jankowski --------- Co-authored-by: Tomasz Jankowski --- src/core/include/openvino/op/multiply.hpp | 4 +- .../include/openvino/reference/multiply.hpp | 31 +++-- src/core/src/op/multiply.cpp | 110 ++++++++---------- 3 files changed, 71 insertions(+), 74 deletions(-) diff --git a/src/core/include/openvino/op/multiply.hpp b/src/core/include/openvino/op/multiply.hpp index 2e2f3bd4c73000..0fb82273056017 100644 --- a/src/core/include/openvino/op/multiply.hpp +++ b/src/core/include/openvino/op/multiply.hpp @@ -29,9 +29,7 @@ class OPENVINO_API Multiply : public util::BinaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, 
const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/multiply.hpp b/src/core/reference/include/openvino/reference/multiply.hpp index 91d279cc6935da..8394d58ecc95aa 100644 --- a/src/core/reference/include/openvino/reference/multiply.hpp +++ b/src/core/reference/include/openvino/reference/multiply.hpp @@ -4,21 +4,36 @@ #pragma once +#include #include -#include "openvino/core/shape.hpp" -#include "openvino/op/util/attr_types.hpp" #include "openvino/reference/autobroadcast_binop.hpp" namespace ov { namespace reference { +namespace func { +// Usage of custom function instead of std::multiplies gives smaller binary size. +template +constexpr T multiply(const T a, const T b) { + return a * b; +} +} // namespace func + template -void multiply(const T* arg0, const T* arg1, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg0[i] * arg1[i]; - } +void multiply(const T* arg0, const T* arg1, T* out, const size_t count) { + std::transform(arg0, arg0 + count, arg1, out, func::multiply); } +/** + * @brief Reference implementation of binary elementwise Multiply operator. + * + * @param arg0 Pointer to input 0 data. + * @param arg1 Pointer to input 1 data. + * @param out Pointer to output data. + * @param arg0_shape Input 0 shape. + * @param arg1_shape Input 1 shape. + * @param broadcast_spec Broadcast specification mode. 
+ */ template void multiply(const T* arg0, const T* arg1, @@ -26,9 +41,7 @@ void multiply(const T* arg0, const Shape& arg0_shape, const Shape& arg1_shape, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T { - return x * y; - }); + autobroadcast_binop(arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, func::multiply); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/multiply.cpp b/src/core/src/op/multiply.cpp index b30c2adaa7d6c5..2ae5f4304cfcbe 100644 --- a/src/core/src/op/multiply.cpp +++ b/src/core/src/op/multiply.cpp @@ -2,90 +2,76 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/multiply.hpp" +#include "openvino/op/multiply.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/multiply.hpp" +#include "utils.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace multiply { +struct Evaluate : element::NoAction { + using element::NoAction::visit; -OPENVINO_SUPPRESS_DEPRECATED_START -namespace multiplyop { -namespace { -template -bool evaluate(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::multiply(arg0->get_data_ptr(), - arg1->get_data_ptr(), - out->get_data_ptr(), - arg0->get_shape(), - arg1->get_shape(), - broadcast_spec); - return true; -} - -bool evaluate_multiply(const HostTensorPtr& arg0, - const HostTensorPtr& arg1, - const HostTensorPtr& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - out->set_broadcast(broadcast_spec, arg0, arg1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_multiply, i32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, i64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, u32, arg0, 
arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, u64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, f16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, f32, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, f64, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, bf16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, u8, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, i16, arg0, arg1, out, broadcast_spec); - OPENVINO_TYPE_CASE(evaluate_multiply, u16, arg0, arg1, out, broadcast_spec); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& arg0, + const Tensor& arg1, + Tensor& out, + const Shape& shape0, + const Shape& shape1, + const AutoBroadcastSpec& broadcast_spec) { + reference::multiply(arg0.data(), arg1.data(), out.data(), shape0, shape1, broadcast_spec); + return true; } - return rc; -} -} // namespace -} // namespace multiplyop +}; +} // namespace multiply // ------------------------------------ v1 ------------------------------------- -op::v1::Multiply::Multiply(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +Multiply::Multiply(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Multiply::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Multiply::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Multiply_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + return std::make_shared(new_args.at(0), new_args.at(1), get_autob()); } -bool op::v1::Multiply::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) 
const { +bool Multiply::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_Multiply_evaluate); - return multiplyop::evaluate_multiply(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + outputs[0].set_shape(infer_broadcast_shape(this, inputs)); + + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + inputs[0].get_shape(), + inputs[1].get_shape(), + get_autob()); } -bool op::v1::Multiply::has_evaluate() const { +bool Multiply::has_evaluate() const { OV_OP_SCOPE(v1_Multiply_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: - case ngraph::element::f64: - case ngraph::element::bf16: + case element::bf16: + case element::f16: + case element::f32: + case element::f64: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov From ace986cac0b63fcf002acf0df82bdfc7e1952f64 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Wed, 8 Nov 2023 11:06:28 +0100 Subject: [PATCH 233/275] Refactor GenerateProposalsLayerTest, GridSampleLayerTest (#20772) * Refactor GenerateProposalsLayerTest * Refactor GridSampleLayerTest * Fix * Apply comments * Apply comments --- .../single_layer_tests/generate_proposals.cpp | 220 +----------------- .../single_layer_tests/grid_sample.cpp | 40 ++-- .../skip_tests_config.cpp | 2 + .../single_op_tests/generate_proposals.hpp | 15 ++ .../include/single_op_tests/grid_sample.hpp | 15 ++ .../single_op/generate_proposals.hpp | 34 +++ .../single_op/grid_sample.hpp | 32 +++ .../src/single_op/generate_proposals.cpp | 95 ++++++++ .../src/single_op/grid_sample.cpp | 61 +++++ 9 files changed, 276 
insertions(+), 238 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/generate_proposals.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/grid_sample.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/generate_proposals.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/grid_sample.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/generate_proposals.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/grid_sample.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp index f91380d972a77a..d298befa488324 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp @@ -2,208 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "single_layer_tests/generate_proposals.hpp" -#include "common_test_utils/ov_tensor_utils.hpp" - -using namespace ov::test; -using namespace ov::test::subgraph; +#include "single_op_tests/generate_proposals.hpp" namespace { +using ov::test::GenerateProposalsLayerTest; +using ov::test::InputShape; const std::vector min_size = { 1 }; const std::vector nms_threshold = { 0.699999988079071f }; const std::vector post_nms_count = { 6 }; const std::vector pre_nms_count = { 1000 }; -const std::vector>> inputTensors = { - { - "empty", - { - // 3 - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3}, {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f}), - // 2 x 6 x 3 x 4 = 144 - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 6, 3, 4}, std::vector(144, 1.0f)), 
- // 2 x 12 x 2 x 6 = 144 * 2 - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 12, 2, 6}, std::vector(288, 1.0f)), - // {2 x 3 x 2 x 6} = 36 * 2 - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3, 2, 6}, { - 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f, - 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f}) - } - }, - { - "filled", - { - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3}, {150.0f, 150.0f, 0.0f, 150.0f, 150.0f, 0.0f}), - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 6, 3, 4}, { - 12.0f, 68.0f, 102.0f, 123.0f, 46.0f, 80.0f, 79.0f, 128.0f, 33.0f, 71.0f, 127.0f, 86.0f, 33.0f, 56.0f, 150.0f, 73.0f, - 5.0f, 41.0f, 93.0f, 150.0f, 74.0f, 66.0f, 106.0f, 115.0f, 17.0f, 37.0f, 87.0f, 150.0f, 31.0f, 27.0f, 150.0f, 39.0f, - 29.0f, 23.0f, 112.0f, 123.0f, 41.0f, 37.0f, 103.0f, 150.0f, 8.0f, 46.0f, 98.0f, 111.0f, 7.0f, 69.0f, 114.0f, 150.0f, - 70.0f, 21.0f, 150.0f, 125.0f, 54.0f, 19.0f, 132.0f, 68.0f, 62.0f, 8.0f, 150.0f, 101.0f, 57.0f, 81.0f, 150.0f, 97.0f, - 79.0f, 29.0f, 109.0f, 130.0f, 12.0f, 63.0f, 100.0f, 150.0f, 17.0f, 33.0f, 113.0f, 150.0f, 90.0f, 78.0f, 150.0f, 111.0f, - 47.0f, 68.0f, 150.0f, 71.0f, 66.0f, 103.0f, 111.0f, 150.0f, 4.0f, 17.0f, 112.0f, 94.0f, 12.0f, 8.0f, 119.0f, 98.0f, - 54.0f, 56.0f, 120.0f, 150.0f, 56.0f, 29.0f, 150.0f, 31.0f, 42.0f, 3.0f, 139.0f, 92.0f, 41.0f, 65.0f, 150.0f, 130.0f, - 49.0f, 13.0f, 143.0f, 30.0f, 40.0f, 60.0f, 150.0f, 150.0f, 23.0f, 73.0f, 24.0f, 115.0f, 56.0f, 84.0f, 107.0f, 108.0f, - 63.0f, 8.0f, 142.0f, 125.0f, 78.0f, 37.0f, 93.0f, 144.0f, 40.0f, 34.0f, 150.0f, 46.0f, 30.0f, 21.0f, 150.0f, 120.0f}), - 
ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 12, 2, 6}, { - 9.062256f, 10.883133f, 9.8441105f, 12.694285f, 0.41781136f, 8.749107f, 14.990341f, 6.587644f, 1.4206103f, - 13.299262f, 12.432549f, 2.736371f, 0.22732796f, 6.3361835f, 12.268727f, 2.1009045f, 4.771589f, 2.5131326f, - 5.610736f, 9.3604145f, 4.27379f, 8.317948f, 0.60510135f, 6.7446275f, 1.0207708f, 1.1352817f, 1.5785321f, - 1.718335f, 1.8093798f, 0.99247587f, 1.3233583f, 1.7432803f, 1.8534478f, 1.2593061f, 1.7394226f, 1.7686696f, - 1.647999f, 1.7611449f, 1.3119122f, 0.03007332f, 1.1106564f, 0.55669737f, 0.2546148f, 1.9181818f, 0.7134989f, - 2.0407224f, 1.7211134f, 1.8565536f, 14.562747f, 2.8786168f, 0.5927796f, 0.2064463f, 7.6794515f, 8.672126f, - 10.139171f, 8.002429f, 7.002932f, 12.6314945f, 10.550842f, 0.15784842f, 0.3194304f, 10.752157f, 3.709805f, - 11.628928f, 0.7136225f, 14.619964f, 15.177284f, 2.2824087f, 15.381494f, 0.16618137f, 7.507227f, 11.173228f, - 0.4923559f, 1.8227729f, 1.4749299f, 1.7833921f, 1.2363617f, -0.23659119f, 1.5737582f, 1.779316f, 1.9828427f, - 1.0482665f, 1.4900246f, 1.3563544f, 1.5341306f, 0.7634312f, 4.6216766e-05f, 1.6161222f, 1.7512476f, 1.9363779f, - 0.9195784f, 1.4906164f, -0.03244795f, 0.681073f, 0.6192401f, 1.8033613f, 14.146055f, 3.4043705f, 15.292292f, - 3.5295358f, 11.138999f, 9.952057f, 5.633434f, 12.114562f, 9.427372f, 12.384038f, 9.583308f, 8.427233f, - 15.293704f, 3.288159f, 11.64898f, 9.350885f, 2.0037227f, 13.523184f, 4.4176426f, 6.1057625f, 14.400079f, - 8.248259f, 11.815807f, 15.713364f, 1.0023532f, 1.3203261f, 1.7100681f, 0.7407832f, 1.09448f, 1.7188418f, - 1.4412547f, 1.4862992f, 0.74790007f, 0.31571656f, 0.6398838f, 2.0236106f, 1.1869069f, 1.7265586f, 1.2624544f, - 0.09934269f, 1.3508598f, 0.85212964f, -0.38968498f, 1.7059708f, 1.6533034f, 1.7400402f, 1.8123854f, -0.43063712f, - 9.062256f, 10.883133f, 9.8441105f, 12.694285f, 0.41781136f, 8.749107f, 14.990341f, 6.587644f, 1.4206103f, - 13.299262f, 12.432549f, 2.736371f, 0.22732796f, 
6.3361835f, 12.268727f, 2.1009045f, 4.771589f, 2.5131326f, - 5.610736f, 9.3604145f, 4.27379f, 8.317948f, 0.60510135f, 6.7446275f, 1.0207708f, 1.1352817f, 1.5785321f, - 1.718335f, 1.8093798f, 0.99247587f, 1.3233583f, 1.7432803f, 1.8534478f, 1.2593061f, 1.7394226f, 1.7686696f, - 1.647999f, 1.7611449f, 1.3119122f, 0.03007332f, 1.1106564f, 0.55669737f, 0.2546148f, 1.9181818f, 0.7134989f, - 2.0407224f, 1.7211134f, 1.8565536f, 14.562747f, 2.8786168f, 0.5927796f, 0.2064463f, 7.6794515f, 8.672126f, - 10.139171f, 8.002429f, 7.002932f, 12.6314945f, 10.550842f, 0.15784842f, 0.3194304f, 10.752157f, 3.709805f, - 11.628928f, 0.7136225f, 14.619964f, 15.177284f, 2.2824087f, 15.381494f, 0.16618137f, 7.507227f, 11.173228f, - 0.4923559f, 1.8227729f, 1.4749299f, 1.7833921f, 1.2363617f, -0.23659119f, 1.5737582f, 1.779316f, 1.9828427f, - 1.0482665f, 1.4900246f, 1.3563544f, 1.5341306f, 0.7634312f, 4.6216766e-05f, 1.6161222f, 1.7512476f, 1.9363779f, - 0.9195784f, 1.4906164f, -0.03244795f, 0.681073f, 0.6192401f, 1.8033613f, 14.146055f, 3.4043705f, 15.292292f, - 3.5295358f, 11.138999f, 9.952057f, 5.633434f, 12.114562f, 9.427372f, 12.384038f, 9.583308f, 8.427233f, - 15.293704f, 3.288159f, 11.64898f, 9.350885f, 2.0037227f, 13.523184f, 4.4176426f, 6.1057625f, 14.400079f, - 8.248259f, 11.815807f, 15.713364f, 1.0023532f, 1.3203261f, 1.7100681f, 0.7407832f, 1.09448f, 1.7188418f, - 1.4412547f, 1.4862992f, 0.74790007f, 0.31571656f, 0.6398838f, 2.0236106f, 1.1869069f, 1.7265586f, 1.2624544f, - 0.09934269f, 1.3508598f, 0.85212964f, -0.38968498f, 1.7059708f, 1.6533034f, 1.7400402f, 1.8123854f, -0.43063712f}), - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3, 2, 6}, { - 0.7719922f, 0.35906568f, 0.29054508f, 0.18124384f, 0.5604661f, 0.84750974f, 0.98948747f, 0.009793862f, 0.7184191f, - 0.5560748f, 0.6952493f, 0.6732593f, 0.3306898f, 0.6790913f, 0.41128764f, 0.34593266f, 0.94296855f, 0.7348507f, - 0.24478768f, 0.94024557f, 0.05405676f, 0.06466125f, 0.36244348f, 0.07942984f, 0.10619422f, 
0.09412837f, 0.9053611f, - 0.22870538f, 0.9237487f, 0.20986171f, 0.5067282f, 0.29709867f, 0.53138554f, 0.189101f, 0.4786443f, 0.88421875f, - 0.7719922f, 0.35906568f, 0.29054508f, 0.18124384f, 0.5604661f, 0.84750974f, 0.98948747f, 0.009793862f, 0.7184191f, - 0.5560748f, 0.6952493f, 0.6732593f, 0.3306898f, 0.6790913f, 0.41128764f, 0.34593266f, 0.94296855f, 0.7348507f, - 0.24478768f, 0.94024557f, 0.05405676f, 0.06466125f, 0.36244348f, 0.07942984f, 0.10619422f, 0.09412837f, 0.9053611f, - 0.22870538f, 0.9237487f, 0.20986171f, 0.5067282f, 0.29709867f, 0.53138554f, 0.189101f, 0.4786443f, 0.88421875f}), - } - }, - { - "filled2", - { - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3}, {200.0f, 200.0f, 4.0f, 200.0f, 200.0f, 4.0f}), - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 6, 3, 4}, {0.0f, 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, - 40.0f, 41.0f, 42.0f, 43.0f, - 44.0f, 45.0f, 46.0f, 47.0f, - 48.0f, 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, - 60.0f, 61.0f, 62.0f, 63.0f, - 64.0f, 65.0f, 66.0f, 67.0f, - 68.0f, 69.0f, 70.0f, 71.0f, - 72.0f, 73.0f, 74.0f, 75.0f, - 76.0f, 77.0f, 78.0f, 79.0f, - 80.0f, 81.0f, 82.0f, 83.0f, - 84.0f, 85.0f, 86.0f, 87.0f, - 88.0f, 89.0f, 90.0f, 91.0f, - 92.0f, 93.0f, 94.0f, 95.0f, - 96.0f, 97.0f, 98.0f, 99.0f, - 100.0f, 101.0f, 102.0f, 103.0f, - 104.0f, 105.0f, 106.0f, 107.0f, - 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, - 116.0f, 117.0f, 118.0f, 119.0f, - 120.0f, 121.0f, 122.0f, 123.0f, - 124.0f, 125.0f, 126.0f, 127.0f, - 128.0f, 129.0f, 130.0f, 131.0f, - 132.0f, 133.0f, 134.0f, 135.0f, - 136.0f, 137.0f, 138.0f, 139.0f, - 140.0f, 141.0f, 142.0f, 143.0f}), - ov::test::utils::create_tensor(ov::element::f32, 
ov::Shape{2, 12, 2, 6}, { - 0.5337073f, 0.86607957f, 0.55151343f, 0.21626699f, 0.4462629f, 0.03985678f, - 0.5157072f, 0.9932138f, 0.7565954f, 0.43803605f, 0.802818f, 0.14834064f, - 0.53932905f, 0.14314f, 0.3817048f, 0.95075196f, 0.05516243f, 0.2567484f, - 0.25508744f, 0.77438325f, 0.43561f, 0.2094628f, 0.8299043f, 0.44982538f, - 0.95615596f, 0.5651084f, 0.11801951f, 0.05352486f, 0.9774733f, 0.14439464f, - 0.62644225f, 0.14370479f, 0.54161614f, 0.557915f, 0.53102225f, 0.0840179f, - 0.7249888f, 0.9843559f, 0.5490522f, 0.53788143f, 0.822474f, 0.3278008f, - 0.39688024f, 0.3286012f, 0.5117038f, 0.04743988f, 0.9408995f, 0.29885054f, - 0.81039643f, 0.85277915f, 0.06807619f, 0.86430097f, 0.36225632f, 0.16606331f, - 0.5401001f, 0.7541649f, 0.11998601f, 0.5131829f, 0.40606487f, 0.327888f, - 0.27721855f, 0.6378373f, 0.22795396f, 0.4961256f, 0.3215895f, 0.15607187f, - 0.14782153f, 0.8908137f, 0.8835288f, 0.834191f, 0.29907143f, 0.7983525f, - 0.755875f, 0.30837986f, 0.0839176f, 0.26624718f, 0.04371626f, 0.09472824f, - 0.20689541f, 0.37622106f, 0.1083321f, 0.1342548f, 0.05815459f, 0.7676379f, - 0.8105144f, 0.92348766f, 0.26761323f, 0.7183306f, 0.8947588f, 0.19020908f, - 0.42731014f, 0.7473663f, 0.85775334f, 0.9340091f, 0.3278848f, 0.755993f, - 0.05307213f, 0.39705503f, 0.21003333f, 0.5625373f, 0.66188884f, 0.80521655f, - 0.6125863f, 0.44678232f, 0.97802377f, 0.0204936f, 0.02686367f, 0.7390654f, - 0.74631f, 0.58399844f, 0.5988792f, 0.37413648f, 0.5946692f, 0.6955776f, - 0.36377597f, 0.7891322f, 0.40900692f, 0.99139464f, 0.50169915f, 0.41435778f, - 0.17142445f, 0.26761186f, 0.31591868f, 0.14249913f, 0.12919712f, 0.5418711f, - 0.6523203f, 0.50259084f, 0.7379765f, 0.01171071f, 0.94423133f, 0.00841132f, - 0.97486794f, 0.2921785f, 0.7633071f, 0.88477814f, 0.03563205f, 0.50833166f, - 0.01354555f, 0.535081f, 0.41366324f, 0.0694767f, 0.9944055f, 0.9981207f, - 0.5337073f, 0.86607957f, 0.55151343f, 0.21626699f, 0.4462629f, 0.03985678f, - 0.5157072f, 0.9932138f, 0.7565954f, 0.43803605f, 
0.802818f, 0.14834064f, - 0.53932905f, 0.14314f, 0.3817048f, 0.95075196f, 0.05516243f, 0.2567484f, - 0.25508744f, 0.77438325f, 0.43561f, 0.2094628f, 0.8299043f, 0.44982538f, - 0.95615596f, 0.5651084f, 0.11801951f, 0.05352486f, 0.9774733f, 0.14439464f, - 0.62644225f, 0.14370479f, 0.54161614f, 0.557915f, 0.53102225f, 0.0840179f, - 0.7249888f, 0.9843559f, 0.5490522f, 0.53788143f, 0.822474f, 0.3278008f, - 0.39688024f, 0.3286012f, 0.5117038f, 0.04743988f, 0.9408995f, 0.29885054f, - 0.81039643f, 0.85277915f, 0.06807619f, 0.86430097f, 0.36225632f, 0.16606331f, - 0.5401001f, 0.7541649f, 0.11998601f, 0.5131829f, 0.40606487f, 0.327888f, - 0.27721855f, 0.6378373f, 0.22795396f, 0.4961256f, 0.3215895f, 0.15607187f, - 0.14782153f, 0.8908137f, 0.8835288f, 0.834191f, 0.29907143f, 0.7983525f, - 0.755875f, 0.30837986f, 0.0839176f, 0.26624718f, 0.04371626f, 0.09472824f, - 0.20689541f, 0.37622106f, 0.1083321f, 0.1342548f, 0.05815459f, 0.7676379f, - 0.8105144f, 0.92348766f, 0.26761323f, 0.7183306f, 0.8947588f, 0.19020908f, - 0.42731014f, 0.7473663f, 0.85775334f, 0.9340091f, 0.3278848f, 0.755993f, - 0.05307213f, 0.39705503f, 0.21003333f, 0.5625373f, 0.66188884f, 0.80521655f, - 0.6125863f, 0.44678232f, 0.97802377f, 0.0204936f, 0.02686367f, 0.7390654f, - 0.74631f, 0.58399844f, 0.5988792f, 0.37413648f, 0.5946692f, 0.6955776f, - 0.36377597f, 0.7891322f, 0.40900692f, 0.99139464f, 0.50169915f, 0.41435778f, - 0.17142445f, 0.26761186f, 0.31591868f, 0.14249913f, 0.12919712f, 0.5418711f, - 0.6523203f, 0.50259084f, 0.7379765f, 0.01171071f, 0.94423133f, 0.00841132f, - 0.97486794f, 0.2921785f, 0.7633071f, 0.88477814f, 0.03563205f, 0.50833166f, - 0.01354555f, 0.535081f, 0.41366324f, 0.0694767f, 0.9944055f, 0.9981207f}), - ov::test::utils::create_tensor(ov::element::f32, ov::Shape{2, 3, 2, 6}, { - 0.56637216f, 0.90457034f, 0.69827306f, 0.4353543f, 0.47985056f, 0.42658508f, - 0.14516132f, 0.08081771f, 0.1799732f, 0.9229515f, 0.42420176f, 0.50857586f, - 0.82664067f, 0.4972319f, 0.3752427f, 0.56731623f, 
0.18241242f, 0.33252355f, - 0.30608943f, 0.6572437f, 0.69185436f, 0.88646156f, 0.36985755f, 0.5590753f, - 0.5256446f, 0.03342898f, 0.1344396f, 0.68642473f, 0.37953874f, 0.32575172f, - 0.21108444f, 0.5661886f, 0.45378175f, 0.62126315f, 0.26799858f, 0.37272978f, - 0.56637216f, 0.90457034f, 0.69827306f, 0.4353543f, 0.47985056f, 0.42658508f, - 0.14516132f, 0.08081771f, 0.1799732f, 0.9229515f, 0.42420176f, 0.50857586f, - 0.82664067f, 0.4972319f, 0.3752427f, 0.56731623f, 0.18241242f, 0.33252355f, - 0.30608943f, 0.6572437f, 0.69185436f, 0.88646156f, 0.36985755f, 0.5590753f, - 0.5256446f, 0.03342898f, 0.1344396f, 0.68642473f, 0.37953874f, 0.32575172f, - 0.21108444f, 0.5661886f, 0.45378175f, 0.62126315f, 0.26799858f, 0.37272978f}), - } - } -}; - const std::vector> dynamicInputShape = { // im_info / anchors / deltas / scores - static_shapes_to_test_representation({{2, 3}, {2, 6, 3, 4}, {2, 12, 2, 6}, {2, 3, 2, 6}}), + ov::test::static_shapes_to_test_representation({{2, 3}, {2, 6, 3, 4}, {2, 12, 2, 6}, {2, 3, 2, 6}}), { {{-1, -1}, {{2, 3}}}, {{-1, -1, -1, -1}, {{2, 6, 3, 4}}}, @@ -228,32 +40,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::ValuesIn(post_nms_count), ::testing::ValuesIn(pre_nms_count), ::testing::ValuesIn({true, false}), - ::testing::ValuesIn(inputTensors), ::testing::Values(ov::element::Type_t::f32), ::testing::Values(ov::element::Type_t::i32), ::testing::Values(ov::test::utils::DEVICE_CPU)), GenerateProposalsLayerTest::getTestCaseName); - -struct GenerateProposalsBenchmarkTest : ov::test::BenchmarkLayerTest {}; - -TEST_P(GenerateProposalsBenchmarkTest, DISABLED_GenerateProposals_Benchmark) { - run_benchmark("GenerateProposals", std::chrono::milliseconds(2000), 10000); -} - -INSTANTIATE_TEST_SUITE_P( - BasicTest, - GenerateProposalsBenchmarkTest, - ::testing::Combine( - ::testing::ValuesIn(dynamicInputShape), - ::testing::ValuesIn(min_size), - ::testing::ValuesIn(nms_threshold), - ::testing::ValuesIn(post_nms_count), - ::testing::ValuesIn(pre_nms_count), - 
::testing::ValuesIn({true, false}), - ::testing::ValuesIn(inputTensors), - ::testing::Values(ov::element::Type_t::f32), - ::testing::Values(ov::element::Type_t::i32), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - GenerateProposalsLayerTest::getTestCaseName); - } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grid_sample.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grid_sample.cpp index 8b99e8661c9960..d7185bc4359e2d 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grid_sample.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/grid_sample.cpp @@ -2,48 +2,45 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "single_layer_tests/grid_sample.hpp" - #include -using namespace LayerTestsDefinitions; - -using GridSampleOp = ov::op::v9::GridSample; +#include "single_op_tests/grid_sample.hpp" namespace { +using ov::op::v9::GridSample; +using ov::test::GridSampleLayerTest; -const std::vector> data_shapes = { +const std::vector data_shapes = { {5, 2, 3, 5}, // Odd {5, 3, 4, 6}, // Even }; -const std::vector> grid_shapes = { +const std::vector grid_shapes = { {5, 7, 3, 2}, // Odd {5, 2, 8, 2}, // Even }; const std::vector align_corners = {true, false}; -const std::vector modes = { - GridSampleOp::InterpolationMode::BILINEAR, - GridSampleOp::InterpolationMode::BICUBIC, - GridSampleOp::InterpolationMode::NEAREST, +const std::vector modes = { + GridSample::InterpolationMode::BILINEAR, + GridSample::InterpolationMode::BICUBIC, + GridSample::InterpolationMode::NEAREST, }; -const std::vector padding_modes = { - GridSampleOp::PaddingMode::ZEROS, - GridSampleOp::PaddingMode::BORDER, - GridSampleOp::PaddingMode::REFLECTION, +const std::vector padding_modes = { + GridSample::PaddingMode::ZEROS, + GridSample::PaddingMode::BORDER, + GridSample::PaddingMode::REFLECTION, }; -const std::vector 
data_precisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector data_precisions = { + ov::element::f32, + ov::element::f16, }; -const std::vector grid_precisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, +const std::vector grid_precisions = { + ov::element::f32, }; INSTANTIATE_TEST_SUITE_P(smoke_GridSample, @@ -57,5 +54,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_GridSample, testing::ValuesIn(grid_precisions), testing::Values(ov::test::utils::DEVICE_GPU)), GridSampleLayerTest::getTestCaseName); - } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 70e397e665b886..bd18001155466c 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -73,6 +73,8 @@ std::vector disabledTestPatterns() { // TODO: range input with one element should NOT be regarded as dynamic batch model in Program::IsDynBatchModel(). 
R"(.*smoke_select_CompareWithRefsNumpy_dynamic_range.*)", R"(.*CachingSupportCase.*LoadNetworkCacheTestBase.*CompareWithRefImpl.*)", + // Issue: 124060 + R"(.*smoke_GridSample/GridSampleLayerTest.Inference/.*model_type=f16.*)", #if defined(_WIN32) R"(.*KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)", #endif diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/generate_proposals.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/generate_proposals.hpp new file mode 100644 index 00000000000000..471d175b3aa1d2 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/generate_proposals.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/generate_proposals.hpp" + +namespace ov { +namespace test { +TEST_P(GenerateProposalsLayerTest, GenerateProposalsLayerTests) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/grid_sample.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/grid_sample.hpp new file mode 100644 index 00000000000000..c635a1ad6b4359 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/grid_sample.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/grid_sample.hpp" + +namespace ov { +namespace test { +TEST_P(GridSampleLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/generate_proposals.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/generate_proposals.hpp new file mode 100644 index 00000000000000..170c1753efee2f --- /dev/null +++ 
b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/generate_proposals.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_models/utils/ov_helpers.hpp" +#include "common_test_utils/common_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +typedef std::tuple< + std::vector, // Input shapes + float, // min_size: minimum box width & height + float, // nms_threshold: specifies NMS threshold + int64_t, // post_nms_count: number of top-n proposals after NMS + int64_t, // pre_nms_count: number of top-n proposals after NMS + bool, // normalized: specifies whether box is normalized or not + ov::element::Type, // Model type + ov::element::Type, // roi_num precision + std::string // Device name +> GenerateProposalsTestParams; + +class GenerateProposalsLayerTest : + public testing::WithParamInterface, + virtual public SubgraphBaseTest { +protected: + void SetUp() override; +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/grid_sample.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/grid_sample.hpp new file mode 100644 index 00000000000000..07f46613dc450e --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/grid_sample.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +using GridSampleParams = std::tuple; // Device name + +class GridSampleLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const 
testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/generate_proposals.cpp b/src/tests/functional/shared_test_classes/src/single_op/generate_proposals.cpp new file mode 100644 index 00000000000000..ee6bde40c0dc07 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/generate_proposals.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/generate_proposals.hpp" + +namespace ov { +namespace test { +std::string GenerateProposalsLayerTest::getTestCaseName( + const testing::TestParamInfo& obj) { + std::vector shapes; + ov::op::v9::GenerateProposals::Attributes attributes; + ov::element::Type model_type; + ov::element::Type roi_num_type; + std::string targetName; + std::tie( + shapes, + attributes.min_size, + attributes.nms_threshold, + attributes.post_nms_count, + attributes.pre_nms_count, + attributes.normalized, + model_type, + roi_num_type, + targetName) = obj.param; + + std::ostringstream result; + using ov::test::operator<<; + result << "im_info=" << shapes[0] << "_"; + result << "anchors=" << shapes[1] << "_"; + result << "deltas=" << shapes[2] << "_"; + result << "scores=" << shapes[3] << "_"; + + using ov::test::operator<<; + result << "attributes={"; + result << "score_threshold=" << attributes.min_size << "_"; + result << "nms_threshold=" << attributes.nms_threshold << "_"; + result << "post_nms_count=" << attributes.post_nms_count << "_"; + result << "pre_nms_count=" << attributes.pre_nms_count; + result << "normalized=" << attributes.normalized; + result << "nms_eta=" << attributes.nms_eta; + result << "}_"; + + result << "netPRC=" << model_type.get_type_name() << "_"; + result << "roiNumPRC=" << roi_num_type.get_type_name() << "_"; + result << "trgDev=" << targetName; + return result.str(); +} + +void 
GenerateProposalsLayerTest::SetUp() { + std::vector shapes; + ov::op::v9::GenerateProposals::Attributes attributes; + ov::element::Type model_type; + ov::element::Type roi_num_type; + std::tie( + shapes, + attributes.min_size, + attributes.nms_threshold, + attributes.post_nms_count, + attributes.pre_nms_count, + attributes.normalized, + model_type, + roi_num_type, + targetDevice) = this->GetParam(); + + inType = outType = model_type; + if (targetDevice == ov::test::utils::DEVICE_GPU) { + if (model_type == element::Type_t::f16) { + abs_threshold = 0.2; + } else { + abs_threshold = 0.00009; + } + } + + init_input_shapes(shapes); + + ov::ParameterVector params; + for (auto&& shape : inputDynamicShapes) { + params.push_back(std::make_shared(model_type, shape)); + } + + auto generate_proposals = std::make_shared( + params[0], // im_info + params[1], // anchors + params[2], // deltas + params[3], // scores + attributes, + roi_num_type); + function = std::make_shared( + generate_proposals->outputs(), + params, + "GenerateProposals"); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/grid_sample.cpp b/src/tests/functional/shared_test_classes/src/single_op/grid_sample.cpp new file mode 100644 index 00000000000000..a6b1d8db98f684 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/grid_sample.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/grid_sample.hpp" + +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/grid_sample.hpp" + +namespace ov { +namespace test { +std::string GridSampleLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::Shape data_shape; + ov::Shape grid_shape; + bool align_corners; + ov::op::v9::GridSample::InterpolationMode mode; + ov::op::v9::GridSample::PaddingMode 
padding_mode; + ov::element::Type model_type; + ov::element::Type grid_type; + std::string target_device; + + std::tie(data_shape, grid_shape, align_corners, mode, padding_mode, model_type, grid_type, target_device) = obj.param; + + std::ostringstream result; + result << "DS=" << ov::test::utils::vec2str(data_shape) << "_"; + result << "GS=" << ov::test::utils::vec2str(grid_shape) << "_"; + result << "align_corners=" << align_corners << "_"; + result << "Mode=" << ov::as_string(mode) << "_"; + result << "padding_mode=" << ov::as_string(padding_mode) << "_"; + result << "model_type=" << model_type.get_type_name() << "_"; + result << "grid_type=" << grid_type.get_type_name() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void GridSampleLayerTest::SetUp() { + ov::Shape data_shape; + ov::Shape grid_shape; + bool align_corners; + ov::op::v9::GridSample::InterpolationMode mode; + ov::op::v9::GridSample::PaddingMode padding_mode; + ov::element::Type model_type; + ov::element::Type grid_type; + + std::tie(data_shape, grid_shape, align_corners, mode, padding_mode, model_type, grid_type, targetDevice) = this->GetParam(); + + auto data = std::make_shared(model_type, data_shape); + auto grid = std::make_shared(grid_type, grid_shape); + auto gridSample = std::make_shared( + data, + grid, + ov::op::v9::GridSample::Attributes(align_corners, mode, padding_mode)); + + function = std::make_shared(std::make_shared(gridSample), + ov::ParameterVector{data, grid}, + "GridSample"); +} +} // namespace test +} // namespace ov From 588e96bc3783c73afcc1a01d8e44459aa0b09041 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Wed, 8 Nov 2023 11:43:53 +0100 Subject: [PATCH 234/275] Refactor MemoryLayerTest (#20914) * Refactor MemoryLayerTest * Apply comments --- .../single_layer_tests/memory.cpp | 26 ++- .../shared/include/single_op_tests/memory.h | 19 ++ .../shared_test_classes/single_op/memory.hpp | 50 +++++ .../src/single_op/memory.cpp | 174 ++++++++++++++++++ 
.../include/common_test_utils/test_enums.hpp | 9 +- .../common_test_utils/src/test_enums.cpp | 20 ++ 6 files changed, 283 insertions(+), 15 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/memory.h create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/memory.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/memory.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/memory.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/memory.cpp index 8072e642b48dfe..2059180c19acdb 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/memory.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/memory.cpp @@ -4,26 +4,24 @@ #include -#include "single_layer_tests/memory.h" - -using namespace LayerTestsDefinitions; +#include "single_op_tests/memory.h" namespace { +using ov::test::MemoryLayerTest; -std::vector transformation { - ngraph::helpers::MemoryTransformation::NONE, - ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, - ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API, - ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT, +std::vector transformation { + ov::test::utils::MemoryTransformation::NONE, + ov::test::utils::MemoryTransformation::LOW_LATENCY_V2, + ov::test::utils::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT, }; -const std::vector inShapes = { +const std::vector inShapes = { {3}, {100, 100}, }; -const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32, +const std::vector input_types = { + ov::element::f32, }; const std::vector iterationCount { @@ -32,14 +30,14 @@ const std::vector iterationCount { 10 }; -INSTANTIATE_TEST_SUITE_P(smoke_MemoryTest, MemoryTest, +INSTANTIATE_TEST_SUITE_P(smoke_MemoryTest, MemoryLayerTest, ::testing::Combine( 
::testing::ValuesIn(transformation), ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), - ::testing::ValuesIn(inputPrecisions), + ::testing::ValuesIn(input_types), ::testing::Values(ov::test::utils::DEVICE_CPU, "HETERO:CPU")), - MemoryTest::getTestCaseName); + MemoryLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/memory.h b/src/tests/functional/plugin/shared/include/single_op_tests/memory.h new file mode 100644 index 00000000000000..0b5938682b2bf2 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/memory.h @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/memory.hpp" + +namespace ov { +namespace test { +TEST_P(MemoryLayerTest, Inference) { + run(); +}; + +TEST_P(MemoryV3LayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/memory.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/memory.hpp new file mode 100644 index 00000000000000..2c0ea873ba59b8 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/memory.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_enums.hpp" + +namespace ov { +namespace test { + +using MemoryLayerTestParams = std::tuple< + ov::test::utils::MemoryTransformation, // Apply Memory transformation + int64_t, // iterationCount + ov::Shape, // inputShape + ov::element::Type, // modelType + std::string // targetDevice +>; + +class MemoryLayerTest : public testing::WithParamInterface, + virtual public 
ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; + void infer() override; + std::vector calculate_refs() override; + + void CreateCommonFunc(ov::element::Type model_type, ov::Shape input_shape); + void CreateTIFunc(ov::element::Type model_type, ov::Shape input_shape); + void ApplyLowLatency(ov::test::utils::MemoryTransformation transformation); + + bool use_version_3 = false; + int64_t iteration_count; +}; + +class MemoryV3LayerTest : public MemoryLayerTest { +protected: + void SetUp() override; +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/memory.cpp b/src/tests/functional/shared_test_classes/src/single_op/memory.cpp new file mode 100644 index 00000000000000..639d110886ee0e --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/memory.cpp @@ -0,0 +1,174 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/memory.hpp" + +#include "openvino/pass/low_latency.hpp" +#include "openvino/pass/manager.hpp" +#include "template/properties.hpp" + +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/assign.hpp" +#include "openvino/op/read_value.hpp" +#include "openvino/op/util/variable.hpp" +#include "openvino/op/tensor_iterator.hpp" + +namespace ov { +namespace test { + +std::string MemoryLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + int64_t iteration_count; + ov::element::Type model_type; + ov::Shape input_shape; + std::string target_device; + ov::test::utils::MemoryTransformation transformation; + std::tie(transformation, iteration_count, input_shape, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "transformation=" << transformation << 
"_"; + result << "iteration_count=" << iteration_count << "_"; + result << "IS=" << ov::test::utils::vec2str(input_shape) << "_"; + result << "modelType=" << model_type.get_type_name() << "_"; + result << "trgDev=" << target_device; + result << ")"; + return result.str(); +} + +void MemoryLayerTest::SetUp() { + ov::element::Type model_type; + ov::Shape input_shape; + ov::test::utils::MemoryTransformation transformation; + + std::tie(transformation, iteration_count, input_shape, model_type, targetDevice) = this->GetParam(); + + if (transformation == ov::test::utils::MemoryTransformation::NONE) { + CreateCommonFunc(model_type, input_shape); + } else { + CreateTIFunc(model_type, input_shape); + ApplyLowLatency(transformation); + } +} + +void MemoryLayerTest::CreateCommonFunc(ov::element::Type model_type, ov::Shape input_shape) { + ov::ParameterVector param {std::make_shared(model_type, input_shape)}; + const auto variable_info = targetDevice == ov::test::utils::DEVICE_GPU ? + ov::op::util::VariableInfo{input_shape, model_type, "v0"} : + ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"}; + auto variable = std::make_shared(variable_info); + + std::shared_ptr read_value; + if (use_version_3) { + read_value = std::make_shared(param[0], variable->get_info().variable_id); + } else { + read_value = std::make_shared(param[0], variable); + } + + auto add = std::make_shared(read_value, param.at(0)); + + std::shared_ptr assign; + if (use_version_3) { + assign = std::make_shared(add, variable->get_info().variable_id); + } else { + assign = std::make_shared(add, variable); + } + + auto res = std::make_shared(add); + function = std::make_shared(ResultVector{res}, SinkVector{assign}, param, "TestMemory"); +} + +void MemoryLayerTest::CreateTIFunc(ov::element::Type model_type, ov::Shape input_shape) { + auto param = std::make_shared(model_type, ov::Shape(input_shape)); + + std::vector> shape = {{static_cast(iteration_count), 1}}; + auto iter_count = 
std::make_shared(model_type, ov::Shape{static_cast(iteration_count), 1}); + + // Body + auto X = std::make_shared(model_type, ov::Shape(input_shape)); + auto Y = std::make_shared(model_type, ov::Shape(input_shape)); + auto Iter = std::make_shared(model_type, ov::Shape{1, 1}); + auto add = std::make_shared(X, Y); + auto res = std::make_shared(add); + auto Iter_res = std::make_shared(Iter); + auto body = std::make_shared(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter}); + + // TI construction + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(X, param, res); + tensor_iterator->set_invariant_input(Y, param); + tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0); + + auto output = tensor_iterator->get_iter_value(res, -1); + auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0); + function = std::make_shared(OutputVector{output, output_iter}, + ParameterVector{param, iter_count}, + "PureTI"); +} + +void MemoryLayerTest::ApplyLowLatency(ov::test::utils::MemoryTransformation transformation) { + if (transformation == ov::test::utils::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + } else if (transformation == ov::test::utils::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) { + function->validate_nodes_and_infer_types(); + ov::pass::Manager manager; + manager.register_pass(false); + manager.run_passes(function); + } +} + +void MemoryLayerTest::infer() { + inferRequest = compiledModel.create_infer_request(); + for (size_t iter = 0; iter <= iteration_count; iter++) { + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + } + inferRequest.infer(); + } +} + +std::vector MemoryLayerTest::calculate_refs() { + if (is_report_stages) { + std::cout << "[ REFERENCE ] 
`SubgraphBaseTest::calculate_refs()` is started"<< std::endl; + } + auto start_time = std::chrono::system_clock::now(); + + update_ref_model(); + match_parameters(); + + auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE, {{ ov::template_plugin::disable_transformations(true) }}); + auto inferRequestRef = compiledModelRef.create_infer_request(); + + for (size_t iter = 0; iter <= iteration_count; iter++) { + for (const auto& param : functionRefs->get_parameters()) { + inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param])); + } + inferRequestRef.infer(); + } + auto outputs = std::vector{}; + for (const auto& output : functionRefs->outputs()) { + outputs.push_back(inferRequestRef.get_tensor(output)); + } + if (is_report_stages) { + auto end_time = std::chrono::system_clock::now(); + std::chrono::duration duration = end_time - start_time; + std::cout << "[ REFERENCE ] `SubgraphBaseTest::calculate_refs()` is finished successfully. 
Duration is " << duration.count() << "s" << std::endl; + } + return outputs; +} + +void MemoryV3LayerTest::SetUp() { + use_version_3 = true; + MemoryLayerTest::SetUp(); +} + +} // namespace test +} // namespace ov + diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp index 5c93d211cac30b..f0f6fe51521889 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp @@ -163,7 +163,12 @@ enum class TensorIteratorBody { // CNN todo: implement }; -// clang-format on +enum class MemoryTransformation { + NONE, + LOW_LATENCY_V2, + LOW_LATENCY_V2_REGULAR_API, + LOW_LATENCY_V2_ORIGINAL_INIT +}; std::ostream& operator<<(std::ostream& os, const ReductionType& m); @@ -195,6 +200,8 @@ std::ostream& operator<<(std::ostream& os, ov::op::v8::MatrixNms::DecayFunction std::ostream& operator<<(std::ostream& os, TensorIteratorBody type); +std::ostream& operator<<(std::ostream& os, MemoryTransformation type); + } // namespace utils } // namespace test } // namespace ov diff --git a/src/tests/test_utils/common_test_utils/src/test_enums.cpp b/src/tests/test_utils/common_test_utils/src/test_enums.cpp index e67122d9b8af4f..feb2d29a26facf 100644 --- a/src/tests/test_utils/common_test_utils/src/test_enums.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_enums.cpp @@ -351,6 +351,26 @@ std::ostream& operator<<(std::ostream& os, TensorIteratorBody type) { return os; } +std::ostream& operator<<(std::ostream& os, MemoryTransformation type) { + switch (type) { + case MemoryTransformation::NONE: + os << "NONE"; + break; + case MemoryTransformation::LOW_LATENCY_V2: + os << "LOW_LATENCY_V2"; + break; + case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API: + os << "LOW_LATENCY_V2_REGULAR_API"; + break; + case 
MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT: + os << "LOW_LATENCY_V2_ORIGINAL_INIT"; + break; + default: + throw std::runtime_error("NOT_SUPPORTED_TYPE"); + } + return os; +} + } // namespace utils } // namespace test } // namespace ov From d07f27205406dd1db8c69b0ab78137a0e671a610 Mon Sep 17 00:00:00 2001 From: Oleksii Khovan Date: Wed, 8 Nov 2023 12:26:51 +0100 Subject: [PATCH 235/275] [GPU] Fix cum_sum_partial_sum implementation for dimensions >= BLOCK_SIZE (#20855) - fix cum_sum_partial_sum kernel; - add unit test and func test for big shapes; - add test to compare Partial vs Ref performance; - change kernels' priorities according to performance measurements; - move common profiling helpers to test_utils. Ticket: CVS-123590 --- .../cl_kernels/cum_sum_partial_sum.cl | 27 ++-- .../cum_sum/cum_sum_kernel_partial_sum.cpp | 10 +- .../kernels/cum_sum/cum_sum_kernel_ref.cpp | 2 +- .../single_layer_tests/cum_sum.cpp | 16 +++ .../unit/test_cases/cum_sum_gpu_test.cpp | 115 ++++++++++++++++++ .../unit/test_cases/permute_gpu_test.cpp | 44 +------ .../unit/test_cases/resample_gpu_test.cpp | 44 +------ .../tests/unit/test_utils/test_utils.cpp | 37 ++++++ .../tests/unit/test_utils/test_utils.h | 4 + 9 files changed, 200 insertions(+), 99 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/cum_sum_partial_sum.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/cum_sum_partial_sum.cl index 7919b5ae5dfbf1..1d0c2c6f2d8970 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/cum_sum_partial_sum.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/cum_sum_partial_sum.cl @@ -119,12 +119,17 @@ inline uint FUNC(get_block_num)(int axis) #endif } -inline uint FUNC(get_current_index)(int i) +// This function works incorrectly for the last block when there are leftovers (i.e. SUM_ITEMS_NUM % BLOCK_SIZE != 0) +// and REVERSE == false.
But it is expected, since it will never be called for the last block when calculating +// sum of the previous blocks (see loop in cum_sum_final), thus, no need to make it correct +// at cost of complexity and performance. +inline uint FUNC(get_last_index_in_block)(int block) { + const int num_items_in_blocks_before = (block + 1) * BLOCK_SIZE; #ifdef REVERSE - return SUM_ITEMS_NUM - i*BLOCK_SIZE - BLOCK_SIZE; + return SUM_ITEMS_NUM - num_items_in_blocks_before; #else - return i*BLOCK_SIZE + BLOCK_SIZE - 1; + return num_items_in_blocks_before - 1; #endif } @@ -148,17 +153,15 @@ KERNEL(cum_sum_final)( PARTIAL_TYPE res = partial[ind]; PARTIAL_TYPE sum = 0; - uint block_num = FUNC_CALL(get_block_num)(axes[AXIS]); - int n = 4; - for (int i = 0; i < block_num / n; ++i) { - unroll_for (int j = 0; j < n; ++j) { - axes[AXIS] = FUNC_CALL(get_current_index)(i*n + j); - ind = FUNC_CALL(get_input_index)(axes[0], axes[1], axes[2], axes[3], axes[4], axes[5]); - sum += partial[ind]; - } + const uint current_block = FUNC_CALL(get_block_num)(axes[AXIS]); + + for (int block = 0; block < current_block; ++block) { + axes[AXIS] = FUNC_CALL(get_last_index_in_block)(block); + ind = FUNC_CALL(get_input_index)(axes[0], axes[1], axes[2], axes[3], axes[4], axes[5]); + sum += partial[ind]; } - uint out_ind = FUNC_CALL(get_output_index)(batch, features, w, z, y, x); + const uint out_ind = FUNC_CALL(get_output_index)(batch, features, w, z, y, x); output[out_ind] = ACTIVATION(TO_OUTPUT_TYPE(res + sum), ACTIVATION_PARAMS); } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_partial_sum.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_partial_sum.cpp index 38b9a10307c0e9..0abe42dcd0dde8 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_partial_sum.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_partial_sum.cpp @@ -140,7 +140,13 @@ KernelsData 
CumSumKernelPartialSum::GetKernelsData(const Params& params, const o return GetMultiStageKernelsData(params, options); } -KernelsPriority CumSumKernelPartialSum::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_7; +KernelsPriority CumSumKernelPartialSum::GetKernelsPriority(const Params& params, const optional_params& /*options*/) const { + const auto& p = static_cast(params); + const auto& o = p.outputs[0]; + const std::vector dims = {o.Batch().v, o.Feature().v, o.W().v, o.Z().v, o.Y().v, o.X().v}; + + // cum_sum_partial works slower than cum_sum_ref on small shapes. + // Value "3 * BLOCK_SIZE" determined experimentally - see cum_sum_partial.perf_test in unit tests. + return dims[GetRealAxisIndex(p)] >= 3 * BLOCK_SIZE ? FORCE_PRIORITY_7 : DONT_USE_IF_HAVE_SOMETHING_ELSE; } } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_ref.cpp index 486004b1c23c57..e3228b983e3c4c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/cum_sum/cum_sum_kernel_ref.cpp @@ -68,6 +68,6 @@ KernelsData CumSumKernelRef::GetKernelsData(const Params& params, const optional } KernelsPriority CumSumKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return DONT_USE_IF_HAVE_SOMETHING_ELSE; + return FORCE_PRIORITY_9; } } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp index 6d014408fd68f7..93c309a55b6397 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp +++ 
b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/cum_sum.cpp @@ -44,4 +44,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_CumSum, CumSumLayerTest, ::testing::ValuesIn(reverse), ::testing::Values(ov::test::utils::DEVICE_GPU)), CumSumLayerTest::getTestCaseName); + +const std::vector> inShapesWithBigDims = { + {{64, 64}}, + {{73, 73, 73}}, + {{49, 49, 49, 49}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CumSumBigDims, CumSumLayerTest, + ::testing::Combine( + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inShapesWithBigDims)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(axes), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + CumSumLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/cum_sum_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/cum_sum_gpu_test.cpp index db0ca85ac9d10d..b5303f8aef177b 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/cum_sum_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/cum_sum_gpu_test.cpp @@ -348,3 +348,118 @@ TEST(cum_sum_gpu_fp32, dynamic) { ASSERT_TRUE(are_equal(answers[i], output_ptr[i])) << i; } } + +TEST(cum_sum_partial, big_shapes) { + auto& engine = get_test_engine(); + + const std::vector input_sizes = {16, 17, 34, 65, 256, 300}; + + for (const auto num_items : input_sizes) { + std::vector input_data; + input_data.resize(num_items); + std::iota(input_data.begin(), input_data.end(), 1); + + const auto shape_num_items = static_cast(num_items); + const tensor shape{shape_num_items, 1, 1, 1}; + const layout in_layout{data_types::f32, format::bfyx, shape}; + const auto input = engine.allocate_memory(in_layout); + + set_values(input, input_data); + + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(cum_sum("cum_sum", input_info("input"))); + + auto config = get_test_default_config(engine); + 
config.set_property(ov::intel_gpu::force_implementations( + ov::intel_gpu::ImplForcingMap{ {"cum_sum", {format::bfyx, "cum_sum_partial_sum"}} })); + network network(engine, topology, config); + network.set_input_data("input", input); + + const auto inst = network.get_primitive("cum_sum"); + const auto outputs = network.execute(); + ASSERT_EQ(outputs.begin()->first, "cum_sum"); + + const auto output = outputs.at("cum_sum").get_memory(); + const cldnn::mem_lock output_ptr(output, get_test_stream()); + + const auto expected = cumsum(input_data, format::bfyx, {shape_num_items, 1, 1, 1, 1, 1 }); + + ASSERT_EQ(expected.size(), num_items); + ASSERT_EQ(output->count(), num_items); + for (size_t i = 0; i < num_items; ++i) { + ASSERT_TRUE(are_equal(expected[i], output_ptr[i])) << "num_items=" << num_items << ", i=" << i; + } + } +} + +TEST(cum_sum_partial, DISABLED_perf_test) { + auto& engine = get_test_engine(); + + const std::vector input_sizes = {1, 2, 4, 8, 16, 17, 34, 48, 65, 256, 300, 515, 1025}; + + for (const auto num_items : input_sizes) { + std::vector input_data; + input_data.resize(num_items); + std::iota(input_data.begin(), input_data.end(), 1); + + const auto shape_num_items = static_cast(num_items); + const tensor shape{shape_num_items, 1, 1, 1}; + const layout in_layout{data_types::f32, format::bfyx, shape}; + + + const auto input_ref = engine.allocate_memory(in_layout); + set_values(input_ref, input_data); + topology topology_ref; + topology_ref.add(input_layout("input", in_layout)); + topology_ref.add(cum_sum("cum_sum", input_info("input"))); + ExecutionConfig config_ref(ov::enable_profiling(true)); + config_ref.set_property(ov::intel_gpu::force_implementations( + ov::intel_gpu::ImplForcingMap{ {"cum_sum", {format::bfyx, "cum_sum_ref"}} })); + network network_ref(engine, topology_ref, config_ref); + network_ref.set_input_data("input", input_ref); + + + const auto input_partial = engine.allocate_memory(in_layout); + set_values(input_partial, input_data); + 
topology topology_partial; + topology_partial.add(input_layout("input", in_layout)); + topology_partial.add(cum_sum("cum_sum", input_info("input"))); + ExecutionConfig config_partial(ov::enable_profiling(true)); + config_partial.set_property(ov::intel_gpu::force_implementations( + ov::intel_gpu::ImplForcingMap{ {"cum_sum", {format::bfyx, "cum_sum_partial_sum"}} })); + network network_partial(engine, topology_partial, config_partial); + network_partial.set_input_data("input", input_partial); + + + std::map output_ref; + std::map output_partial; + + constexpr int WARMUP_ROUNDS = 10; + for (int i = 0; i < WARMUP_ROUNDS; ++i) { + output_ref = network_ref.execute(); + output_partial = network_partial.execute(); + } + + constexpr int PERFTEST_ROUNDS = 100; + double exectime_ref = 0.f; + double exectime_partial = 0.f; + for (int i = 0; i < PERFTEST_ROUNDS; ++i) { + output_ref = network_ref.execute(); + const auto t_ref = get_profiling_exectime(output_ref, "cum_sum"); + exectime_ref += t_ref; + + output_partial = network_partial.execute(); + const auto t_partial = get_profiling_exectime(output_partial, "cum_sum"); + exectime_partial += t_partial; + } + exectime_ref /= PERFTEST_ROUNDS; + exectime_partial /= PERFTEST_ROUNDS; + + std::cout << std::endl; + std::cout << "Execution time for num_items=" << num_items << " " + << "cum_sum_ref" << " " << exectime_ref << std::endl; + std::cout << "Execution time for num_items=" << num_items << " " + << "cum_sum_partial_sum" << " " << exectime_partial << std::endl; + } +} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp index ac22cc773f885a..0c7eab8bc26cca 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp @@ -2273,46 +2273,6 @@ TEST_P(permute_f_y_axes_tile, combined) { struct TiledPerformancePermuteTest : TiledPermuteTest { - static double 
get_exectime(const std::map& outputs, - const std::string& primitive_id) - { - using namespace std::chrono; - std::shared_ptr e = outputs.at(primitive_id).get_event(); - e->wait(); // should ensure execution completion, if not segfault will occur - double avg_time = 0.0; - auto intervals = e->get_profiling_info(); - for (const auto& q : intervals) - { - if (q.stage != instrumentation::profiling_stage::executing) { - continue; - } - avg_time = duration_cast>(q.value->value()).count(); - break; - } - return avg_time; - } - - static void print_all_perf(std::map outputs) - { - std::cout << "Print last run time" << std::endl; - using namespace std::chrono; - for( const auto &n : outputs ) { - std::shared_ptr e = n.second.get_event(); - auto intervals = e->get_profiling_info(); - double time = 0.0; - for (const auto& q : intervals) - { - if (q.stage == instrumentation::profiling_stage::executing) { - continue; - } - time = duration_cast>(q.value->value()).count(); - break; - } - std::cout << n.first << ":" << time << std::endl; - } - std::cout << std::endl; - } - template void execute_perf_test(const std::vector& sizes, cldnn::format format_fsv, const std::string & kernel_name, std::vector permute_order) @@ -2382,11 +2342,11 @@ struct TiledPerformancePermuteTest : TiledPermuteTest double exectime_opt = 0.f; for (int i = 0; i < r; ++i) { output_permute_opt = network_tile.execute(); - auto t_opt = get_exectime(output_permute_opt, "output"); + auto t_opt = get_profiling_exectime(output_permute_opt, "output"); exectime_opt += t_opt; output_permute_ref = network_ref.execute(); - auto t_ref = get_exectime(output_permute_ref, "output"); + auto t_ref = get_profiling_exectime(output_permute_ref, "output"); exectime_ref += t_ref; } exectime_ref /= r; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp index 16c911fca45a2e..652d5d16d26a44 100644 --- 
a/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp @@ -2099,46 +2099,6 @@ struct resample_opt_random_test : testing::TestWithParam& outputs, - const std::string& primitive_id) - { - using namespace std::chrono; - std::shared_ptr e = outputs.at(primitive_id).get_event(); - e->wait(); // should ensure execution completion, if not segfault will occur - double avg_time = 0.0; - auto intervals = e->get_profiling_info(); - for (const auto& q : intervals) - { - if (q.stage == instrumentation::profiling_stage::executing) { - continue; - } - avg_time = duration_cast>(q.value->value()).count(); - break; - } - return avg_time; - } - - static void print_all_perf(std::map outputs) - { - std::cout << "Print last run time" << std::endl; - using namespace std::chrono; - for( const auto &n : outputs ) { - std::shared_ptr e = n.second.get_event(); - auto intervals = e->get_profiling_info(); - double time = 0.0; - for (const auto& q : intervals) - { - if (q.stage == instrumentation::profiling_stage::executing) { - continue; - } - time = duration_cast>(q.value->value()).count(); - break; - } - std::cout << n.first << ":" << time << std::endl; - } - std::cout << std::endl; - } - void execute_perf_test(const resample_opt_random_test_params& params, const std::string& kernel, const bool do_planar = false) { auto& engine = get_test_engine(); @@ -2174,7 +2134,7 @@ struct resample_opt_random_test_ext : resample_opt_random_test double exectime = 0.f; for (int i = 0; i < r; ++i) { result_opt = net_opt.execute(); - exectime += get_exectime(result_opt, "resample_opt"); + exectime += get_profiling_exectime(result_opt, "resample_opt"); } exectime /= r; std::string frm_str = format(working_format).to_string(); @@ -2197,7 +2157,7 @@ struct resample_opt_random_test_ext : resample_opt_random_test << frm_str << " " << input_type << " " << exectime << std::endl; // Uncomment line below if you like to see the 
latencies of all operations from last iteration - //print_all_perf(result_opt); + //print_profiling_all_exectimes(result_opt); } }; diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index 895e727e07e735..7af091ac84d534 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -435,4 +435,41 @@ std::vector generic_test::test_batch_sizes = { 1, 2 };// 4, 8, 16}; std::vector generic_test::test_feature_sizes = { 1, 2 };// , 3, 15}; std::vector generic_test::test_input_sizes = { { 1, 1, 100, 100 } ,{ 1, 1, 277, 277 } ,{ 1, 1, 400, 600 } }; +namespace { +double get_exectime_from_profiling_info(const std::vector& intervals) +{ + using namespace std::chrono; + double time = 0.0; + for (const auto& i : intervals) { + if (i.stage != instrumentation::profiling_stage::executing) { + continue; + } + time = duration_cast>(i.value->value()).count(); + break; + } + return time; +} +} // namespace + +double get_profiling_exectime(const std::map& outputs, + const std::string& primitive_id) +{ + const auto event = outputs.at(primitive_id).get_event(); + event->wait(); // should ensure execution completion, if not segfault will occur + const auto intervals = event->get_profiling_info(); + return get_exectime_from_profiling_info(intervals); +} + +void print_profiling_all_exectimes(const std::map& outputs) +{ + std::cout << "Print last run time" << std::endl; + for (const auto& o : outputs) { + const auto event = o.second.get_event(); + const auto intervals = event->get_profiling_info(); + const auto time = get_exectime_from_profiling_info(intervals); + std::cout << o.first << ":" << time << std::endl; + } + std::cout << std::endl; +} + } // namespace tests diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h index 08d49918a0de10..680a87283fcbfd 
100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h @@ -730,4 +730,8 @@ inline cldnn::network::ptr get_network(cldnn::engine& engine, return network; } +double get_profiling_exectime(const std::map& outputs, + const std::string& primitive_id); +void print_profiling_all_exectimes(const std::map& outputs); + } // namespace tests From fdaa4b5d03aa40c284756476d04fc96ae061944a Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Wed, 8 Nov 2023 13:40:52 +0100 Subject: [PATCH 236/275] [DOCS] Small fixes in articles for master (#20947) * Fixes * Update deployment_intro.md * Update docs/articles_en/openvino_workflow/deployment_intro.md Co-authored-by: Sebastian Golebiewski --------- Co-authored-by: Tatiana Savina Co-authored-by: Sebastian Golebiewski --- docs/articles_en/openvino_workflow/deployment_intro.md | 5 ++--- .../model_optimization_guide/tmo_introduction.md | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/articles_en/openvino_workflow/deployment_intro.md b/docs/articles_en/openvino_workflow/deployment_intro.md index 5a36e5f99407b1..446b78ac54ca6f 100644 --- a/docs/articles_en/openvino_workflow/deployment_intro.md +++ b/docs/articles_en/openvino_workflow/deployment_intro.md @@ -28,8 +28,7 @@ Local Deployment Options - using PIP package manager on PyPI - the default approach for Python-based applications; - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO from Docker ` -Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework `__ to generate a Dockerfile and built the image. - + - Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework `__ to generate a Dockerfile and build the image. 
- Grab a necessary functionality of OpenVINO together with your application, also called "local distribution": - using :doc:`OpenVINO Deployment Manager ` - providing a convenient way for creating a distribution package; @@ -45,7 +44,7 @@ The table below shows which distribution type can be used for what target operat - Operating systems * - Debian packages - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit - * - RMP packages + * - RPM packages - Red Hat Enterprise Linux 8, 64-bit * - Docker images - Ubuntu 22.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit diff --git a/docs/articles_en/openvino_workflow/model_optimization_guide/tmo_introduction.md b/docs/articles_en/openvino_workflow/model_optimization_guide/tmo_introduction.md index 7791cc9c2ac8ef..a633abf3460f6a 100644 --- a/docs/articles_en/openvino_workflow/model_optimization_guide/tmo_introduction.md +++ b/docs/articles_en/openvino_workflow/model_optimization_guide/tmo_introduction.md @@ -76,7 +76,7 @@ Quantization is the process of converting the weights and activation values in a Quantization-aware training inserts nodes into the neural network during training that simulate the effect of lower precision. This allows the training algorithm to consider quantization errors as part of the overall training loss that gets minimized during training. The network is then able to achieve enhanced accuracy when quantized. -The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the :doc:`Quantization-ware Training guide ` to learn more. +The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. 
See the :doc:`Quantization-aware Training guide ` to learn more. Filter pruning -------------------- From 68e6484ecb24eeb0e9678e93d4e15f4527fa7946 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Nov 2023 17:30:15 +0400 Subject: [PATCH 237/275] Fixed version detection without git (#20951) --- cmake/developer_package/version.cmake | 24 ++++++++++++++++------ src/core/include/openvino/core/version.hpp | 2 +- src/inference/include/ie/ie_version.hpp | 2 +- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index 1b71befe448b76..effb320014452a 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -10,8 +10,11 @@ function(ov_branch_name VAR REPO_ROOT) COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_BRANCH + RESULT_VARIABLE EXIT_CODE OUTPUT_STRIP_TRAILING_WHITESPACE) - set (${VAR} ${GIT_BRANCH} PARENT_SCOPE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_BRANCH} PARENT_SCOPE) + endif() endif() endfunction() @@ -21,22 +24,31 @@ function(ov_commit_hash VAR REPO_ROOT) COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_COMMIT_HASH + RESULT_VARIABLE EXIT_CODE OUTPUT_STRIP_TRAILING_WHITESPACE) - set (${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) + endif() endif() endfunction() function(ov_commit_number VAR REPO_ROOT) + set(GIT_COMMIT_NUMBER_FOUND OFF) if(GIT_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} rev-list --count --first-parent HEAD WORKING_DIRECTORY ${REPO_ROOT} OUTPUT_VARIABLE GIT_COMMIT_NUMBER + RESULT_VARIABLE EXIT_CODE OUTPUT_STRIP_TRAILING_WHITESPACE) - set (${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) - else() + if(EXIT_CODE EQUAL 0) + set(GIT_COMMIT_NUMBER_FOUND ON) + set(${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) + endif() + endif() + if(NOT 
GIT_COMMIT_NUMBER_FOUND) # set zeros since git is not available - set (${VAR} "000" PARENT_SCOPE) + set(${VAR} "000" PARENT_SCOPE) endif() endfunction() @@ -140,7 +152,7 @@ macro(ov_parse_ci_build_number repo_root) ov_branch_name(GIT_BRANCH "${repo_root}") ov_commit_hash(GIT_COMMIT_HASH "${repo_root}") - if(NOT GIT_BRANCH STREQUAL "master") + if(NOT GIT_BRANCH MATCHES "^(master|HEAD)$") set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}") endif() diff --git a/src/core/include/openvino/core/version.hpp b/src/core/include/openvino/core/version.hpp index 33df7fe4ab3ec5..53c1f307d8eebd 100644 --- a/src/core/include/openvino/core/version.hpp +++ b/src/core/include/openvino/core/version.hpp @@ -20,7 +20,7 @@ */ #define OPENVINO_VERSION_MAJOR 2023 -#define OPENVINO_VERSION_MINOR 2 +#define OPENVINO_VERSION_MINOR 3 #define OPENVINO_VERSION_PATCH 0 namespace ov { diff --git a/src/inference/include/ie/ie_version.hpp b/src/inference/include/ie/ie_version.hpp index b0de98fe9a3636..08ab44f7bff8a5 100644 --- a/src/inference/include/ie/ie_version.hpp +++ b/src/inference/include/ie/ie_version.hpp @@ -31,7 +31,7 @@ */ #define IE_VERSION_MAJOR 2023 -#define IE_VERSION_MINOR 2 +#define IE_VERSION_MINOR 3 #define IE_VERSION_PATCH 0 #include "ie_api.h" From 25d94bd98b8f044d614d36fdceaff0aef9a0b0fa Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Wed, 8 Nov 2023 15:50:49 +0000 Subject: [PATCH 238/275] [conformance] Skip empty test cache error (#20924) --- .../layer_tests_summary/run_parallel.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py index 581a51105a5703..5916290b6abd52 100644 --- a/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py +++ b/src/tests/test_utils/functional_test_utils/layer_tests_summary/run_parallel.py @@ -332,14 +332,12 @@ def __init__( self._gtest_filter = 
"" self._command = self.__init_basic_command_line_for_exec_file(test_command_line) self._worker_num = worker_num - if not os.path.exists(self._working_dir): - os.mkdir(self._working_dir) - if cache_path == "": + os.makedirs(self._working_dir, exist_ok=True) + if cache_path == "" or not os.path.exists(cache_path): cache_path = os.path.join(self._working_dir, "test_cache.lst") self._cache_path = os.path.join(cache_path) head, _ = os.path.split(self._cache_path) - if not os.path.exists(head): - os.mkdir(head) + os.makedirs(head, exist_ok=True) self._is_save_cache = True if split_unit in constants.UNIT_NAMES: self._split_unit = split_unit @@ -875,7 +873,7 @@ def __save_log(logs_dir, dir, test_name): ) if os.path.isfile(interapted_log_path): test_cnt_real_saved_now += 1 - if self._is_save_cache: + if self._is_save_cache and os.path.isfile(self._cache_path): test_times.sort(reverse=True) with open(self._cache_path, "w", encoding=constants.ENCODING) as cache_file: cache_file.writelines( From c2d09b9a156bc0ca1e1800f1f0b34c3ad8241294 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Wed, 8 Nov 2023 16:54:23 +0100 Subject: [PATCH 239/275] FuseU4WeightsAndZeroPoint tests: avoid std::vector usage (#20918) --- .../convert_u4_weights_zero_point_to_scalar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp index 8fc896065e9001..70d22a1a1c41fd 100644 --- a/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp +++ b/src/common/transformations/tests/common_optimizations/convert_u4_weights_zero_point_to_scalar.cpp @@ -113,7 +113,7 @@ TEST_F(TransformationTestsF, FuseU4WeightsAndZeroPointNotScalarLikeZP) { ov::Shape decompression_shape{32, 1, 64}; auto weights = ov::op::v0::Constant::create(weights_precision, weights_shape, 
{4}); auto convert = std::make_shared(weights, decompression_precision); - std::vector zero_point_values(ov::shape_size(decompression_shape), 8); + std::vector zero_point_values(ov::shape_size(decompression_shape), 8); zero_point_values.back() = 6; auto zero_point = ov::op::v0::Constant::create(weights_precision, decompression_shape, zero_point_values); auto zero_point_convert = std::make_shared(zero_point, decompression_precision); From 9616c8f510c7d430892bafeceb785f4f7f3c5e69 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 8 Nov 2023 17:45:53 +0100 Subject: [PATCH 240/275] corrected timeouts (#20954) --- .github/workflows/fedora.yml | 2 +- .github/workflows/linux.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index 522fc5fb53fa6e..210b4e68ac76e2 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -179,7 +179,7 @@ jobs: RPM_Packages: needs: Build - timeout-minutes: 5 + timeout-minutes: 10 defaults: run: shell: bash diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0e3d5feba1a8af..082d1981d95d7a 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -646,7 +646,7 @@ jobs: CXX_Unit_Tests: name: C++ unit tests needs: Build - timeout-minutes: 15 + timeout-minutes: 20 defaults: run: shell: bash From 24cd7283e3cbf1f0aecd72501198d0b60c244bb1 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Wed, 8 Nov 2023 17:28:08 +0000 Subject: [PATCH 241/275] make cache space showing optional (#20962) --- .github/actions/setup_python/action.yml | 6 +++++- .github/workflows/linux.yml | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup_python/action.yml b/.github/actions/setup_python/action.yml index 5c26561cac3ca2..0bbbf45522e0cd 100644 --- a/.github/actions/setup_python/action.yml +++ b/.github/actions/setup_python/action.yml @@ -15,6 +15,10 @@ inputs: description: 'If the 
runner is self-hosted' required: false default: 'true' + show-cache-info: + description: 'If the action should show the share space occupied by cache' + required: false + default: 'false' runs: using: 'composite' steps: @@ -52,7 +56,7 @@ runs: echo "PIP_CACHE_DIR=${{ inputs.pip-cache-path }}/${PIP_VER}" >> $GITHUB_ENV echo "PIP_INSTALL_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" >> $GITHUB_ENV - - if: ${{ inputs.should-setup-pip-paths == 'true' }} + - if: ${{ inputs.show-cache-info == 'true' }} name: Get pip cache info shell: bash run: | diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 082d1981d95d7a..ec23f7aba5aa33 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -110,6 +110,7 @@ jobs: pip-cache-path: ${{ env.PIP_CACHE_PATH }} should-setup-pip-paths: 'true' self-hosted-runner: 'true' + show-cache-info: 'true' - name: Install python dependencies run: | From f627172e5a433b91989b66a1506ed8401817c622 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Wed, 8 Nov 2023 19:19:35 +0100 Subject: [PATCH 242/275] Add separate label for docs snippets (#20966) --- .github/labeler.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index d393b2354e1c36..52fb7ab07080c1 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -55,8 +55,12 @@ '!thirdparty/**/CMakeLists.txt'] 'category: docs': -- 'docs/**/*' - '**/*.md' +- any: ['docs/**/*', + '!docs/snippets/**/*'] + +'category: docs_snippets': +- 'docs/snippets/**/*' 'category: extensions': - 'src/core/include/openvino/core/extension.hpp' From 854158612ff6c49384a7676255dc6e95e7612dfd Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Wed, 8 Nov 2023 23:17:13 +0400 Subject: [PATCH 243/275] Scaled dot product attention (#20492) * Added experimental ScaledDotProductAttention operation in opset12. Supported in PT FE for aten::scaled_dot_product_attention translation. 
Decomposed in the common optimizations as functional reference. * Better ScaledDotProductAttention - Moved decomposition to the decomposing transformation - Implemented more ctors for the op - Renamed is_causal to causal - Shape/type inference native code instead of using decomposition - Moved the op from opset12 to opset13 - Added Python wrapper for ScaledDotProductAttention * Fix test that counts ops in the opsets * Update src/core/src/op/scaled_dot_product_attention.cpp Co-authored-by: Katarzyna Mitrus * Update src/core/src/op/scaled_dot_product_attention.cpp Co-authored-by: Katarzyna Mitrus * Move ScaledDotProductAttentionDecomposition from fusions to decompositions. * Remove not used legacy shape inference in ScaledDotProductAttention * Better namespace usage * Register all nodes in ScaledDotProductDecomposition for correct tracking of nodes and running next matcher passes on all new nodes. * Don't use register_new_node_ * ScaledDotProductAttention specification (with an extra scale argument) * Code style fix * Scale input implementation for ScaledDotProductAttention * Handle attention_mask=0 case in the op spec * Better description of scale input * N->M in scale description * Code style fix, remove debug print. * Apply suggestions from code review Co-authored-by: Katarzyna Mitrus Co-authored-by: Mateusz Mikolajczyk * Fix for case when is_causal is not passed * Extended description of ScaledDotProduct op * Better description in py op wrapper * Basic shape propagation tests for ScaledDotProductAttention * Added ScaledDotProductAttention to toc.
* Add op impl check --------- Co-authored-by: Katarzyna Mitrus Co-authored-by: Mateusz Mikolajczyk --- .../available_opsets/opset13.md | 1 + .../operations_specifications.md | 1 + .../sequence/ScaledDotProductAttention.md | 137 ++++++++++++++ .../src/openvino/runtime/opset13/__init__.py | 1 + .../src/openvino/runtime/opset13/ops.py | 39 +++- ...ed_dot_product_attention_decomposition.hpp | 24 +++ .../common_optimizations.cpp | 2 + ...ed_dot_product_attention_decomposition.cpp | 136 ++++++++++++++ src/core/include/openvino/op/ops.hpp | 1 + .../op/scaled_dot_product_attention.hpp | 59 +++++++ .../include/openvino/opsets/opset13_tbl.hpp | 1 + .../src/op/scaled_dot_product_attention.cpp | 73 ++++++++ src/core/tests/opset.cpp | 2 +- .../scaled_dot_product_attention.cpp | 167 ++++++++++++++++++ .../src/op/scaled_dot_product_attention.cpp | 106 ++--------- .../src/op_impl_check/single_op_graph.cpp | 14 ++ .../test_scaled_dot_product_attention.py | 22 ++- 17 files changed, 688 insertions(+), 98 deletions(-) create mode 100644 docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sequence/ScaledDotProductAttention.md create mode 100644 src/common/transformations/include/transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp create mode 100644 src/common/transformations/src/transformations/op_conversions/scaled_dot_product_attention_decomposition.cpp create mode 100644 src/core/include/openvino/op/scaled_dot_product_attention.hpp create mode 100644 src/core/src/op/scaled_dot_product_attention.cpp create mode 100644 src/core/tests/type_prop/scaled_dot_product_attention.cpp diff --git a/docs/articles_en/documentation/openvino_ir/operation_sets/available_opsets/opset13.md b/docs/articles_en/documentation/openvino_ir/operation_sets/available_opsets/opset13.md index d5d7169c8af127..8f016a0daddb41 100644 --- a/docs/articles_en/documentation/openvino_ir/operation_sets/available_opsets/opset13.md +++ 
b/docs/articles_en/documentation/openvino_ir/operation_sets/available_opsets/opset13.md @@ -167,6 +167,7 @@ Table of Contents * :doc:`ROIPooling ` * :doc:`Roll ` * :doc:`Round ` +* :doc:`ScaledDotProductAttention ` * :doc:`ScatterElementsUpdate ` * :doc:`ScatterNDUpdate ` * :doc:`ScatterUpdate ` diff --git a/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications.md b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications.md index 0e5756824b69e5..b26b57022d7bfa 100644 --- a/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications.md +++ b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications.md @@ -182,6 +182,7 @@ ROIPooling-1 Roll-7 Round-5 + ScaledDotProductAttention-13 ScatterElementsUpdate-3 ScatterElementsUpdate-12 ScatterNDUpdate-3 diff --git a/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sequence/ScaledDotProductAttention.md b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sequence/ScaledDotProductAttention.md new file mode 100644 index 00000000000000..fecb70ac91c51e --- /dev/null +++ b/docs/articles_en/documentation/openvino_ir/operation_sets/operations_specifications/sequence/ScaledDotProductAttention.md @@ -0,0 +1,137 @@ +# ScaledDotProductAttention {#openvino_docs_ops_sequence_ScaledDotProductAttention_13} + +@sphinxdirective + +.. meta:: + :description: Learn about ScaledDotProductAttention-13 - a basic block for the transformer attention mechanism. + +**Versioned name**: *ScaledDotProductAttention-13* + +**Category**: *Sequence processing* + +**Short description**: *ScaledDotProductAttention* partially implements +`torch.nn.functional.scaled_dot_product_attention `__, +omitting training-related parameter. 
+ +**Detailed description**: + +*ScaledDotProductAttention* provides functionality according to the following pseudo-code using other operations from OpenVINO opset and ``numpy``: + +.. code-block:: py + +def ScaledDotProductAttention(query, key, value, attn_mask=None, scale=None, *, causal): + L, S = Gather(ShapeOf(query), -2), Gather(ShapeOf(key), -2) + if scale is None: + scale = 1.0 / Sqrt(ConvertLike(Gather(ShapeOf(query), -1), query)) + attn_bias = Broadcast(ConvertLike(0, query), [L, S]) + if causal: + attn_bias = numpy.triu(Broadcast(ConvertLike(-inf, query), [L, S]), k=1) + elif attn_mask is not None: + if attn_mask.element_type == boolean: + attn_bias = Select(LogicalNot(attn_mask), ConvertLike(-inf, query), ConvertLike(0, query)) + else: + attn_bias += attn_mask + attn_weight = MatMul(query, Transpose(key, [-2, -1])) * scale + attn_weight += attn_bias + attn_weight = Softmax(attn_weight, axis=-1) + return MatMul(attn_weight, value) + + +**Attributes** + +* *causal* + + * **Description**: If true, assumes causal attention masking according to the pseudo-code. In this case ``attention_mask`` input described below is ignored. + * **Range of values**: a boolean value + * **Type**: ``bool`` + * **Required**: *yes* + + +**Inputs** + +* **1**: ``query`` - at least 3 dimensional tensor of type *T* and shape ``[N, ..., L, E]``. **Required.** + +* **2**: ``key`` - at least 3 dimensional tensor of type *T* and shape ``[N, ..., S, E]``. **Required.** + +* **3**: ``value`` - at least 3 dimensional tensor of type *T* and shape ``[N, ..., S, Ev]``. **Required.** + +* **4**: ``attention_mask`` - two options: + ** at least 3 dimensional tensor of type *T* or ``boolean`` and shape ``[M, ..., L, S]``, or + ** a scalar of type *T* with value ``0``. Scalar zero value is used to indicate that `attention_mask` is really not required to be applied (``attention_mask=None`` in the pseudo-code above) but ``scale`` is required to be set. 
+ + ``attention_mask`` is ignored if ``causal`` is set to ``True``. **Optional.** + +* **5**: ``scale`` - a scalar tensor of type *T*, an alternative scale factor instead of 1/sqrt(query.shape[-1]) used by default in the pseudo-code above. **Optional.** + + +**Outputs** + +* **1**: - the result of scaled dot-product attention, a tensor of type *T* and shape ``[N, ..., L, Ev]``. + +**Types** + +* *T*: any supported floating-point type. + + +**Dimensions** + +* ``N, ...`` - one or more batch dimensions + +* ``S`` - source sequence length + +* ``L`` - target sequence length + +* ``E`` - embedding dimension of the query and key + +* ``Ev`` - embedding dimension of the value + +* ``M, ...`` - one or more batch dimensions of the mask, should be broadcastable to ``N, ...`` + +At least one batch dimension ``N`` is required and should match among ``query``, ``key`` and ``value`` inputs. +Other batch dimensions ``...`` are optional, if present should match among ``query``, ``key`` and ``value`` inputs as well. + + +**Example** + +.. 
code-block:: xml + :force: + + + + + + 1 + 32 + -1 + 80 + + + 1 + 32 + -1 + 80 + + + 1 + 32 + -1 + 80 + + + 1 + 1 + -1 + -1 + + + + + 1 + 32 + -1 + 80 + + + + +@endsphinxdirective diff --git a/src/bindings/python/src/openvino/runtime/opset13/__init__.py b/src/bindings/python/src/openvino/runtime/opset13/__init__.py index 9cdb7149569ebb..80741ecd4af919 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset13/__init__.py @@ -151,6 +151,7 @@ from openvino.runtime.opset2.ops import roi_pooling from openvino.runtime.opset7.ops import roll from openvino.runtime.opset5.ops import round +from openvino.runtime.opset13.ops import scaled_dot_product_attention from openvino.runtime.opset12.ops import scatter_elements_update from openvino.runtime.opset3.ops import scatter_update from openvino.runtime.opset1.ops import select diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py index fff95b33d234d6..a058e0526c2a1c 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py @@ -136,7 +136,8 @@ def multinomial( inputs = as_nodes(probs, num_samples) if global_seed < 0: - raise RuntimeError(f"global_seed should be positive or 0. Got: {global_seed}") + raise RuntimeError( + f"global_seed should be positive or 0. Got: {global_seed}") if op_seed < 0: raise RuntimeError(f"op_seed should be positive or 0. Got: {op_seed}") @@ -178,7 +179,8 @@ def nms_rotated( :param clockwise: Flag that specifies direction of the box rotation. 
:return: The new node which performs NMSRotated """ - inputs = as_nodes(boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold) + inputs = as_nodes(boxes, scores, max_output_boxes_per_class, + iou_threshold, score_threshold) attributes = { "sort_result_descending": sort_result_descending, @@ -187,3 +189,36 @@ def nms_rotated( } return _get_node_factory_opset13().create("NMSRotated", inputs, attributes) + + +@nameable_op +def scaled_dot_product_attention( + query: NodeInput, + key: NodeInput, + value: NodeInput, + attention_mask: Optional[NodeInput] = None, + scale: Optional[NodeInput] = None, + causal: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which implements Scaled Dot Product Attention. + + :param query: Query tensor of shape [N, ..., L, E] and floating-point datatype. + :param key: Key tensor of shape [N, ..., S, E] and floating-point datatype. + :param value: Value tensor of shape [N, ..., S, Ev] and floating-point datatype. + :param attention_mask: Optional attention mask tensor of shape [N, ..., L, S] or scalar float type zero value. + Refer to the operation specification for a complete description. + :param scale: Optional alternative scale, a floating-point type scalar. + :param causal: If true, then autogenerates causal attention mask instead of using attention_mask input. + In this case attention_mask input is ignored. + :param name: The optional new name for output node. + + :return: The new node performing Scaled Dot Product Attention operation. 
+ """ + inputs = as_nodes(query, key, value, attention_mask) if attention_mask is not None else as_nodes( + query, key, value, scale) + + attributes = { + "causal": causal, + } + return _get_node_factory_opset13().create("ScaledDotProductAttention", inputs, attributes) diff --git a/src/common/transformations/include/transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp new file mode 100644 index 00000000000000..6cd7df71c86050 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ScaledDotProductAttentionDecomposition; + +} // namespace pass +} // namespace ov + +class ov::pass::ScaledDotProductAttentionDecomposition : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ScaledDotProductAttentionDecomposition", "0"); + ScaledDotProductAttentionDecomposition(); + std::shared_ptr decompose(std::shared_ptr node); +}; diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 4357fdc2607d35..f05ef2f37af52a 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -107,6 +107,7 @@ #include "transformations/op_conversions/normalize_l2_decomposition.hpp" #include "transformations/op_conversions/reduce_l1_decomposition.hpp" #include 
"transformations/op_conversions/reduce_l2_decomposition.hpp" +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" #include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" #include "transformations/op_conversions/softmax_decomposition.hpp" #include "transformations/op_conversions/softsign_decomposition.hpp" @@ -145,6 +146,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); + ADD_MATCHER(decomp, ScaledDotProductAttentionDecomposition) ADD_MATCHER(decomp, Gelu7Downgrade) ADD_MATCHER(decomp, BidirectionalSequenceDecomposition) ADD_MATCHER(decomp, ReduceL1Decomposition) diff --git a/src/common/transformations/src/transformations/op_conversions/scaled_dot_product_attention_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/scaled_dot_product_attention_decomposition.cpp new file mode 100644 index 00000000000000..9d3b6fe2297206 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/scaled_dot_product_attention_decomposition.cpp @@ -0,0 +1,136 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" + +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/greater_eq.hpp" +#include "openvino/op/logical_not.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/select.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/sqrt.hpp" +#include 
"openvino/op/squeeze.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +ov::pass::ScaledDotProductAttentionDecomposition::ScaledDotProductAttentionDecomposition() { + MATCHER_SCOPE(ScaledDotProductAttentionDecomposition); + auto pattern_node = ov::pass::pattern::wrap_type(); + + matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto node = std::dynamic_pointer_cast( + pattern_to_output.at(pattern_node).get_node_shared_ptr()); + + if (node == nullptr || transformation_callback(node)) { + return false; + } + + auto new_output_node = decompose(node); + ov::replace_node(node, new_output_node); + return true; + }; + + auto m = std::make_shared(pattern_node, matcher_name); + register_matcher(m, callback); +} + +std::shared_ptr ov::pass::ScaledDotProductAttentionDecomposition::decompose( + std::shared_ptr node) { + using namespace ov::op; + auto query = node->input_value(0); + auto key = node->input_value(1); + auto value = node->input_value(2); + auto q_shape = register_new_node(query, element::i32); + auto k_shape = register_new_node(key, element::i32); + auto minus_one = register_new_node(v0::Constant::create(element::i32, Shape{}, {-1})); + auto minus_two = register_new_node(v0::Constant::create(element::i32, Shape{}, {-2})); + auto zero_i = register_new_node(v0::Constant::create(element::i32, Shape{}, {0})); + auto one_i = register_new_node(v0::Constant::create(element::i32, Shape{}, {1})); + auto one_f = register_new_node(one_i, query); + auto zero_f = register_new_node(zero_i, query); + + Output scale; + if (node->get_input_size() < 5) { + scale = register_new_node(q_shape, minus_one, zero_i)->output(0); + scale = register_new_node(scale, query); + auto sqrt_scale = register_new_node(scale); + scale = register_new_node(one_f, sqrt_scale); + } else { + scale = node->input_value(4); + } + + auto q_scaled = 
register_new_node(query, scale); + auto k_rank = register_new_node(k_shape, element::i32)->output(0); + auto k_last_dim = register_new_node(k_rank, minus_one); + auto k_next_dim = register_new_node(k_rank, minus_two)->output(0); + k_rank = register_new_node(k_rank, zero_i); + auto minus_inf = + register_new_node(v0::Constant::create(element::f32, Shape{}, {-std::numeric_limits::infinity()})) + ->output(0); + auto keep_dim_last = register_new_node(k_next_dim, zero_i); + auto k_dims_before_transpose = register_new_node(zero_i, keep_dim_last, one_i, element::i32); + + auto transpose_dims = + register_new_node(OutputVector{k_dims_before_transpose, k_last_dim, k_next_dim}, 0); + auto k_transposed = register_new_node(key, transpose_dims); + auto scaled_atten = register_new_node(q_scaled, k_transposed)->output(0); + minus_inf = register_new_node(minus_inf, scaled_atten); + + if (node->get_causal() || node->get_input_size() > 3) { + Output mask; + Output atten_mask; + if (!node->get_causal()) { + mask = node->input_value(3); + + // two types of masks are supported. A boolean mask where a value of True indicates that the element should + // take part in attention. A float mask of the same type as query, key, value that is added to the attention + // score. 
+ if (mask.get_element_type() == element::boolean) { + atten_mask = register_new_node(mask, scaled_atten); + auto inv_mask = register_new_node(mask); + atten_mask = register_new_node(inv_mask, atten_mask, minus_inf); + } else { + atten_mask = mask; + } + } else { + auto target_s_len = register_new_node(q_shape, minus_two, zero_i); + auto source_s_len = register_new_node(k_shape, minus_two, zero_i); + auto ssl = register_new_node(source_s_len, zero_i); + auto tsl = register_new_node(target_s_len, zero_i); + auto mask_shape = register_new_node(OutputVector{tsl, ssl}, 0); + mask = register_new_node(minus_inf, mask_shape); + auto horizontal_range = register_new_node(zero_i, source_s_len, one_i, element::i32)->output(0); + horizontal_range = register_new_node(horizontal_range, zero_i); + auto stop = register_new_node(target_s_len, one_i); + auto vertical_range = register_new_node(one_i, stop, one_i, element::i32)->output(0); + vertical_range = register_new_node(vertical_range, one_i); + auto triu = register_new_node(horizontal_range, vertical_range); + atten_mask = register_new_node(triu, mask, zero_f); + } + scaled_atten = register_new_node(scaled_atten, atten_mask); + } + + scaled_atten = register_new_node(scaled_atten, -1); + auto result = register_new_node(scaled_atten, value); + result->set_friendly_name(node->get_friendly_name()); + copy_runtime_info(node, get_new_nodes()); + return result; +} diff --git a/src/core/include/openvino/op/ops.hpp b/src/core/include/openvino/op/ops.hpp index 24ba54ce37b94a..5b28762933a1ec 100644 --- a/src/core/include/openvino/op/ops.hpp +++ b/src/core/include/openvino/op/ops.hpp @@ -156,6 +156,7 @@ #include "openvino/op/roi_pooling.hpp" #include "openvino/op/roll.hpp" #include "openvino/op/round.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/scatter_nd_update.hpp" #include "openvino/op/scatter_update.hpp" diff --git 
a/src/core/include/openvino/op/scaled_dot_product_attention.hpp b/src/core/include/openvino/op/scaled_dot_product_attention.hpp new file mode 100644 index 00000000000000..313e743ddedb7c --- /dev/null +++ b/src/core/include/openvino/op/scaled_dot_product_attention.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace v13 { +/// \brief Scaled dot product attention operation from PyTorch +/// +/// \ingroup ov_ops_cpp_api + +class OPENVINO_API ScaledDotProductAttention : public Op { +public: + OPENVINO_OP("ScaledDotProductAttention", "opset13", op::Op); + + /// \brief Constructs a ScaledDotProductAttention operation. + ScaledDotProductAttention() = default; + + ScaledDotProductAttention(const OutputVector& inputs, bool causal); + + ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + const Output& attn_mask, + const Output& scale, + bool causal); + + ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + const Output& attn_mask, + bool causal); + + ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + bool causal); + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + bool visit_attributes(AttributeVisitor& visitor) override; + void validate_and_infer_types() override; + + bool get_causal() const { + return m_causal; + } + +private: + bool m_causal; +}; + +} // namespace v13 +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset13_tbl.hpp b/src/core/include/openvino/opsets/opset13_tbl.hpp index 95d4ca0f375511..13b598eaca11f0 100644 --- a/src/core/include/openvino/opsets/opset13_tbl.hpp +++ b/src/core/include/openvino/opsets/opset13_tbl.hpp @@ -215,3 +215,4 @@ _OPENVINO_OP_REG(BitwiseOr, ov::op::v13) 
_OPENVINO_OP_REG(BitwiseXor, ov::op::v13) _OPENVINO_OP_REG(NMSRotated, ov::op::v13) _OPENVINO_OP_REG(Multinomial, ov::op::v13) +_OPENVINO_OP_REG(ScaledDotProductAttention, ov::op::v13) diff --git a/src/core/src/op/scaled_dot_product_attention.cpp b/src/core/src/op/scaled_dot_product_attention.cpp new file mode 100644 index 00000000000000..9d5819f4fdf5a6 --- /dev/null +++ b/src/core/src/op/scaled_dot_product_attention.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/op/scaled_dot_product_attention.hpp" + +#include "itt.hpp" + +using namespace std; +namespace ov { + +op::v13::ScaledDotProductAttention::ScaledDotProductAttention(const OutputVector& inputs, bool causal) + : op::Op(inputs), + m_causal(causal) { + constructor_validate_and_infer_types(); +} + +op::v13::ScaledDotProductAttention::ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + const Output& attn_mask, + const Output& scale, + bool causal) + : ScaledDotProductAttention({query, key, value, attn_mask, scale}, causal) {} + +op::v13::ScaledDotProductAttention::ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + const Output& attn_mask, + bool causal) + : ScaledDotProductAttention({query, key, value, attn_mask}, causal) {} + +op::v13::ScaledDotProductAttention::ScaledDotProductAttention(const Output& query, + const Output& key, + const Output& value, + bool causal) + : ScaledDotProductAttention({query, key, value}, causal) {} + +void op::v13::ScaledDotProductAttention::validate_and_infer_types() { + OV_OP_SCOPE(v13_ScaledDotProductAttention_validate_and_infer_types); + NODE_VALIDATION_CHECK(this, get_input_size() >= 3 && get_input_size() <= 5); + // TODO: More checks and accurate deduction of dimensions in case when various + // dynamic combinations appear. 
+ auto query = get_input_partial_shape(0); + auto key = get_input_partial_shape(1); + auto value = get_input_partial_shape(2); + + // using particular dimensions from query and value, to do that need to have them statically ranked + if (query.rank().is_dynamic() || value.rank().is_dynamic()) { + set_output_type(0, get_input_element_type(0), PartialShape::dynamic()); + return; + } + + OPENVINO_ASSERT(query.rank().get_length() >= 3); + OPENVINO_ASSERT(value.rank().get_length() >= 3); + + auto dimensions = std::vector(query.begin(), query.end() - 1); + dimensions.push_back(*(value.end() - 1)); + set_output_type(0, get_input_element_type(0), PartialShape(dimensions)); +} + +std::shared_ptr op::v13::ScaledDotProductAttention::clone_with_new_inputs(const OutputVector& new_args) const { + OV_OP_SCOPE(v13_ScaledDotProductAttention_clone_with_new_inputs); + return std::make_shared(new_args, m_causal); +} + +bool op::v13::ScaledDotProductAttention::visit_attributes(AttributeVisitor& visitor) { + OV_OP_SCOPE(v13_ScaledDotProductAttention_visit_attributes); + visitor.on_attribute("causal", m_causal); + return true; +} + +} // namespace ov diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index ee055befd4ba58..29e9df2b4dab2c 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -71,7 +71,7 @@ INSTANTIATE_TEST_SUITE_P(opset, OpsetTestParams{ov::get_opset10, 177}, OpsetTestParams{ov::get_opset11, 177}, OpsetTestParams{ov::get_opset12, 178}, - OpsetTestParams{ov::get_opset13, 184}), + OpsetTestParams{ov::get_opset13, 185}), OpsetTestNameGenerator{}); class MyOpOld : public ov::op::Op { diff --git a/src/core/tests/type_prop/scaled_dot_product_attention.cpp b/src/core/tests/type_prop/scaled_dot_product_attention.cpp new file mode 100644 index 00000000000000..5b1da9e577644e --- /dev/null +++ b/src/core/tests/type_prop/scaled_dot_product_attention.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/type_prop.hpp" +#include "openvino/openvino.hpp" +#include "openvino/opsets/opset13.hpp" + +using namespace ov; +using namespace testing; + +TEST(type_prop, scale_dot_product_attention_static_5_inputs) { + const auto query = std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{1, 3, 5}); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_4_inputs) { + const auto query = std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{1, 3, 5}); + auto causal = false; + + const auto gn = std::make_shared(query, key, value, attention_mask, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_3_inputs) { + const auto query = std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + auto causal = false; + + const auto gn = std::make_shared(query, key, value, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_3_inputs_causal) { + const auto query = 
std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + auto causal = true; + + const auto gn = std::make_shared(query, key, value, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_ignored_attention_mask) { + const auto query = std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{7, 8, 9, 10, 11}); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_5_inputs_extra_batch) { + const auto query = std::make_shared(element::f32, Shape{2, 7, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 7, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 7, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{1, 1, 3, 5}); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 7, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_4_inputs_extra_batch) { + const auto query = std::make_shared(element::f32, Shape{2, 7, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 7, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 7, 5, 6}); + const auto attention_mask = 
std::make_shared(element::f32, Shape{1, 1, 3, 5}); + auto causal = false; + + const auto gn = std::make_shared(query, key, value, attention_mask, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 7, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_3_inputs_extra_batch) { + const auto query = std::make_shared(element::f32, Shape{2, 7, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 7, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 7, 5, 6}); + auto causal = false; + + const auto gn = std::make_shared(query, key, value, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 7, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_3_inputs_extra_batch_causal_true) { + const auto query = std::make_shared(element::f32, Shape{2, 7, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 7, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 7, 5, 6}); + auto causal = true; + + const auto gn = std::make_shared(query, key, value, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 7, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_static_ignored_attention_mask_extra_batch) { + const auto query = std::make_shared(element::f32, Shape{2, 7, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 7, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 7, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{7, 8, 9, 10, 11}); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_shape(), (Shape{2, 7, 3, 6})); +} + +TEST(type_prop, scale_dot_product_attention_dynamic_3d) { + const auto dynamic = 
PartialShape{-1, -1, -1}; + const auto query = std::make_shared(element::f32, dynamic); + const auto key = std::make_shared(element::f32, dynamic); + const auto value = std::make_shared(element::f32, dynamic); + const auto attention_mask = std::make_shared(element::f32, dynamic); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_output_partial_shape(0), (dynamic)); +} + +TEST(type_prop, scale_dot_product_attention_dynamic_4d) { + const auto dynamic = PartialShape{-1, -1, -1, -1}; + const auto query = std::make_shared(element::f32, dynamic); + const auto key = std::make_shared(element::f32, dynamic); + const auto value = std::make_shared(element::f32, dynamic); + const auto attention_mask = std::make_shared(element::f32, dynamic); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; + + const auto gn = + std::make_shared(query, key, value, attention_mask, scale, causal); + EXPECT_EQ(gn->get_element_type(), element::f32); + EXPECT_EQ(gn->get_output_partial_shape(0), (dynamic)); +} diff --git a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp index 82231472e401be..ed8ecc4b0f846f 100644 --- a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp +++ b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp @@ -1,28 +1,11 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/scaled_dot_product_attention.hpp" #include "openvino/frontend/pytorch/node_context.hpp" -#include "openvino/op/add.hpp" -#include "openvino/op/broadcast.hpp" -#include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert_like.hpp" -#include "openvino/op/divide.hpp" -#include 
"openvino/op/gather.hpp" -#include "openvino/op/greater_eq.hpp" -#include "openvino/op/logical_not.hpp" -#include "openvino/op/matmul.hpp" -#include "openvino/op/multiply.hpp" -#include "openvino/op/range.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/select.hpp" -#include "openvino/op/shape_of.hpp" -#include "openvino/op/softmax.hpp" -#include "openvino/op/sqrt.hpp" -#include "openvino/op/squeeze.hpp" -#include "openvino/op/transpose.hpp" -#include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/framework_node.hpp" #include "utils.hpp" @@ -37,85 +20,34 @@ std::shared_ptr translate_scaled_dot_product_attention_common(const No auto query = context.get_input(0); auto key = context.get_input(1); auto value = context.get_input(2); - auto q_shape = context.mark_node(std::make_shared(query, element::i32)); - auto k_shape = context.mark_node(std::make_shared(key, element::i32)); - auto minus_one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1})); - auto minus_two = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-2})); - auto zero_i = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); - auto one_i = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); - auto scale = context.mark_node(std::make_shared(q_shape, minus_one, zero_i)); - scale = context.mark_node(std::make_shared(scale, query)); - auto sqrt_scale = context.mark_node(std::make_shared(scale)); - auto one_f = context.mark_node(std::make_shared(one_i, sqrt_scale)); - auto zero_f = context.mark_node(std::make_shared(zero_i, sqrt_scale)); - scale = context.mark_node(std::make_shared(one_f, sqrt_scale)); - auto q_scaled = context.mark_node(std::make_shared(query, scale)); - auto k_rank = context.mark_node(std::make_shared(k_shape, element::i32)); - auto k_last_dim = context.mark_node(std::make_shared(k_rank, minus_one)); - auto k_next_dim = context.mark_node(std::make_shared(k_rank, minus_two)); - k_rank = 
context.mark_node(std::make_shared(k_rank, zero_i)); - auto minus_inf = - context.mark_node(v0::Constant::create(element::f32, Shape{}, {-std::numeric_limits::infinity()})); - auto keep_dim_last = context.mark_node(std::make_shared(k_next_dim, zero_i)); - auto k_dims_before_transpose = - context.mark_node(std::make_shared(zero_i, keep_dim_last, one_i, element::i32)); - auto transpose_dims = context.mark_node( - std::make_shared(OutputVector{k_dims_before_transpose, k_last_dim, k_next_dim}, 0)); - auto k_transposed = context.mark_node(std::make_shared(key, transpose_dims)); - auto scaled_atten = context.mark_node(std::make_shared(q_scaled, k_transposed)); - minus_inf = context.mark_node(std::make_shared(minus_inf, scaled_atten)); - // two types of masks are supported. A boolean mask where a value of True indicates that the element should take - // part in attention. A float mask of the same type as query, key, value that is added to the attention score. - auto is_causal = false; - if (!context.input_is_none(5)) { - is_causal = context.const_input(5); - } - if (is_causal || !context.input_is_none(3)) { - Output mask; - Output atten_mask; - if (!context.input_is_none(3)) { - mask = context.get_input(3); - if (mask.get_element_type() == element::boolean) { - atten_mask = context.mark_node(std::make_shared(mask, scaled_atten)); - auto inv_mask = context.mark_node(std::make_shared(mask)); - atten_mask = context.mark_node(std::make_shared(inv_mask, atten_mask, minus_inf)); - } else { - atten_mask = mask; - } - } else { - auto target_s_len = context.mark_node(std::make_shared(q_shape, minus_two, zero_i)); - auto source_s_len = context.mark_node(std::make_shared(k_shape, minus_two, zero_i)); - auto ssl = context.mark_node(std::make_shared(source_s_len, zero_i)); - auto tsl = context.mark_node(std::make_shared(target_s_len, zero_i)); - auto mask_shape = context.mark_node(std::make_shared(OutputVector{tsl, ssl}, 0)); - mask = context.mark_node(std::make_shared(minus_inf, 
mask_shape)); - auto horizontal_range = - context.mark_node(std::make_shared(zero_i, source_s_len, one_i, element::i32)); - horizontal_range = context.mark_node(std::make_shared(horizontal_range, zero_i)); - auto stop = context.mark_node(std::make_shared(target_s_len, one_i)); - auto vertical_range = context.mark_node(std::make_shared(one_i, stop, one_i, element::i32)); - vertical_range = context.mark_node(std::make_shared(vertical_range, one_i)); - auto triu = context.mark_node(std::make_shared(horizontal_range, vertical_range)); - atten_mask = context.mark_node(std::make_shared(triu, mask, zero_f)); - } - scaled_atten = context.mark_node(std::make_shared(scaled_atten, atten_mask)); + auto is_causal = context.input_is_none(5) ? false : context.const_input(5); + OutputVector inputs = {query, key, value}; // mandatory inputs + + if (!context.input_is_none(3)) + inputs.push_back(context.get_input(3)); + else if (!context.input_is_none(6)) { + // need to fill a gap in inputs with scalar 0 to be able to pass one extra input after that + auto zero = op::v0::Constant::create(element::f32, Shape{}, {0}); + inputs.push_back(context.mark_node(std::make_shared(zero, query))); } - scaled_atten = context.mark_node(std::make_shared(scaled_atten, -1)); - return context.mark_node(std::make_shared(scaled_atten, value)); -}; + if (!context.input_is_none(6)) + inputs.push_back(context.get_input(6)); + + return context.mark_node(std::make_shared(inputs, is_causal)); +} OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? 
attn_mask=None, float - // dropout_p=0., bool is_causal=False) - num_inputs_check(context, 6, 6); + // dropout_p=0., bool is_causal=False, float scale=None) + num_inputs_check(context, 6, 7); return {translate_scaled_dot_product_attention_common(context)}; }; OutputVector translate_scaled_dot_product_attention_fx(const NodeContext& context) { // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float - // dropout_p=0., bool is_causal=False) - num_inputs_check(context, 3, 6); + // dropout_p=0., bool is_causal=False) TODO: Scale parameter? + num_inputs_check(context, 3, 6); // TODO: Set 7 instead of 6 if `scale` argument supported in FX. auto output = translate_scaled_dot_product_attention_common(context); // TODO: scaled_dot_product_flash_attention has 9 outputs but for most cases only // the first input is used. Rest of the outputs should be returned properly as diff --git a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp index c57017666bd463..bf6ec48e210a3c 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/op_impl_check/single_op_graph.cpp @@ -1013,6 +1013,20 @@ std::shared_ptr generate(const std::shared_ptr &nod return std::make_shared(results, params, "RollGraph"); } +std::shared_ptr generate(const std::shared_ptr &node) { + const auto query = std::make_shared(element::f32, Shape{2, 3, 4}); + const auto key = std::make_shared(element::f32, Shape{2, 5, 4}); + const auto value = std::make_shared(element::f32, Shape{2, 5, 6}); + const auto attention_mask = std::make_shared(element::f32, Shape{1, 3, 5}); + const auto scale = std::make_shared(element::f32, Shape{}); + auto causal = false; 
+ + const auto op = + std::make_shared(query, key, value, attention_mask, scale, causal); + ov::ResultVector results{std::make_shared(op)}; + return std::make_shared(results, ov::ParameterVector{query, key, value, attention_mask, scale}, "ScaledDotProductAttentionGraph"); +} + std::shared_ptr generate(const std::shared_ptr &node) { ov::ParameterVector params{std::make_shared(ov::element::f32, ov::Shape{{2, 2}}), std::make_shared(ov::element::f32, ov::Shape{{2, 2}})}; diff --git a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py index 69c600a0b7562d..67e6322ef0ddb0 100644 --- a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py +++ b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py @@ -12,31 +12,37 @@ class TestScaledDotProductAttention(PytorchLayerTest): def _prepare_input(self): return (np.random.randn(1, 2, 8, 4).astype(np.float32), np.random.randn(1, 2, 8, 4).astype(np.float32), np.random.randn(1, 2, 8, 4).astype(np.float32)) - def create_model(self, mask, is_causal): + def create_model(self, mask, is_causal, scale): import torch.nn.functional as F import torch class aten_scaled_dot_product_atten(torch.nn.Module): - def __init__(self, mask=False, is_causal=False) -> None: + def __init__(self, mask=False, is_causal=False, scale=False) -> None: super().__init__() - self.mask = None if not mask else torch.from_numpy(np.random.randint(0, 2, (8, 8)).astype(np.float32)) + self.mask = None if not mask else torch.from_numpy( + np.random.randint(0, 2, (8, 8)).astype(np.float32)) self.is_causal = is_causal if is_causal and mask: self.mask.to(torch.bool) self.is_causal = False + self.scale = None if not scale else torch.tensor( + 5, dtype=torch.float) + def forward(self, query, key, value): - return F.scaled_dot_product_attention(query, key, value, attn_mask=self.mask, is_causal=self.is_causal) + return F.scaled_dot_product_attention(query, key, 
value, attn_mask=self.mask, is_causal=self.is_causal, scale=self.scale) ref_net = None - return aten_scaled_dot_product_atten(mask, is_causal), ref_net, "aten::scaled_dot_product_attention" + return aten_scaled_dot_product_atten(mask, is_causal, scale), ref_net, 'aten::scaled_dot_product_attention' @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_fx_backend - @pytest.mark.parametrize(['mask', "is_causal"], [(False, False), (False, True), (True, True), (True, False)]) - def test_scaled_dot_product_atten(self, ie_device, precision, ir_version, mask, is_causal): - self._test(*self.create_model(mask, is_causal),ie_device, precision, ir_version) + @pytest.mark.parametrize(['mask', 'is_causal'], [(False, False), (False, True), (True, True), (True, False)]) + @pytest.mark.parametrize('scale', [False, True]) + def test_scaled_dot_product_atten(self, ie_device, precision, ir_version, mask, is_causal, scale): + self._test(*self.create_model(mask, is_causal, scale), + ie_device, precision, ir_version) From 319a6584a2cfd6c494ec6771910929a44e59cbc4 Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Thu, 9 Nov 2023 13:10:08 +0900 Subject: [PATCH 244/275] [GPU] Decompose test combination to reduce test time (#20968) --- .../extract_image_patches.cpp | 25 +++++++------ .../single_layer_tests/rnn_cell.cpp | 17 ++++++++- .../dynamic/detection_output.cpp | 36 +++++++++++++++---- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp index 8492067daeb1f7..7e3611c3f40d18 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/extract_image_patches.cpp @@ -48,22 +48,25 @@ const std::vector netPrecisions 
= { ov::element::f32 }; -const auto extractImagePatchesParamsSet = ::testing::Combine( - ::testing::ValuesIn(inDataShape), - ::testing::ValuesIn(kernels), - ::testing::ValuesIn(strides), - ::testing::ValuesIn(rates), - ::testing::ValuesIn(autoPads) -); +INSTANTIATE_TEST_SUITE_P(smoke_layers_GPU1, ExtractImagePatchesTest, + ::testing::Combine( + ::testing::Values(ov::test::static_shapes_to_test_representation(inDataShape)[0]), + ::testing::Values(kernels[0]), + ::testing::Values(strides[0]), + ::testing::ValuesIn(rates), + ::testing::ValuesIn(autoPads), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ExtractImagePatchesTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_layers_GPU, ExtractImagePatchesTest, +INSTANTIATE_TEST_SUITE_P(smoke_layers_GPU2, ExtractImagePatchesTest, ::testing::Combine( ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inDataShape)), ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), - ::testing::ValuesIn(rates), - ::testing::ValuesIn(autoPads), - ::testing::ValuesIn(netPrecisions), + ::testing::Values(rates[0]), + ::testing::Values(autoPads[0]), + ::testing::Values(netPrecisions[0]), ::testing::Values(ov::test::utils::DEVICE_GPU)), ExtractImagePatchesTest::getTestCaseName); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp index b721d406aacffe..9b1cbda00ffd2b 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/rnn_cell.cpp @@ -23,13 +23,28 @@ namespace { std::vector model_types = {ov::element::f32, ov::element::f16}; - INSTANTIATE_TEST_SUITE_P(smoke_RNNCellCommon, RNNCellTest, + INSTANTIATE_TEST_SUITE_P(smoke_RNNCellCommon1, RNNCellTest, ::testing::Combine( 
::testing::ValuesIn(should_decompose), ::testing::ValuesIn(batch), ::testing::ValuesIn(hidden_size), ::testing::ValuesIn(input_size), ::testing::ValuesIn(activations), + ::testing::Values(clip[0]), + ::testing::Values(layer_types[0]), + ::testing::Values(layer_types[0]), + ::testing::Values(layer_types[0]), + ::testing::Values(model_types[0]), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + RNNCellTest::getTestCaseName); + + INSTANTIATE_TEST_SUITE_P(smoke_RNNCellCommon2, RNNCellTest, + ::testing::Combine( + ::testing::Values(should_decompose[0]), + ::testing::Values(batch[0]), + ::testing::Values(hidden_size[0]), + ::testing::Values(input_size[0]), + ::testing::Values(activations[0]), ::testing::ValuesIn(clip), ::testing::ValuesIn(layer_types), ::testing::ValuesIn(layer_types), diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp index e58f749c93e964..481d769cc97606 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp @@ -272,7 +272,7 @@ const std::vector decreaseLabelId = {true, false}; const float objectnessScore = 0.4f; const std::vector numberBatch = {1, 2}; -const auto commonAttributes = ::testing::Combine( +const auto commonAttributes1 = ::testing::Combine( ::testing::Values(numClasses[0]), ::testing::Values(backgroundLabelId), ::testing::ValuesIn(topK), @@ -284,7 +284,18 @@ const auto commonAttributes = ::testing::Combine( ::testing::ValuesIn(clipBeforeNms), ::testing::ValuesIn(decreaseLabelId) ); - +const auto commonAttributes2 = ::testing::Combine( + ::testing::Values(numClasses[0]), + ::testing::Values(backgroundLabelId), + ::testing::Values(topK[0]), + ::testing::Values(keepTopK[0]), + ::testing::Values(codeType[0]), + ::testing::Values(nmsThreshold), + 
::testing::Values(confidenceThreshold), + ::testing::Values(clipAfterNms[0]), + ::testing::Values(clipBeforeNms[0]), + ::testing::Values(decreaseLabelId[0]) +); const auto commonAttributes_v8 = ::testing::Combine( ::testing::Values(numClasses[1]), ::testing::Values(backgroundLabelId), @@ -382,8 +393,17 @@ const std::vector specificParams3InDynamic = { }, }; -const auto params3InputsDynamic = ::testing::Combine( - commonAttributes, +const auto params3InputsDynamic1 = ::testing::Combine( + commonAttributes1, + ::testing::Values(specificParams3InDynamic[0]), + ::testing::ValuesIn(numberBatch), + ::testing::Values(objectnessScore), + ::testing::Values(false, true), + ::testing::Values(ov::test::utils::DEVICE_GPU) +); + +const auto params3InputsDynamic2 = ::testing::Combine( + commonAttributes2, ::testing::ValuesIn(specificParams3InDynamic), ::testing::ValuesIn(numberBatch), ::testing::Values(objectnessScore), @@ -400,8 +420,12 @@ const auto params3InputsDynamic_v8 = ::testing::Combine( ::testing::Values(ov::test::utils::DEVICE_GPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In, DetectionOutputLayerGPUTest, - params3InputsDynamic, +INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In1, DetectionOutputLayerGPUTest, + params3InputsDynamic1, + DetectionOutputLayerGPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In2, DetectionOutputLayerGPUTest, + params3InputsDynamic2, DetectionOutputLayerGPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputV8Dynamic3In, DetectionOutputLayerGPUTest, From 8f406067d17f01463f646f6d03671657f78defa9 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 9 Nov 2023 09:54:46 +0400 Subject: [PATCH 245/275] [GPU] Remove binary convolution primitive and all related code (#20889) --- .../intel_gpu/plugin/primitives_list.hpp | 2 +- .../primitives/binary_convolution.hpp | 124 ----- .../include/intel_gpu/runtime/format.hpp | 2 - 
.../include/intel_gpu/runtime/layout.hpp | 6 - .../src/graph/binary_convolution.cpp | 119 ----- .../graph_optimizer/post_optimize_weights.cpp | 3 - .../graph/graph_optimizer/prepare_padding.cpp | 82 +-- .../prepare_primitive_fusing.cpp | 40 +- .../graph_optimizer/prepare_quantization.cpp | 62 +-- .../remove_redundant_reorders.cpp | 1 - .../graph/graph_optimizer/reorder_inputs.cpp | 28 +- .../graph/impls/ocl/binary_convolution.cpp | 89 ---- .../impls/ocl/kernel_selector_helper.cpp | 17 - .../src/graph/impls/ocl/quantize.cpp | 3 - .../src/graph/impls/ocl/register.cpp | 1 - .../src/graph/impls/ocl/register.hpp | 2 - .../graph/include/binary_convolution_inst.h | 67 --- .../src/graph/include/layout_optimizer.h | 1 - .../src/graph/include/pass_manager.h | 1 - .../src/graph/include/quantize_inst.h | 5 +- .../intel_gpu/src/graph/layout_optimizer.cpp | 5 - src/plugins/intel_gpu/src/graph/network.cpp | 37 -- src/plugins/intel_gpu/src/graph/program.cpp | 25 +- src/plugins/intel_gpu/src/graph/quantize.cpp | 22 - .../src/kernel_selector/cache/cache.json | 404 +------------- .../cl_kernels/binary_convolution_gpu_1x1.cl | 215 -------- ...inary_convolution_gpu_1x1_b_fs_yx_fsv16.cl | 168 ------ .../binary_convolution_gpu_generic.cl | 201 ------- .../cl_kernels/binary_convolution_gpu_ref.cl | 111 ---- .../cl_kernels/quantize_gpu_ref.cl | 38 -- .../cl_kernels/reorder_data_binary.cl | 89 ---- .../cl_kernels/reorder_weights_binary.cl | 42 -- .../src/kernel_selector/common_tools.h | 2 - .../src/kernel_selector/common_types.h | 3 - .../intel_gpu/src/kernel_selector/jitter.cpp | 7 - .../kernel_selector_common.cpp | 6 - .../kernel_selector_params.cpp | 12 - .../kernel_selector/kernel_selector_params.h | 5 - .../kernel_selector/kernel_selector_utils.cpp | 3 - .../binary_convolution_kernel_1x1.cpp | 237 --------- .../binary_convolution_kernel_1x1.h | 34 -- ...y_convolution_kernel_1x1_b_fs_yx_fsv16.cpp | 200 ------- ...ary_convolution_kernel_1x1_b_fs_yx_fsv16.h | 34 -- 
.../binary_convolution_kernel_base.cpp | 272 ---------- .../binary_convolution_kernel_base.h | 73 --- .../binary_convolution_kernel_generic.cpp | 251 --------- .../binary_convolution_kernel_generic.h | 34 -- .../binary_convolution_kernel_ref.cpp | 98 ---- .../binary_convolution_kernel_ref.h | 33 -- .../binary_convolution_kernel_selector.cpp | 23 - .../binary_convolution_kernel_selector.h | 23 - .../binary_convolution_params.cpp | 52 -- .../binary_convolution_params.h | 39 -- .../kernels/quantize/quantize_kernel_base.cpp | 22 - .../kernels/quantize/quantize_kernel_params.h | 4 - .../kernels/quantize/quantize_kernel_ref.cpp | 17 +- .../kernels/reorder/reorder_kernel_base.cpp | 1 - .../kernels/reorder/reorder_kernel_binary.cpp | 90 ---- .../kernels/reorder/reorder_kernel_binary.h | 22 - .../reorder/reorder_kernel_selector.cpp | 2 - .../reorder/reorder_weights_binary_kernel.cpp | 42 -- .../reorder/reorder_weights_binary_kernel.h | 21 - .../reorder_weights_kernel_selector.cpp | 2 - .../src/kernel_selector/tensor_type.cpp | 11 - .../src/kernel_selector/tensor_type.h | 2 - src/plugins/intel_gpu/src/plugin/graph.cpp | 1 - .../intel_gpu/src/plugin/ops/constant.cpp | 1 - .../intel_gpu/src/plugin/ops/convolution.cpp | 36 -- .../transformations/binary_conv_to_conv.cpp | 120 +++++ .../transformations/binary_conv_to_conv.hpp | 19 + .../src/plugin/transformations_pipeline.cpp | 2 + src/plugins/intel_gpu/src/runtime/format.cpp | 5 +- src/plugins/intel_gpu/src/runtime/layout.cpp | 7 - .../binary_convolution_fusion_test.cpp | 277 ---------- .../tests/unit/fusions/fusion_test_common.hpp | 17 +- .../unit/shape_infer/quantize_si_test.cpp | 4 - .../binary_convolution_gpu_test.cpp | 495 ------------------ .../unit/test_cases/quantize_gpu_test.cpp | 66 --- .../unit/test_cases/reorder_gpu_test.cpp | 100 +--- .../tests/unit/test_utils/test_utils.h | 125 ----- .../convert_binary_conv_to_conv_test.cpp | 72 +++ 81 files changed, 232 insertions(+), 4804 deletions(-) delete mode 100644 
src/plugins/intel_gpu/include/intel_gpu/primitives/binary_convolution.hpp delete mode 100644 src/plugins/intel_gpu/src/graph/binary_convolution.cpp delete mode 100644 src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp delete mode 100644 src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_generic.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_ref.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_binary.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_binary.cl delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.h delete mode 100644 
src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.h delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.cpp delete mode 100644 src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.h create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.hpp delete mode 100644 src/plugins/intel_gpu/tests/unit/fusions/binary_convolution_fusion_test.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/binary_convolution_gpu_test.cpp create mode 100644 src/plugins/intel_gpu/tests/unit/transformations/convert_binary_conv_to_conv_test.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index ceba5be5a5dd53..2377670585f618 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -94,7 +94,7 @@ REGISTER_FACTORY(v0, 
Unsqueeze); REGISTER_FACTORY(v1, Add); REGISTER_FACTORY(v1, AvgPool); REGISTER_FACTORY(v1, BatchToSpace); -REGISTER_FACTORY(v1, BinaryConvolution); +// REGISTER_FACTORY(v1, BinaryConvolution); Supported via BinaryConvolution->Convolution conversion REGISTER_FACTORY(v1, Broadcast); REGISTER_FACTORY(v1, ConvertLike); REGISTER_FACTORY(v1, Convolution); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/binary_convolution.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/binary_convolution.hpp deleted file mode 100644 index 5ba028bda788f1..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/binary_convolution.hpp +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include "primitive.hpp" -#include "openvino/core/coordinate_diff.hpp" -#include "openvino/core/strides.hpp" -#include - -namespace cldnn { - -/// @brief Performs forward spatial binary_convolution with weight sharing. -struct binary_convolution : public primitive_base { - CLDNN_DECLARE_PRIMITIVE(binary_convolution) - - binary_convolution() : primitive_base("", {}) {} - - /// @brief Constructs binary_convolution primitive. - /// @param id This primitive id. - /// @param input Input primitive id. - /// @param weights List of primitive ids containing weights data. - /// @param pad Defines logical pad value added to input tensor - /// @param stride Defines shift in input buffer between adjacent calculations of output values. - /// @param dilation Defines gaps in the input - dilation rate k=1 is normal binary_convolution, - /// k=2 means skipping one pixel per input, k=4 means skipping 3 pixels. - /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. - /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4]. 
- /// @param output_size User-defined output data size of the primitive (w/o padding). - /// @param groups Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1. - /// @param pad_value Logical value of padding. Can be one of 3 values: 1 - pad bits equal to 1; -1 -> pad bits equal to 0; 0 -> pad is not counted - /// @param calc_precision Precision of intermediate accumulators - binary_convolution(const primitive_id& id, - const input_info& input, - const std::vector& weights, - ov::Strides stride = {1, 1}, - ov::CoordinateDiff pad = {0, 0}, - ov::Strides dilation = {1, 1}, - tensor output_size = {0, 0, 0, 0}, - int groups = 1, - float pad_value = 0.0f, - data_types calc_precision = data_types::f32, - const padding& output_padding = padding()) - : primitive_base(id, {input}, {output_padding}, {optional_data_type {calc_precision}}), - pad(pad), - stride(stride), - dilation(dilation), - output_size(output_size), - groups(groups), - pad_value(pad_value), - weights(weights) {} - - /// @brief Defines logical pad value added to input tensor - ov::CoordinateDiff pad; - /// @brief Defines shift in input buffer between adjacent calculations of output values. - ov::Strides stride; - /// @brief Defines gaps in the input - dilation rate k=1 is normal binary_convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels. - /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. - /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4]. - ov::Strides dilation; - /// @brief User-defined output data size of the primitive (w/o padding). - tensor output_size; - /// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1. - int groups = 1; - /// @brief Logical value of padding. 
Can be one of 3 values: 1 - pad bits equal to 1; -1 -> pad bits equal to 0; 0 -> pad is not counted - float pad_value = 0.0f; - /// @brief List of primitive ids containing weights data. - const primitive_id_arr weights; - - size_t hash() const override { - size_t seed = primitive::hash(); - seed = hash_range(seed, pad.begin(), pad.end()); - seed = hash_range(seed, stride.begin(), stride.end()); - seed = hash_range(seed, dilation.begin(), dilation.end()); - seed = hash_combine(seed, groups); - seed = hash_combine(seed, pad_value); - seed = hash_combine(seed, weights.size()); - return seed; - } - - bool operator==(const primitive& rhs) const override { - if (!compare_common_params(rhs)) - return false; - - auto rhs_casted = downcast(rhs); - - return pad == rhs_casted.pad && - stride == rhs_casted.stride && - dilation == rhs_casted.dilation && - groups == rhs_casted.groups && - pad_value == rhs_casted.pad_value && - weights.size() == rhs_casted.weights.size(); - } - - void save(BinaryOutputBuffer& ob) const override { - primitive_base::save(ob); - ob << pad; - ob << stride; - ob << dilation; - ob << output_size; - ob << groups; - ob << pad_value; - ob << weights; - } - - void load(BinaryInputBuffer& ib) override { - primitive_base::load(ib); - ib >> pad; - ib >> stride; - ib >> dilation; - ib >> output_size; - ib >> groups; - ib >> pad_value; - ib >> *const_cast(&weights); - } - - std::vector> get_dependencies() const override { - std::vector> ret; - ret.reserve(weights.size()); - for (auto& w : weights) ret.push_back(std::ref(w)); - return ret; - } -}; -} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp index ea3e2aec0274d0..f155dcea481f30 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp @@ -123,7 +123,6 @@ struct format { bs_fs_fsv8_bsv16, ///< format used only for fully 
connected bs_f_bsv16, ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice ///< (responses slice), bsv16 - 16 values of single batch slice, f - flattened plane of (fyx) - b_fs_yx_32fp, ///< format for data for binary convolutions winograd_2x3_s1_data, ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1 nv12, ///< format for media nv12 input image_2d_rgba, ///< format for image2d RGBA, always allocates memory for 4 feature maps (even when only 3 are used) @@ -219,7 +218,6 @@ struct format { os_is_yx_osv32_isv4_swizzled_by_2, ///< format for weights for IMAD convolutions os_is_yx_osv32_isv4, ///< format for weights for IMAD convolutions os_is_zyx_osv32_isv4, ///< format for weights for IMAD convolutions - os_is_yx_osv32_isv32p, ///< format for weights for binary convolutions lstm_weights_dio, ///< dynamic_lstm, direction, ///< than IO (I - input size, O - 4 * hidden_size) os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 679f4c51ea6881..f3e2af08a33f0a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -49,12 +49,6 @@ struct data_type_traits { } static ov::element::Type max_type(ov::element::Type t1, ov::element::Type t2) { - if (t1 == ov::element::u1) - return t2; - - if (t2 == ov::element::u1) - return t1; - if (t1.bitwidth() < t2.bitwidth()) return t2; diff --git a/src/plugins/intel_gpu/src/graph/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/binary_convolution.cpp deleted file mode 100644 index ff8a34c3364db1..00000000000000 --- a/src/plugins/intel_gpu/src/graph/binary_convolution.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// -#include 
"binary_convolution_inst.h" -#include "convolution_inst.h" -#include "primitive_type_base.h" -#include "intel_gpu/runtime/error_handler.hpp" -#include "json_object.h" -#include - -namespace cldnn { -GPU_DEFINE_PRIMITIVE_TYPE_ID(binary_convolution) - -layout binary_convolution_inst::calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto output_type = *desc->output_data_types[0]; - auto output_size = desc->output_size; - auto layout = cldnn::layout{output_type, format::bfyx, output_size}; - if (impl_param.has_fused_primitives()) { - layout = impl_param.get_fused_output_layout(); - } - - auto users = node.get_users(); - if (users.size() == 1 && users.front()->is_type()) { - auto conv_groups = (int32_t)users.front()->as().get_groups(); - - bool next_is_dw = conv_groups > 1 && conv_groups == output_size.feature[0]; - - if ((layout.data_type == data_types::f16 || layout.data_type == data_types::f32) && next_is_dw) { - layout.format = cldnn::format::b_fs_yx_fsv16; - } - } - - return layout; -} - -std::string binary_convolution_inst::to_string(binary_convolution_node const& node) { - auto desc = node.get_primitive(); - auto strd = desc->stride; - auto dilation = desc->dilation; - auto node_info = node.desc_to_json(); - - std::stringstream primitive_description; - json_composite conv_info; - conv_info.add("stride", cldnn::to_string(strd)); - conv_info.add("pad", cldnn::to_string(desc->pad)); - conv_info.add("dilation", cldnn::to_string(dilation)); - conv_info.add("out size", desc->output_size.to_string()); - - node_info->add("binary convolution info", conv_info); - node_info->dump(primitive_description); - - return primitive_description.str(); -} - -binary_convolution_inst::typed_primitive_inst(network& network, binary_convolution_node const& node) - : parent(network, node) { - auto stride = argument->stride; - auto pad = argument->pad; - - auto input_layout = 
node.input().get_output_layout(); - auto output_layout = node.get_output_layout(); - auto output_size = output_layout.get_tensor(); - - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Input number of dimensions", - input_layout.get_rank(), - "output number of dimensions", - output_layout.get_rank(), - "Input/output rank mismatch"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Stride number of dimensions", - stride.size(), - "output number of dimensions", - output_layout.get_spatial_rank(), - "stride/output dims mismatch"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "pad number of dimensions", - pad.size(), - "input number of dimensions", - input_layout.get_spatial_rank(), - "Input offset/ input size mismatch"); - - auto filter_inst = node.weights().get_output_layout(); // convolution filter - - - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Weights number of dimensions", - filter_inst.get_rank(), - "output number of dimensions", - output_layout.get_rank(), - "Weights/output dims mismatch"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Convolution padding mode", - node.get_output_layout().data_padding.filling_value(), - "padding value", - 0.0f, - "Unknown padding mode."); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Output feature size", - output_size.feature.size(), - "expected feature size", - 1, - "Only one-dimensional features are supported"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Output batch size", - output_size.batch.size(), - "expected output size", - 1, - "Only one-dimensional batch size are supported"); - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Weights feature maps number", - input_layout.feature(), - "input feature maps number", - filter_inst.feature(), - "Weights/ifm mismatch"); -} -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 4f48148c39549b..3f568b96ebacda 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ 
b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -7,7 +7,6 @@ #include "implementation_map.hpp" #include "convolution_inst.h" -#include "binary_convolution_inst.h" #include "deconvolution_inst.h" #include "deformable_convolution_inst.h" #include "fully_connected_inst.h" @@ -124,8 +123,6 @@ void post_optimize_weights::run(program& p) { for (auto& node : p.get_processing_order()) { if (node->is_type()) { optimize_weights(node->as(), p); - } else if (node->is_type()) { - optimize_weights(node->as(), p); } else if (node->is_type()) { optimize_weights(node->as(), p); } else if (node->is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp index 499c8eee49fde5..87193ff304a65b 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp @@ -134,12 +134,6 @@ void prepare_padding::run(program& p) { else needed_padding = prim_node.input().get_output_layout().data_padding; - add_required_padding(prim_node, needed_padding); - } else if (node->is_type()) { - auto& prim_node = node->as(); - - auto needed_padding = prim_node.input().get_output_layout().data_padding; - add_required_padding(prim_node, needed_padding); } } @@ -168,8 +162,7 @@ void prepare_padding::run(program& p) { conv_layout.format != cldnn::format::b_fs_zyx_fsv16 && conv_layout.format != cldnn::format::bs_fs_yx_bsv16_fsv16 && conv_layout.format != cldnn::format::b_fs_yx_fsv4 && - conv_layout.format != cldnn::format::fs_b_yx_fsv32 && - conv_layout.format != cldnn::format::b_fs_yx_32fp) { + conv_layout.format != cldnn::format::fs_b_yx_fsv32) { continue; } @@ -257,77 +250,4 @@ void prepare_padding::run(program& p) { needed_padding = padding::max(prev_prim_output_layout.data_padding, needed_padding); p.apply_needed_padding(node, conv_input_node, needed_padding); } - - for (auto& pair : 
p.nodes_map) { - if (pair.second->type() != binary_convolution::type_id()) - continue; - - auto& node = pair.second->as(); - if (node.get_dependencies().empty()) - continue; - - if (node.is_dynamic()) continue; - auto conv = node.get_primitive(); - auto& conv_input_node = node.get_dependency(0); - auto conv_layout = node.get_output_layout(); - - // right now output padding optimization is only available for bfyx format and data type = float32 - if (conv_layout.format != cldnn::format::bfyx && conv_layout.format != cldnn::format::b_fs_yx_32fp) - continue; - - // We shoudn't apply any padding to nodes which are marked as outputs or have type as data - if (conv_input_node.is_output() || conv_input_node.is_type()) - continue; - - // Calculating input padding needed for convolution - auto& filter_node = node.as().weights(); - auto filter_prim = filter_node.get_primitive(); - - layout filter_layout = filter_node.get_output_layout(); - - // convolution have only one input primitive - auto prev_prim_output_layout = conv_input_node.get_output_layout(); - - // Compute initial required paddings for primitive used as input for convolution. - auto pad = conv->pad; - auto stride = conv->stride; - auto dilation = conv->dilation; - - auto stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1; - auto stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1; - auto stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1; - - auto dilation_z = dilation.size() >= 3 ? dilation[dilation.size() - 3] : 1; - auto dilation_y = dilation.size() >= 2 ? dilation[dilation.size() - 2] : 1; - auto dilation_x = dilation.size() >= 1 ? dilation[dilation.size() - 1] : 1; - - auto pad_z = pad.size() >= 3 ? pad[pad.size() - 3] : 0; - auto pad_y = pad.size() >= 2 ? pad[pad.size() - 2] : 0; - auto pad_x = pad.size() >= 1 ? 
pad[pad.size() - 1] : 0; - - auto input_limit_x = -pad_x + (conv_layout.spatial(0) - 1) * stride_x + - (filter_layout.spatial(0) - 1) * dilation_x + 1; - auto input_limit_y = -pad_y + (conv_layout.spatial(1) - 1) * stride_y + - (filter_layout.spatial(1) - 1) * dilation_y + 1; - auto input_limit_z = -pad_z + (conv_layout.spatial(2) - 1) * stride_z + - (filter_layout.spatial(2) - 1) * dilation_z + 1; - - auto padding_begin_x = std::max(pad_x, 0); - auto padding_begin_y = std::max(pad_y, 0); - auto padding_begin_z = std::max(pad_z, 0); - auto padding_end_x = std::max( - static_cast(input_limit_x) - prev_prim_output_layout.spatial(0), - 0); - auto padding_end_y = std::max( - static_cast(input_limit_y) - prev_prim_output_layout.spatial(1), - 0); - auto padding_end_z = std::max( - static_cast(input_limit_z) - prev_prim_output_layout.spatial(2), - 0); - - cldnn::padding needed_padding({0, 0, padding_begin_x, padding_begin_y, padding_begin_z}, {0, 0, padding_end_x, padding_end_y, padding_end_z}, 0); - needed_padding = padding::max(prev_prim_output_layout.data_padding, needed_padding); - - p.apply_needed_padding(node, conv_input_node, needed_padding); - } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 167b77c74afc55..fb61e577a36e58 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -9,7 +9,6 @@ #include "proposal_inst.h" #include "roi_pooling_inst.h" #include "quantize_inst.h" -#include "binary_convolution_inst.h" #include "activation_inst.h" #include "batch_to_space_inst.h" #include "crop_inst.h" @@ -545,25 +544,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { return data_type_traits::is_i8_u8(in_dt); }; - auto bin_conv_supports_eltw_fusings = [](binary_convolution_node& conv_node) -> bool { - auto& 
eltw_node = static_cast(*conv_node.get_users().front()); - auto& eltw_prim = *eltw_node.get_primitive(); - - if (eltw_node.get_dependencies().size() < 2) - return false; - - auto const_layout = eltw_node.get_input_layout(1); - auto conv_layout = conv_node.get_output_layout(); - auto per_channel_eltwise = const_layout.feature() == conv_layout.feature(); - - if (eltw_node.get_dependency(1).is_constant() && per_channel_eltwise && - (eltw_prim.mode == eltwise_mode::sum || eltw_prim.mode == eltwise_mode::prod) && - all_ones(conv_node.get_primitive()->dilation)) - return true; - - return false; - }; - auto fc_supports_fusings = [&](fully_connected_node& node) -> bool { if (_lo.get_optimization_attributes().use_onednn_impls && _lo.get_preferred_impl_type(node, format::any /*dummy*/) == impl_types::onednn) { @@ -734,9 +714,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { } } - bool should_fuse = input.is_type(); - - should_fuse |= input.is_type() && conv_supports_fusings(input.as()); + bool should_fuse = input.is_type() && conv_supports_fusings(input.as()); should_fuse |= input.is_type() && fc_supports_fusings(input.as()); @@ -849,18 +827,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { quantize_node.get_per_tensor_output_shift() && quantize_node.get_per_tensor_output_range(); - auto& input_lo = quantize_node.get_dependency(1); - auto& input_hi = quantize_node.get_dependency(2); - bool should_fuse = input_data.is_type() && - ((out_dt == data_types::u1 && - quantize_node.get_dependencies().size() == 5 && - ((in_layout.feature() == input_lo.get_output_layout().feature() && - in_layout.feature() == input_hi.get_output_layout().feature()) || - (input_lo.get_output_layout().feature() == 1 && - input_hi.get_output_layout().feature() == 1)))) && - all_ones(input_data.as().get_primitive()->dilation); - - should_fuse |= input_data.is_type() && conv_supports_fusings(input_data.as()) && + bool should_fuse = input_data.is_type() && 
conv_supports_fusings(input_data.as()) && quantize_node.get_scale_shift_opt() && ((out_dt == data_types::f32 || out_dt == data_types::f16) || in_layout.format == format::b_fs_yx_fsv16 || @@ -954,8 +921,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { for (size_t i = 0; i < parents.size(); i++) { can_fuse_parents[i] = (parents[i].first->is_type() && conv_supports_fusings(parents[i].first->as())) || - (parents[i].first->is_type() && - bin_conv_supports_eltw_fusings(parents[i].first->as())) || (parents[i].first->is_type() && mvn_supports_fusings(parents[i].first->as(), true)) || (parents[i].first->is_type()) || @@ -1197,7 +1162,6 @@ void prepare_primitive_fusing::fuse_constant_transposes(program& p) { if (next_node->is_type() || next_node->is_type() || next_node->is_type() || - next_node->is_type() || next_node->is_type()) { size_t weights_offset = next_node->get_primitive()->input_size(); std::vector valid_weights_indices = {next_node->get_primitive()->input_size()}; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp index f55d99b6a5fa80..13e21ab4c21754 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp @@ -6,13 +6,11 @@ #include "pooling_inst.h" #include "quantize_inst.h" #include "reorder_inst.h" -#include "binary_convolution_inst.h" #include "eltwise_inst.h" #include "data_inst.h" #include "pass_manager.h" #include "program_helpers.h" #include "to_string_utils.h" -#include "intel_gpu/runtime/error_handler.hpp" #include #include @@ -23,24 +21,6 @@ using namespace cldnn; namespace { -template -bool check_binarization(memory::ptr mem_input_low, memory::ptr mem_input_high, program& p) { - bool is_binarization = true; - const auto& stream = p.get_stream(); - mem_lock data_input_low_lock{mem_input_low, stream}; - mem_lock 
data_input_high_lock{mem_input_high, stream}; - auto data_input_low = data_input_low_lock.data(); - auto data_input_high = data_input_high_lock.data(); - const size_t number_mem_layout_elements = mem_input_high->get_layout().count(); - for (size_t i = 0; i < number_mem_layout_elements; i++) { - if (data_input_high[i] != data_input_low[i]) { - is_binarization = false; - break; - } - } - return is_binarization; -} - inline float clamp(float val) { return std::max(std::numeric_limits::lowest(), std::min(std::numeric_limits::max(), val)); } @@ -322,50 +302,12 @@ void prepare_quantization::handle_quantize_node(program& p, quantize_node& quant if (optimize_quantize(p, quantize_node)) return; - if (quantize_node.get_primitive()->levels == 2) { - prepare_packed_quantize(p, quantize_node); - } else if (quantize_node.get_primitive()->levels <= 256 && !quantize_node.get_scale_shift_opt() && !quantize_node.is_constant()) { + auto l = quantize_node.get_primitive()->levels; + if (l > 2 && l <= 256 && !quantize_node.get_scale_shift_opt() && !quantize_node.is_constant()) { prepare_scale_shift_opt(p, quantize_node); } } -void prepare_quantization::prepare_packed_quantize(program& p, quantize_node& quantize_node) { - program_node &input_low_node = quantize_node.get_dependency(1); - program_node &input_high_node = quantize_node.get_dependency(2); - - if (quantize_node.is_output() || !input_low_node.is_type() || !input_high_node.is_type()) { - return; - } - - auto &input_low = input_low_node.as(); - auto &input_high = input_high_node.as(); - - auto mem_input_low = input_low.get_attached_memory_ptr(); - auto mem_input_high = input_high.get_attached_memory_ptr(); - - bool is_binarization = true; - switch (mem_input_high->get_layout().data_type) { - case data_types::f32: { - is_binarization = check_binarization(mem_input_low, mem_input_high, p); - break; - } - case data_types::f16: { - is_binarization = check_binarization(mem_input_low, mem_input_high, p); - break; - } - default: - 
CLDNN_ERROR_MESSAGE(quantize_node.id(), "prepare_quantization: Unsupported precision of quantize inputs"); - } - - auto output_dt = quantize_node.get_output_layout().data_type; - if (is_binarization) { - output_dt = data_types::u1; - } - - quantize_node.typed_desc()->output_data_types = {optional_data_type{output_dt}}; - quantize_node.recalc_output_layout(); -} - void prepare_quantization::prepare_dequantize_merge(program& p, eltwise_node& eltwise_node) { for (size_t i = 1; i < eltwise_node.get_dependencies().size(); i++) { if (!eltwise_node.get_dependency(i).is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 362cf5bf63df4c..efdaee8a3ab6d4 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -7,7 +7,6 @@ #include "pass_manager.h" #include "program_helpers.h" -#include "binary_convolution_inst.h" #include "reshape_inst.h" #include "convert_color_inst.h" #include "one_hot_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 769134e440b848..24f414af467408 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -9,7 +9,6 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/utils.hpp" #include "program_helpers.h" -#include "binary_convolution_inst.h" #include "mvn_inst.h" #include "to_string_utils.h" #include "pooling_inst.h" @@ -742,30 +741,6 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) } }; - const auto reorder_input_and_weights_binary_convolution = [&p, &rf](typed_program_node& binary_conv_node) { - auto& input = 
binary_conv_node.input(); - auto input_layout = input.get_output_layout(); - auto new_layout = input_layout; - new_layout.data_type = data_types::u1; - - auto reorder = rf.get_reorder(input.id(), input_layout, new_layout); - - if (reorder.first) { - p.add_intermediate(reorder.first, binary_conv_node, 0, !reorder.second); - } - - auto& weights = binary_conv_node.weights(); - auto weights_layout = weights.get_output_layout(); - if (!weights.is_type() && !weights.is_constant()) { - auto new_layout = layout{ weights_layout.get_partial_shape(), data_types::u1, format::b_fs_yx_32fp }; - auto reorder = rf.get_reorder(weights.id(), weights_layout, new_layout); - if (reorder.first) { - p.add_intermediate(reorder.first, binary_conv_node, 1, !reorder.second); - p.get_or_create(reorder.first).recalc_output_layouts(false); - } - } - }; - const auto reorder_input_and_weights_deconvolution = [&p, &lo, &rf](typed_program_node& deconv_node) { auto& input = deconv_node.input(); auto input_layout = input.get_output_layout(); @@ -928,10 +903,9 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) }; for (auto& prim : p.get_processing_order()) { - program_helpers::do_for_types( + program_helpers::do_for_types( *prim, reorder_input_detection_output, - reorder_input_and_weights_binary_convolution, reorder_input_and_weights_deconvolution, reorder_convolution, reorder_input_fully_connected, diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp deleted file mode 100644 index 41ad3b6d92d3fc..00000000000000 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/binary_convolution.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "primitive_base.hpp" - -#include "binary_convolution_inst.h" -#include "kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h" -#include 
"kernel_selector/kernels/binary_convolution/binary_convolution_params.h" - -namespace cldnn { -namespace ocl { - -struct binary_convolution_impl : typed_primitive_impl_ocl { - using parent = typed_primitive_impl_ocl; - using parent::parent; - using kernel_selector_t = kernel_selector::binary_convolution_kernel_selector; - using kernel_params_t = std::pair; - - DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::binary_convolution_impl) - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); - - args.weights = instance.weights_memory(); - return args; - } - -public: - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { - const auto& primitive = impl_param.typed_desc(); - const auto& weights_layout = (*impl_param.weights_layout).convert_to_weights_layout(false); - const auto& weights_size = weights_layout.get_tensor(); - - const auto& groups = primitive->groups; - const auto& stride = primitive->stride; - const auto& dilation = primitive->dilation; - const auto& pad = primitive->pad; - - auto params = get_weights_bias_default_params(impl_param); - auto optional_params = get_default_weights_bias_optional_params(impl_param.get_program()); - - params.pad_value = primitive->pad_value; - params.out_dt = to_data_type(*primitive->output_data_types[0]); - params.groups = static_cast(groups); - params.filterSize = { - (uint32_t)weights_size.spatial[0], - (uint32_t)weights_size.spatial[1], - (uint32_t)weights_size.spatial[2], - }; - - uint32_t pad_z = std::max(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); - params.padding = {pad_x, pad_y, pad_z}; - - uint32_t stride_z = stride.size() >= 3 ? 
static_cast(stride[stride.size() - 3]) : 1; - uint32_t stride_y = stride.size() >= 2 ? static_cast(stride[stride.size() - 2]) : 1; - uint32_t stride_x = stride.size() >= 1 ? static_cast(stride[stride.size() - 1]) : 1; - params.stride = {stride_x, stride_y, stride_z}; - - uint32_t dilation_z = dilation.size() >= 3 ? static_cast(dilation[dilation.size() - 3]) : 1; - uint32_t dilation_y = dilation.size() >= 2 ? static_cast(dilation[dilation.size() - 2]) : 1; - uint32_t dilation_x = dilation.size() >= 1 ? static_cast(dilation[dilation.size() - 1]) : 1; - params.dilation = {dilation_x, dilation_y, dilation_z}; - - return {params, optional_params}; - } -}; - -namespace detail { - -attach_binary_convolution_impl::attach_binary_convolution_impl() { - implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, { - std::make_tuple(data_types::u1, format::b_fs_yx_32fp), - }); -} - -} // namespace detail -} // namespace ocl -} // namespace cldnn - -BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::binary_convolution_impl) -BIND_BINARY_BUFFER_WITH_TYPE(cldnn::binary_convolution) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index e9b98a57e0022f..0ec066f00995d6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -121,8 +121,6 @@ namespace cldnn { kernel_selector::data_type to_data_type(data_types dt) { switch (dt) { - case cldnn::data_types::u1: - return kernel_selector::data_type::BINARY; case cldnn::data_types::i4: return kernel_selector::data_type::INT4; case cldnn::data_types::u4: @@ -146,8 +144,6 @@ kernel_selector::data_type to_data_type(data_types dt) { data_types from_data_type(kernel_selector::data_type dt) { switch (dt) { - case kernel_selector::data_type::BINARY: - return cldnn::data_types::u1; case kernel_selector::data_type::INT4: return 
cldnn::data_types::i4; case kernel_selector::data_type::UINT4: @@ -171,8 +167,6 @@ data_types from_data_type(kernel_selector::data_type dt) { kernel_selector::weights_type to_weights_type(data_types dt) { switch (dt) { - case cldnn::data_types::u1: - return kernel_selector::weights_type::BINARY; case cldnn::data_types::u4: return kernel_selector::weights_type::UINT4; case cldnn::data_types::i4: @@ -194,8 +188,6 @@ kernel_selector::weights_type to_weights_type(data_types dt) { data_types from_weights_type(kernel_selector::weights_type dt) { switch (dt) { - case kernel_selector::weights_type::BINARY: - return data_types::u1; case kernel_selector::weights_type::INT4: return data_types::i4; case kernel_selector::weights_type::UINT4: @@ -255,8 +247,6 @@ kernel_selector::data_layout to_data_layout(format f) { return kernel_selector::data_layout::bs_f_bsv8__af8; case format::winograd_2x3_s1_data: return kernel_selector::data_layout::winograd_2x3_s1_data; - case format::b_fs_yx_32fp: - return kernel_selector::data_layout::b_fs_yx_32fp; case format::bfzyx: return kernel_selector::data_layout::bfzyx; case format::bzyxf: @@ -360,8 +350,6 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) { return cldnn::format::bs_f_bsv16; case kernel_selector::data_layout::winograd_2x3_s1_data: return cldnn::format::winograd_2x3_s1_data; - case kernel_selector::data_layout::b_fs_yx_32fp: - return cldnn::format::b_fs_yx_32fp; case kernel_selector::data_layout::bfzyx: return cldnn::format::bfzyx; case kernel_selector::data_layout::fs_b_yx_fsv32: @@ -538,9 +526,6 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) { return kernel_selector::weights_layout::os_is_yx_osv32_isv4; case format::os_is_zyx_osv32_isv4: return kernel_selector::weights_layout::os_is_zyx_osv32_isv4; - case format::b_fs_yx_32fp: - case format::os_is_yx_osv32_isv32p: - return kernel_selector::weights_layout::os_is_yx_osv32_isv32p; case format::os_is_yx_isv16_osv16: return 
kernel_selector::weights_layout::os_is_yx_isv16_osv16; case format::os_is_y_x8_osv8_isv4_swizzled_by_4: @@ -865,8 +850,6 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { return format::os_is_zyx_osv32_isv4; case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4_swizzled_by_4: return cldnn::format::os_is_y_x8_osv8_isv4_swizzled_by_4; - case kernel_selector::weights_layout::os_is_yx_osv32_isv32p: - return cldnn::format::os_is_yx_osv32_isv32p; case kernel_selector::weights_layout::oizyx: return cldnn::format::oizyx; case kernel_selector::weights_layout::iozyx: diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp index ef3ea9b4316dc1..2164c922a2af08 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp @@ -51,7 +51,6 @@ struct quantize_impl : typed_primitive_impl_ocl { get_default_optional_params(impl_param.get_program()); quantize_params.levels = arg.get_levels(); - quantize_params.packed_binary_output = arg.get_packed_binary_output(); quantize_params.scale_shift_opt = arg.get_scale_shift_opt(); quantize_params.has_post_scale = arg.get_need_post_scale(); quantize_params.has_post_shift = arg.get_need_post_shift(); @@ -90,8 +89,6 @@ struct quantize_impl : typed_primitive_impl_ocl { void update_dispatch_data(const kernel_impl_params& impl_param) override { auto quantize_params = get_default_params(impl_param); - const auto& output_layout = impl_param.get_output_layout(); - quantize_params.packed_binary_output = output_layout.data_type == data_types::u1; (_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 3cc96ee00735ce..b03944ae774baf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -12,7 +12,6 @@ void register_implementations() { REGISTER_OCL(activation); REGISTER_OCL(adaptive_pooling); REGISTER_OCL(arg_max_min); - REGISTER_OCL(binary_convolution); REGISTER_OCL(border); REGISTER_OCL(broadcast); REGISTER_OCL(bucketize); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index d591499148e77a..17731a3f634239 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -7,7 +7,6 @@ #include "intel_gpu/primitives/activation.hpp" #include "intel_gpu/primitives/arg_max_min.hpp" #include "intel_gpu/primitives/batch_to_space.hpp" -#include "intel_gpu/primitives/binary_convolution.hpp" #include "intel_gpu/primitives/border.hpp" #include "intel_gpu/primitives/broadcast.hpp" #include "intel_gpu/primitives/bucketize.hpp" @@ -93,7 +92,6 @@ REGISTER_OCL(activation); REGISTER_OCL(adaptive_pooling); REGISTER_OCL(arg_max_min); REGISTER_OCL(batch_to_space); -REGISTER_OCL(binary_convolution); REGISTER_OCL(border); REGISTER_OCL(broadcast); REGISTER_OCL(bucketize); diff --git a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h b/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h deleted file mode 100644 index 1a910bcf81e499..00000000000000 --- a/src/plugins/intel_gpu/src/graph/include/binary_convolution_inst.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include "intel_gpu/primitives/binary_convolution.hpp" -#include "primitive_inst.h" - -#include -#include -#include - -namespace cldnn { - -template <> -struct typed_program_node : public typed_program_node_base { - using parent = typed_program_node_base; - -public: - typed_program_node(std::shared_ptr prim, program& prog) - : parent(prim, prog) {} - - program_node& input() const { return 
get_dependency(0); } - program_node& weights() const { return get_dependency(1); } - - std::unique_ptr get_kernel_impl_params(const std::vector& in_layouts, const std::vector& out_layouts) const override { - auto params = parent::get_kernel_impl_params(in_layouts, out_layouts); - params->weights_layout = optional_layout(weights().get_output_layout()); - return params; - } -}; - -using binary_convolution_node = typed_program_node; - -template <> -class typed_primitive_inst : public typed_primitive_inst_base { - using parent = typed_primitive_inst_base; - using parent::parent; - -public: - static layout calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param); - static std::string to_string(binary_convolution_node const& node); - typed_primitive_inst(network& network, binary_convolution_node const& node); - - bool need_reset_input_memory(size_t idx = 0) const override { - if (idx != 0) - return false; - - auto input_layout = _deps[0].first->_impl_params->get_output_layout(0); - return input_layout.data_padding ? 
true : false; - } - - bool need_reset_output_memory() const override { - bool res = parent::need_reset_output_memory(); - auto output_layout = _impl_params->get_output_layout(0); - if (output_layout.data_padding) { - return true; - } - return res; - } - - memory::ptr weights_memory() const { return dep_memory_ptr(1); } -}; - -using binary_convolution_inst = typed_primitive_inst; - -} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h index e9ed4ec959ae1c..aee3bff0a6df6f 100644 --- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h +++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h @@ -14,7 +14,6 @@ #include "convolution_inst.h" #include "deconvolution_inst.h" #include "detection_output_inst.h" -#include "binary_convolution_inst.h" #include "quantize_inst.h" #include diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index d6409eab31e9a4..8973cd74aa45e1 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -149,7 +149,6 @@ class prepare_quantization : public base_pass { private: void run(program& p) override; void handle_quantize_node(program& p, quantize_node& quantize_node); - void prepare_packed_quantize(program& p, quantize_node& quantize_node); void prepare_dequantize_merge(program& p, eltwise_node& eltwise_node); void remove_fake_reorders(program& p, reorder_node& reorder_node); void prepare_asymmetric_quantization(program& p, convolution_node& convolution_node); diff --git a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h index 8d213f68bc3f29..fa616980fe0bd2 100644 --- a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h @@ -142,7 +142,6 @@ struct 
typed_program_node : public typed_program_node_base { program_node& input(size_t index = 0) const { return get_dependency(index); } int get_levels() const { return get_primitive()->levels; } - bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::u1; } bool get_scale_shift_opt() const { return get_primitive()->scale_shift_opt; } bool get_need_pre_shift() const { return get_primitive()->need_pre_shift; } bool get_need_post_scale() const { return get_primitive()->need_post_scale; } @@ -201,7 +200,9 @@ class typed_primitive_inst : public typed_primitive_inst_base - static std::vector calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param); + static std::vector calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param) { + return forward_input0_shape(impl_param); + } static layout calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param); static std::string to_string(quantize_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index ca4569a7df7099..eb02ba9779ceca 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -450,9 +450,6 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node || fmt_next == format::bs_fs_yx_bsv32_fsv16 || fmt_next == format::bs_fs_yx_bsv32_fsv32)) return true; - if (prev.is_type() && fmt_next == format::b_fs_yx_fsv16) - return true; - if (prev.is_type() && !data_type_traits::is_floating_point(dt_prev) && data_type_traits::is_floating_point(dt_next) && @@ -1741,8 +1738,6 @@ format layout_optimizer::get_preferred_format(program_node& node) { expected = _forcing_map.at(node.id()).first; } else if (node.is_type()) { expected = get_expected_format(node.as()); - } else if (node.is_type()) { - expected = cldnn::format::b_fs_yx_32fp; } else if 
(node.is_type()) { expected = get_expected_format(node.as()); } else if (node.is_type() || node.is_type()) { diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index c8a081dadbc45f..2aaca0c73b2df5 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -212,41 +212,6 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump file_stream << buffer.str(); } -template <> -void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream, bool dump_raw) { - auto&& l = mem->get_layout(); - - file_stream << "shape: "; - file_stream << l.batch() << " "; - file_stream << l.feature() << " "; - file_stream << l.spatial(1) << " "; - file_stream << l.spatial(0) << " "; - file_stream << "(" << l.batch() * l.feature() * l.spatial(1) * l.spatial(0) << ")" << std::endl; - - mem_lock lock(mem, stream); - auto mem_ptr = lock.data(); - - if (!dump_raw) { - for (cldnn::tensor::value_type b = 0; b < l.batch(); ++b) { - for (cldnn::tensor::value_type f = 0; f < (cldnn::tensor::value_type)ceil_div(l.feature(), 32); ++f) { - for (cldnn::tensor::value_type z = 0; z < l.spatial(2); ++z) { - for (cldnn::tensor::value_type y = 0; y < l.spatial(1); ++y) { - for (cldnn::tensor::value_type x = 0; x < l.spatial(0); ++x) { - cldnn::tensor t(cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, 0)); - size_t input_it = mem->get_layout().get_linear_offset(t); - file_stream << mem_ptr[input_it] << std::endl; - } - } - } - } - } - } else { - for (size_t i = 0; i < lock.size(); ++i) { - file_stream << std::fixed << std::setprecision(6) << mem_ptr[i] << std::endl; - } - } -} - void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std::string layerName, bool dump_raw) { std::cout << "Dump " << (dump_raw ? 
"raw " : "") << layerName << std::endl; GPU_DEBUG_GET_INSTANCE(debug_config); @@ -266,8 +231,6 @@ void log_memory_to_file(memory::ptr mem, layout data_layout, stream& stream, std dump(actual_mem, stream, file_stream, dump_raw); else if (mem_dt == cldnn::data_types::f16) dump(actual_mem, stream, file_stream, dump_raw); - else if (mem_dt == cldnn::data_types::u1) - dump(actual_mem, stream, file_stream, dump_raw); else if (mem_dt == cldnn::data_types::i64) dump(actual_mem, stream, file_stream, dump_raw); else if (mem_dt == cldnn::data_types::i32) diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 9bd5d57090c7ef..7035005be3354d 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -28,7 +28,6 @@ #include "softmax_inst.h" #include "permute_inst.h" #include "custom_gpu_primitive_inst.h" -#include "binary_convolution_inst.h" #include "resample_inst.h" #include "reshape_inst.h" #include "quantize_inst.h" @@ -299,28 +298,7 @@ bool program::analyze_output_size_handling_need() { // Calculate output size and compare with specified. 
for (const auto& node : processing_order) { - if (node->is_type()) { - auto& prim_node = node->as(); - const auto& prim = prim_node.get_primitive(); - - tensor specified_output_range( - {0, 0, prim->output_size.spatial[0], prim->output_size.spatial[1], prim->output_size.spatial[2]}, - 1); - - auto filter_size = prim_node.weights().get_output_layout().get_tensor(); - - auto primInputSize = prim_node.input().get_output_layout().get_tensor(); - auto calc_output_range = - calc_sliding_window_output_range(primInputSize, - filter_size, - prim->pad, - prim->stride, - prim->dilation, - true, - 1); - if (specified_output_range != calc_output_range) - handling_needed = true; - } else if (node->is_type()) { + if (node->is_type()) { auto& prim_node = node->as(); const auto& prim = prim_node.get_primitive(); @@ -1439,7 +1417,6 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::permute::type_id() && prim.type() != cldnn::reshape::type_id() && prim.type() != cldnn::detection_output::type_id() && - prim.type() != cldnn::binary_convolution::type_id() && prim.type() != cldnn::quantize::type_id() && prim.type() != cldnn::custom_gpu_primitive::type_id() && prim.type() != cldnn::concatenation::type_id() && diff --git a/src/plugins/intel_gpu/src/graph/quantize.cpp b/src/plugins/intel_gpu/src/graph/quantize.cpp index dde4aa4b7ec8af..c4125ae40eeb0f 100644 --- a/src/plugins/intel_gpu/src/graph/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/quantize.cpp @@ -3,7 +3,6 @@ // #include "quantize_inst.h" -#include "binary_convolution_inst.h" #include "primitive_type_base.h" #include "intel_gpu/runtime/memory.hpp" #include "json_object.h" @@ -22,30 +21,9 @@ layout quantize_inst::calc_output_layout(quantize_node const& node, kernel_impl_ if (desc->output_data_types[0]) out_dt = *desc->output_data_types[0]; - if (out_dt == data_types::u1) { - output_format = format::b_fs_yx_32fp; - } - return layout{out_dt, output_format, input_layout.get_tensor()}; } 
-template -std::vector quantize_inst::calc_output_layouts(quantize_node const&, kernel_impl_params const& impl_param) { - auto desc = impl_param.typed_desc(); - - auto input_layout = impl_param.get_input_layout(); - auto output_format = input_layout.format; - auto out_dt = desc->output_data_types[0].value_or(input_layout.data_type); - - if (out_dt == data_types::u1) { - output_format = format::b_fs_yx_32fp; - } - - return { layout{input_layout.get(), out_dt, output_format} }; -} - -template std::vector quantize_inst::calc_output_layouts(quantize_node const& node, const kernel_impl_params& impl_param); - std::string quantize_inst::to_string(quantize_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cache/cache.json b/src/plugins/intel_gpu/src/kernel_selector/cache/cache.json index a9662f4606ef88..59574741bb44b1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cache/cache.json +++ b/src/plugins/intel_gpu/src/kernel_selector/cache/cache.json @@ -10894,28 +10894,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 0], "4163001530200549687": ["convolution_gpu_bfyx_f16", 8], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 0], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12063794501602674144": 
["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 1], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 5], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], "13626797216057420236": ["convolution_gpu_bfyx_f16", 6], "2506095387855338923": ["convolution_gpu_bfyx_f16", 3], @@ -10941,52 +10930,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 7], "6051363798671277490": ["convolution_gpu_bfyx_f16", 5], "15256882419569076308": ["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - "14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - 
"11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": ["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 2], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 0], "1420839373798024197": ["convolution_gpu_bfyx_f16", 6], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 2], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 0], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 1], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16", 4], "14551802214127931636": 
["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 5], "10093198489340308880": ["convolution_gpu_bfyx_f16", 5], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 0], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16_1x1", 2], @@ -15152,28 +15111,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 1], "1537866870296831307": ["convolution_gpu_bfyx_f16", 6], "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 1], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 1], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 0], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 522], "229385769741075054": 
["convolution_gpu_bfyx_f16_depthwise", 2], "16642117060176841433": ["convolution_gpu_bfyx_f16", 5], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2], "4859984633862705344": ["convolution_gpu_bfyx_f16", 7], "6643541161570220487": ["convolution_gpu_bfyx_f16", 4], @@ -15198,36 +15146,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16", 4], "2049835121645334394": ["convolution_gpu_bfyx_f16", 1], "4179197899143727062": ["convolution_gpu_bfyx_f16", 1], - "17629208725190652410": ["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "13741392821104156137": ["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 6], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 1], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 0], - "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 0], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": ["convolution_gpu_bfyx_f16", 8], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 1], "5183001506630431534": ["convolution_gpu_bfyx_f16", 5], "8365841447443821412": ["convolution_gpu_bfyx_f16", 5], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 551], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2], "12722030162332410659": ["convolution_gpu_bfyx_f16_1x1", 1], @@ -23738,28 +23671,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 2], "4163001530200549687": ["convolution_gpu_bfyx_f16", 8], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 1], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 6], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], "13626797216057420236": ["convolution_gpu_bfyx_f16", 8], "2506095387855338923": ["convolution_gpu_bfyx_f16", 2], @@ -23785,52 +23707,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 2], "6051363798671277490": ["convolution_gpu_bfyx_f16", 2], "15256882419569076308": ["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - "14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - "11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": 
["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 2], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 2], "1420839373798024197": ["convolution_gpu_bfyx_f16", 8], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 2], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16", 7], "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 8], "10093198489340308880": ["convolution_gpu_bfyx_f16", 5], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 
1], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16", 8], @@ -37425,28 +37317,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 0], "1537866870296831307": ["convolution_gpu_bfyx_f16", 6], "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 1], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 1078], "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 2], "16642117060176841433": ["convolution_gpu_bfyx_f16", 6], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2], "4859984633862705344": ["convolution_gpu_bfyx_f16", 8], 
"6643541161570220487": ["convolution_gpu_bfyx_f16", 3], @@ -37471,36 +37352,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16", 2], "2049835121645334394": ["convolution_gpu_bfyx_f16", 5], "4179197899143727062": ["convolution_gpu_bfyx_f16", 5], - "17629208725190652410": ["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "13741392821104156137": ["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 8], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 2], - "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": 
["convolution_gpu_bfyx_f16", 7], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 2], "5183001506630431534": ["convolution_gpu_bfyx_f16", 7], "8365841447443821412": ["convolution_gpu_bfyx_f16", 8], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 547], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2], "12722030162332410659": ["convolution_gpu_bfyx_f16_1x1", 2], @@ -65402,28 +65268,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 0], "4163001530200549687": ["convolution_gpu_bfyx_f16", 7], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 0], - "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": 
["convolution_gpu_bfyx_direct_10_12_16", 2], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 7], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], "13626797216057420236": ["convolution_gpu_bfyx_f16", 7], "2506095387855338923": ["convolution_gpu_bfyx_f16", 1], @@ -65449,52 +65304,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 4], "6051363798671277490": ["convolution_gpu_bfyx_f16", 4], "15256882419569076308": ["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - "14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - "11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": ["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 2], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 6], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 0], 
"1420839373798024197": ["convolution_gpu_bfyx_f16", 8], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 0], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16_1x1", 2], "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 4], "10093198489340308880": ["convolution_gpu_bfyx_f16", 5], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 2], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16", 7], @@ -69720,28 +69545,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 2], "1537866870296831307": ["convolution_gpu_bfyx_f16", 4], 
"6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 0], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 1], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 201], "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 1], "16642117060176841433": ["convolution_gpu_bfyx_f16", 7], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2], "4859984633862705344": ["convolution_gpu_bfyx_f16", 6], "6643541161570220487": ["convolution_gpu_bfyx_f16", 5], @@ -69766,36 +69580,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16", 3], "2049835121645334394": ["convolution_gpu_bfyx_f16", 5], "4179197899143727062": ["convolution_gpu_bfyx_f16", 4], - "17629208725190652410": 
["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "13741392821104156137": ["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 8], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 0], - "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 0], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 0], - "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": ["convolution_gpu_bfyx_f16_1x1", 1], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 2], "5183001506630431534": ["convolution_gpu_bfyx_f16", 7], "8365841447443821412": ["convolution_gpu_bfyx_f16", 3], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], 
"616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 545], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2], "12722030162332410659": ["convolution_gpu_bfyx_f16", 6], @@ -75070,28 +74869,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 1], "4163001530200549687": ["convolution_gpu_bfyx_f16", 8], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 1], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": ["convolution_gpu_bfyx_os_iyx_osv16", 1032], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 6], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], 
"13626797216057420236": ["convolution_gpu_bfyx_f16", 6], "2506095387855338923": ["convolution_gpu_bfyx_f16", 1], @@ -75117,52 +74905,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 3], "6051363798671277490": ["convolution_gpu_bfyx_f16", 2], "15256882419569076308": ["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - "14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - "11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": ["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 1], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 6], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 1], "1420839373798024197": ["convolution_gpu_bfyx_f16", 8], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 2], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 1], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16_1x1", 2], "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 6], "10093198489340308880": ["convolution_gpu_bfyx_f16", 4], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_os_iyx_osv16", 168], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16", 7], @@ -79463,28 +79221,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 2], "1537866870296831307": ["convolution_gpu_bfyx_f16", 5], "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 1], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 1], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 886], "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 1], "16642117060176841433": ["convolution_gpu_bfyx_f16", 7], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 1], "4859984633862705344": ["convolution_gpu_bfyx_f16", 7], "6643541161570220487": ["convolution_gpu_bfyx_f16", 3], @@ -79509,36 +79256,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16", 8], "2049835121645334394": ["convolution_gpu_bfyx_f16", 5], "4179197899143727062": ["convolution_gpu_bfyx_f16", 7], - "17629208725190652410": ["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "13741392821104156137": 
["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 5], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 2], - "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": ["convolution_gpu_bfyx_f16", 6], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 2], "5183001506630431534": ["convolution_gpu_bfyx_f16", 6], "8365841447443821412": ["convolution_gpu_bfyx_f16", 5], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 544], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 1], "12722030162332410659": ["convolution_gpu_bfyx_f16", 6], @@ -89452,28 +89184,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 0], "4163001530200549687": 
["convolution_gpu_bfyx_f16", 8], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12495525202846933706": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 2], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 4], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], "13626797216057420236": ["convolution_gpu_bfyx_f16", 6], "2506095387855338923": ["convolution_gpu_bfyx_f16", 2], @@ -89499,52 +89220,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 2], "6051363798671277490": ["convolution_gpu_bfyx_f16", 1], "15256882419569076308": 
["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - "14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - "11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": ["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 2], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 8], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 2], "1420839373798024197": ["convolution_gpu_bfyx_f16", 8], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 2], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16_1x1", 2], "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 4], "10093198489340308880": ["convolution_gpu_bfyx_f16", 2], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 2], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16", 7], @@ -93768,28 +93459,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 2], "1537866870296831307": ["convolution_gpu_bfyx_f16", 8], "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 1], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 2], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 1], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 576], "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 1], "16642117060176841433": ["convolution_gpu_bfyx_f16", 8], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2], "4859984633862705344": ["convolution_gpu_bfyx_f16", 7], "6643541161570220487": ["convolution_gpu_bfyx_f16", 2], @@ -93814,36 +93494,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16", 5], "2049835121645334394": ["convolution_gpu_bfyx_f16", 5], "4179197899143727062": ["convolution_gpu_bfyx_f16", 8], - "17629208725190652410": ["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 6], "13741392821104156137": ["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 8], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5483210158429664653": 
["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 0], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 2], - "4005952778869826841": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 1], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 1], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": ["convolution_gpu_bfyx_f16", 8], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 2], "5183001506630431534": ["convolution_gpu_bfyx_f16", 8], "8365841447443821412": ["convolution_gpu_bfyx_f16", 3], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 89], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2], "12722030162332410659": ["convolution_gpu_bfyx_f16", 7], @@ -103558,28 +103223,17 @@ "4424123045426419379": ["convolution_gpu_bfyx_f16_depthwise", 1], "4163001530200549687": ["convolution_gpu_bfyx_f16", 6], "6890722566263723898": ["convolution_gpu_bfyx_f16_depthwise", 1], - "13967737018625834884": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4904008439880070743": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12495525202846933706": 
["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4591223941823315334": ["convolution_gpu_bfyx_f16_depthwise", 2], - "544756362416159697": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8054350595915663704": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11732173920945220656": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7719296864138745692": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11084677377269310947": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3900078181903132788": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12063794501602674144": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10104159986220401403": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2769623751530494205": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "5912445578783112178": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4676013859334121048": ["convolution_gpu_bfyx_f16_depthwise", 2], - "8428605412862257526": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10635621202663297160": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821402568274932830": ["binary_convolution_gpu_1x1", 0], "14214799641428760795": ["convolution_gpu_bfyx_direct_10_12_16", 0], "2419835076951229610": ["convolution_gpu_bfyx_f16_depthwise", 2], "10657672650587258853": ["convolution_gpu_bfyx_f16", 1], - "13401815977163875034": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "1118760218381327639": ["convolution_gpu_bfyx_f16_depthwise", 2], "13626797216057420236": ["convolution_gpu_bfyx_f16", 3], "2506095387855338923": ["convolution_gpu_bfyx_f16", 0], @@ -103605,52 +103259,22 @@ "8980088396308495358": ["convolution_gpu_bfyx_f16", 6], "6051363798671277490": ["convolution_gpu_bfyx_f16", 3], "15256882419569076308": ["convolution_gpu_bfyx_gemm_like", 2], - "5264667632252570871": ["binary_convolution_gpu_1x1", 0], - "13666815947927457789": ["binary_convolution_gpu_generic", 0], - "17044275001224274100": ["binary_convolution_gpu_1x1", 0], - "12262607945825744026": ["binary_convolution_gpu_1x1", 0], - 
"14361360851358312136": ["binary_convolution_gpu_1x1", 0], - "3860430324097549563": ["binary_convolution_gpu_generic", 0], - "7128208160650643266": ["binary_convolution_gpu_1x1", 0], - "1062027263129762214": ["binary_convolution_gpu_1x1", 0], - "16561633756389098051": ["binary_convolution_gpu_generic", 0], - "18052555090835437234": ["binary_convolution_gpu_1x1", 0], - "6603476907029730789": ["binary_convolution_gpu_generic", 0], - "6440401458387219749": ["binary_convolution_gpu_1x1", 0], - "7943355244336393643": ["binary_convolution_gpu_1x1", 0], - "11265761184374928749": ["binary_convolution_gpu_generic", 0], - "1225084982500358091": ["binary_convolution_gpu_1x1", 0], - "9666917304428574817": ["binary_convolution_gpu_generic", 0], - "15210383919838660019": ["binary_convolution_gpu_1x1", 0], - "12329467286607927665": ["binary_convolution_gpu_1x1", 0], - "13821628145640330381": ["binary_convolution_gpu_generic", 0], "10766710068843786211": ["fully_connected_gpu_bfyx_ref", 0], "10399951843541697656": ["convolution_gpu_bfyx_to_bfyx_f16", 6], "6121182450365731169": ["convolution_gpu_bfyx_f16_depthwise", 2], "1420839373798024197": ["convolution_gpu_bfyx_f16", 6], - "13139718073646557611": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6766478895508954889": ["convolution_gpu_bfyx_f16_depthwise", 0], - "17134103923720311191": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13452284390313542161": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2726108976392323449": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15079819271991253405": ["convolution_gpu_bfyx_f16_depthwise", 1], - "9323432656779660443": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13582860960891838539": ["convolution_gpu_bfyx_f16_depthwise", 2], - "1055817291271670229": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2823755942522128459": ["convolution_gpu_bfyx_f16_depthwise", 2], - "3384212664007545715": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "2453671492344359798": 
["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "10377368418548257894": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14821668718539890122": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "3048467327118481877": ["convolution_gpu_bfyx_f16_depthwise", 2], - "275456580066174196": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "18142781007687401165": ["convolution_gpu_bfyx_f16_depthwise", 2], - "11817977686815992972": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "656647291151035001": ["convolution_gpu_bfyx_f16_1x1", 0], "14551802214127931636": ["convolution_gpu_bfyx_f16_depthwise", 2], "13523379689227815262": ["convolution_gpu_bfyx_f16", 0], "10093198489340308880": ["convolution_gpu_bfyx_f16", 5], - "12285668048424773773": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15179725479322599748": ["convolution_gpu_bfyx_direct_10_12_16", 0], "5314501484112365200": ["convolution_gpu_bfyx_f16_depthwise", 2], "11234282887624973651": ["convolution_gpu_bfyx_f16_1x1", 0], @@ -107791,28 +107415,17 @@ "10677387047764489263": ["convolution_gpu_bfyx_f16_depthwise", 2], "1537866870296831307": ["convolution_gpu_bfyx_f16", 3], "6121043402577263178": ["convolution_gpu_bfyx_f16_depthwise", 2], - "6925053265869446926": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "6571448459512229759": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5033302751957212880": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17763423818624479514": ["convolution_gpu_bfyx_f16_depthwise", 2], - "14590866505568013579": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4444924555401610608": ["convolution_gpu_bfyx_f16_depthwise", 1], - "6203626494792050078": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2300190318489790800": ["convolution_gpu_bfyx_f16_depthwise", 2], - "2242155068249197061": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2904120218680757524": ["convolution_gpu_bfyx_f16_depthwise", 1], - "12956000960440491758": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8351838766968536267": 
["convolution_gpu_bfyx_f16_depthwise", 2], - "8556976994485015619": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "15984235378444812956": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5573407848022795004": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15497405578993446736": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "13977494186365957972": ["convolution_gpu_bfyx_f16_depthwise", 2], - "13526783681740823304": ["binary_convolution_gpu_1x1", 0], "11430675853825242111": ["convolution_gpu_bfyx_os_iyx_osv16", 95], "229385769741075054": ["convolution_gpu_bfyx_f16_depthwise", 1], "16642117060176841433": ["convolution_gpu_bfyx_f16", 0], - "10567925043930198424": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "12594060950826322919": ["convolution_gpu_bfyx_f16_depthwise", 2], "4859984633862705344": ["convolution_gpu_bfyx_f16", 3], "6643541161570220487": ["convolution_gpu_bfyx_f16", 0], @@ -107837,36 +107450,21 @@ "4669930370801439013": ["convolution_gpu_bfyx_f16_1x1", 2], "2049835121645334394": ["convolution_gpu_bfyx_f16", 2], "4179197899143727062": ["convolution_gpu_bfyx_f16", 2], - "17629208725190652410": ["binary_convolution_gpu_1x1", 0], - "10203558295793180608": ["binary_convolution_gpu_1x1", 0], - "14083006767377408735": ["binary_convolution_gpu_1x1", 0], - "11002601216030213097": ["binary_convolution_gpu_1x1", 0], "791829835282095596": ["convolution_gpu_bfyx_to_bfyx_f16", 6], "13741392821104156137": ["convolution_gpu_bfyx_f16_depthwise", 2], "14407270906917824601": ["convolution_gpu_bfyx_f16", 8], - "10520976832008005001": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8640243682990139429": ["convolution_gpu_bfyx_f16_depthwise", 2], - "5483210158429664653": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "2044520988682161997": ["convolution_gpu_bfyx_f16_depthwise", 0], - "1305091083986203859": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5659956897985857329": ["convolution_gpu_bfyx_f16_depthwise", 2], - "4005952778869826841": 
["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "17364300506911036439": ["convolution_gpu_bfyx_f16_depthwise", 2], - "9812558313251709379": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "5598344570994891971": ["convolution_gpu_bfyx_f16_depthwise", 1], - "13865812989618108181": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], - "3791901918413409048": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "8565954012969407126": ["convolution_gpu_bfyx_f16_depthwise", 2], - "10881232647513304568": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "4973139580034915617": ["convolution_gpu_bfyx_f16_depthwise", 1], - "5367180062414144278": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "15140881728515527701": ["convolution_gpu_bfyx_f16_depthwise", 2], - "15616026263121816018": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "7285564639878424393": ["convolution_gpu_bfyx_f16", 5], "2742457992410896516": ["convolution_gpu_bfyx_f16_depthwise", 1], "5183001506630431534": ["convolution_gpu_bfyx_f16", 0], "8365841447443821412": ["convolution_gpu_bfyx_f16", 8], - "11741754254612323251": ["binary_convolution_gpu_1x1_b_fs_yx_fsv16", 0], "616934627583263600": ["convolution_gpu_bfyx_os_iyx_osv16", 183], "15327993174794686756": ["convolution_gpu_bfyx_f16_depthwise", 2], "12722030162332410659": ["convolution_gpu_bfyx_f16_1x1", 0], @@ -114131,4 +113729,4 @@ "8519379094225608238": ["fully_connected_gpu_fb_io_ref", 0] } } -} \ No newline at end of file +} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1.cl deleted file mode 100644 index 4e4165bd99c6e0..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1.cl +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/batch_headers/sub_group_block_read.cl" -#include 
"include/batch_headers/sub_group_block_write.cl" -#include "include/batch_headers/sub_group_shuffle.cl" -#include "include/batch_headers/fetch_data.cl" - -#define OC_BLOCK_SIZE 32 - -#define GET_WEI(data, id) _sub_group_shuffle(data, id) -#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_uint(_sub_group_block_read((const __global uint*)(ptr) + (byte_offset))) -#define ALIGNED_BLOCK_WRITE(ptr, byte_offset, val) _sub_group_block_write((__global uint*)(ptr) + (byte_offset), as_uint(val)) -#define ALIGNED_BLOCK_READ2(ptr, byte_offset) as_uint2(_sub_group_block_read2((const __global uint*)(ptr) + (byte_offset))) - -REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE) -__attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1))) -KERNEL(binary_convolution_1x1)(const __global INPUT0_TYPE* input, - __global OUTPUT_TYPE* output, - const __global FILTER_TYPE* weights -#if HAS_FUSED_OPS_DECLS - , FUSED_OPS_DECLS -#endif -) -{ - const int xy = get_group_id(0); - const int f_block = get_global_id(1); - const int b = get_global_id(2); - const int lid = get_sub_group_local_id(); -#if PADDED_INPUT - const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X; - const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X; - const uint input_offset = INPUT0_OFFSET - + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH - + y*INPUT0_Y_PITCH; -#else - const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X; - const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X; - const uint input_offset = INPUT0_OFFSET - + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH - + xy*XY_BLOCK_SIZE; -#endif - typedef MAKE_VECTOR_TYPE(FILTER_TYPE, 2) wei_t; - -#if BINARY_PACKED_OUTPUT - const uint dst_index = OUTPUT_OFFSET - + b*OUTPUT_FEATURE_NUM_PACKED*OUTPUT_FEATURE_PITCH - + f_block*OUTPUT_FEATURE_PITCH; -#else - const uint dst_index = OUTPUT_OFFSET - + b*OUTPUT_BATCH_PITCH - + f_block*OC_BLOCK_SIZE*OUTPUT_FEATURE_PITCH; -#endif - const uint filter_offset = f_block*OC_BLOCK_SIZE*INPUT0_FEATURE_NUM_PACKED; - - int 
dst_buf[OC_BLOCK_SIZE] = { 0 }; // 32 OC - - for (int k = 0; k < INPUT0_FEATURE_NUM_PACKED; ++k) - { - // Load 16 input elements from feature map by subgroup -#if PADDED_INPUT - INPUT0_TYPE src = input[input_offset + k*INPUT0_FEATURE_PITCH + x]; -#else - INPUT0_TYPE src = ALIGNED_BLOCK_READ(input, input_offset + k*INPUT0_FEATURE_PITCH); -#endif - - // Load 32 OC x 32 ICP. Each WI has lid-th and (lid+16)-th channels - wei_t wei = ALIGNED_BLOCK_READ2(weights, filter_offset + k * OC_BLOCK_SIZE); - - // Shuffle 32 OC x 32 ICP of weights in each WI - const wei_t wei0 = GET_WEI(wei, 0); - const wei_t wei1 = GET_WEI(wei, 1); - const wei_t wei2 = GET_WEI(wei, 2); - const wei_t wei3 = GET_WEI(wei, 3); - const wei_t wei4 = GET_WEI(wei, 4); - const wei_t wei5 = GET_WEI(wei, 5); - const wei_t wei6 = GET_WEI(wei, 6); - const wei_t wei7 = GET_WEI(wei, 7); - const wei_t wei8 = GET_WEI(wei, 8); - const wei_t wei9 = GET_WEI(wei, 9); - const wei_t wei10 = GET_WEI(wei, 10); - const wei_t wei11 = GET_WEI(wei, 11); - const wei_t wei12 = GET_WEI(wei, 12); - const wei_t wei13 = GET_WEI(wei, 13); - const wei_t wei14 = GET_WEI(wei, 14); - const wei_t wei15 = GET_WEI(wei, 15); - -#if LEFTOVERS_IC - if (k == INPUT0_FEATURE_NUM_PACKED - 1) - { - dst_buf[0] += popcount((wei0.s0 ^ src) & FILTER_MASK); - dst_buf[1] += popcount((wei1.s0 ^ src) & FILTER_MASK); - dst_buf[2] += popcount((wei2.s0 ^ src) & FILTER_MASK); - dst_buf[3] += popcount((wei3.s0 ^ src) & FILTER_MASK); - dst_buf[4] += popcount((wei4.s0 ^ src) & FILTER_MASK); - dst_buf[5] += popcount((wei5.s0 ^ src) & FILTER_MASK); - dst_buf[6] += popcount((wei6.s0 ^ src) & FILTER_MASK); - dst_buf[7] += popcount((wei7.s0 ^ src) & FILTER_MASK); - dst_buf[8] += popcount((wei8.s0 ^ src) & FILTER_MASK); - dst_buf[9] += popcount((wei9.s0 ^ src) & FILTER_MASK); - dst_buf[10] += popcount((wei10.s0 ^ src) & FILTER_MASK); - dst_buf[11] += popcount((wei11.s0 ^ src) & FILTER_MASK); - dst_buf[12] += popcount((wei12.s0 ^ src) & FILTER_MASK); - dst_buf[13] += 
popcount((wei13.s0 ^ src) & FILTER_MASK); - dst_buf[14] += popcount((wei14.s0 ^ src) & FILTER_MASK); - dst_buf[15] += popcount((wei15.s0 ^ src) & FILTER_MASK); - -#if OUTPUT_FEATURE_NUM > 16 - dst_buf[16] += popcount((wei0.s1 ^ src) & FILTER_MASK); - dst_buf[17] += popcount((wei1.s1 ^ src) & FILTER_MASK); - dst_buf[18] += popcount((wei2.s1 ^ src) & FILTER_MASK); - dst_buf[19] += popcount((wei3.s1 ^ src) & FILTER_MASK); - dst_buf[20] += popcount((wei4.s1 ^ src) & FILTER_MASK); - dst_buf[21] += popcount((wei5.s1 ^ src) & FILTER_MASK); - dst_buf[22] += popcount((wei6.s1 ^ src) & FILTER_MASK); - dst_buf[23] += popcount((wei7.s1 ^ src) & FILTER_MASK); - dst_buf[24] += popcount((wei8.s1 ^ src) & FILTER_MASK); - dst_buf[25] += popcount((wei9.s1 ^ src) & FILTER_MASK); - dst_buf[26] += popcount((wei10.s1 ^ src) & FILTER_MASK); - dst_buf[27] += popcount((wei11.s1 ^ src) & FILTER_MASK); - dst_buf[28] += popcount((wei12.s1 ^ src) & FILTER_MASK); - dst_buf[29] += popcount((wei13.s1 ^ src) & FILTER_MASK); - dst_buf[30] += popcount((wei14.s1 ^ src) & FILTER_MASK); - dst_buf[31] += popcount((wei15.s1 ^ src) & FILTER_MASK); -#endif - break; - } -#endif - dst_buf[0] += popcount(wei0.s0 ^ src); - dst_buf[1] += popcount(wei1.s0 ^ src); - dst_buf[2] += popcount(wei2.s0 ^ src); - dst_buf[3] += popcount(wei3.s0 ^ src); - dst_buf[4] += popcount(wei4.s0 ^ src); - dst_buf[5] += popcount(wei5.s0 ^ src); - dst_buf[6] += popcount(wei6.s0 ^ src); - dst_buf[7] += popcount(wei7.s0 ^ src); - dst_buf[8] += popcount(wei8.s0 ^ src); - dst_buf[9] += popcount(wei9.s0 ^ src); - dst_buf[10] += popcount(wei10.s0 ^ src); - dst_buf[11] += popcount(wei11.s0 ^ src); - dst_buf[12] += popcount(wei12.s0 ^ src); - dst_buf[13] += popcount(wei13.s0 ^ src); - dst_buf[14] += popcount(wei14.s0 ^ src); - dst_buf[15] += popcount(wei15.s0 ^ src); - -#if OUTPUT_FEATURE_NUM > 16 - dst_buf[16] += popcount(wei0.s1 ^ src); - dst_buf[17] += popcount(wei1.s1 ^ src); - dst_buf[18] += popcount(wei2.s1 ^ src); - dst_buf[19] += 
popcount(wei3.s1 ^ src); - dst_buf[20] += popcount(wei4.s1 ^ src); - dst_buf[21] += popcount(wei5.s1 ^ src); - dst_buf[22] += popcount(wei6.s1 ^ src); - dst_buf[23] += popcount(wei7.s1 ^ src); - dst_buf[24] += popcount(wei8.s1 ^ src); - dst_buf[25] += popcount(wei9.s1 ^ src); - dst_buf[26] += popcount(wei10.s1 ^ src); - dst_buf[27] += popcount(wei11.s1 ^ src); - dst_buf[28] += popcount(wei12.s1 ^ src); - dst_buf[29] += popcount(wei13.s1 ^ src); - dst_buf[30] += popcount(wei14.s1 ^ src); - dst_buf[31] += popcount(wei15.s1 ^ src); -#endif - } - - // Load data for fused operations (scales, biases, quantization thresholds, etc) -#if CUSTOM_FUSED_OPS - FUSED_OPS_PREPARE_DATA; -#endif - - UNIT_TYPE dst[OC_BLOCK_SIZE]; - for (int oc = 0; oc < OC_BLOCK_SIZE; oc++) - { - CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM - 2*dst_buf[oc]); -#if CUSTOM_FUSED_OPS - DO_ELTWISE_FUSED_OPS; -// Don't save floating-point intermediate result, since packed one is already computed -#if !BINARY_PACKED_OUTPUT - dst[oc] = res; -#endif -#elif HAS_FUSED_OPS - FUSED_OPS; - dst[oc] = FUSED_OPS_RESULT; -#endif - - } - - bool in_x = x < OUTPUT_SIZE_X; - bool in_y = y < OUTPUT_SIZE_Y; -#if BINARY_PACKED_OUTPUT - -#if PADDED_OUTPUT - if (in_x && in_y) - output[dst_index + y*OUTPUT_Y_PITCH + x] = TO_OUTPUT_TYPE(packed_res); -#else - if (xy * XY_BLOCK_SIZE < OUTPUT_SIZE_X*OUTPUT_SIZE_Y) - ALIGNED_BLOCK_WRITE(output, dst_index + xy*XY_BLOCK_SIZE, TO_OUTPUT_TYPE(packed_res)); - else if (in_x && in_y) - output[dst_index + y*OUTPUT_Y_PITCH + x] = TO_OUTPUT_TYPE(packed_res); - -#endif - -#else - - for (int oc = 0; oc < OC_BLOCK_SIZE; oc++) - { - bool in_fm = f_block*OC_BLOCK_SIZE + oc < OUTPUT_FEATURE_NUM; - if (in_x && in_y && in_fm) - output[dst_index + oc*OUTPUT_FEATURE_PITCH + y*OUTPUT_Y_PITCH + x] = TO_OUTPUT_TYPE(dst[oc]); - } - -#endif - -} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl deleted file mode 100644 index 32586d3134a56b..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_1x1_b_fs_yx_fsv16.cl +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/batch_headers/sub_group_block_read.cl" -#include "include/batch_headers/sub_group_block_write.cl" -#include "include/batch_headers/sub_group_shuffle.cl" -#include "include/batch_headers/fetch_data.cl" -#include "include/unit_type.cl" - -#define OC_BLOCK_SIZE 16 - -#define GET_SRC(data, id) _sub_group_shuffle(data, id) -#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_uint(_sub_group_block_read((const __global uint*)(ptr) + (byte_offset))) -#define ALIGNED_BLOCK_READ2(ptr, byte_offset) as_uint2(_sub_group_block_read2((const __global uint*)(ptr) + (byte_offset))) - -REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE) -__attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1))) -KERNEL(binary_convolution_1x1_b_fs_yx_fsv16)(const __global INPUT0_TYPE* input, - __global OUTPUT_TYPE* output, - const __global FILTER_TYPE* weights -#if HAS_FUSED_OPS_DECLS - , FUSED_OPS_DECLS -#endif -) -{ - const int xy = get_group_id(0); - const int f_block = get_global_id(1); - const int b = get_global_id(2); - const int lid = get_sub_group_local_id(); -#if PADDED_INPUT - const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X; - const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X; - const uint input_offset = INPUT0_OFFSET - + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH - + y*INPUT0_Y_PITCH; -#else - const int x = (xy * XY_BLOCK_SIZE + lid) % OUTPUT_SIZE_X; - const int y = (xy * XY_BLOCK_SIZE + lid) / OUTPUT_SIZE_X; - const uint input_offset = INPUT0_OFFSET - + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH - + xy*XY_BLOCK_SIZE; -#endif - const uint output_x_pitch = OC_BLOCK_SIZE; - const 
uint output_y_pitch = output_x_pitch * (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X); - const uint output_total_f_size = OUTPUT_PAD_BEFORE_FEATURE_NUM + OUTPUT_FEATURE_NUM + OUTPUT_PAD_AFTER_FEATURE_NUM; - const uint output_fs_pitch = output_y_pitch * (OUTPUT_PAD_BEFORE_SIZE_Y + OUTPUT_SIZE_Y + OUTPUT_PAD_AFTER_SIZE_Y); - const uint output_b_pitch = output_fs_pitch * ((output_total_f_size + OC_BLOCK_SIZE - 1) / OC_BLOCK_SIZE); - const uint dst_index = OUTPUT_OFFSET*OC_BLOCK_SIZE - + b*output_b_pitch - + f_block*output_fs_pitch; - - const uint filter_offset = ((f_block/2)*2)*OC_BLOCK_SIZE*INPUT0_FEATURE_NUM_PACKED + (f_block%2)*16; - - int dst_buf[OC_BLOCK_SIZE] = { 0 }; // 16 X - - for (int k = 0; k < INPUT0_FEATURE_NUM_PACKED; ++k) - { - // Load 16 input elements from feature map by subgroup -#if PADDED_INPUT - INPUT0_TYPE src = input[input_offset + k*INPUT0_FEATURE_PITCH + x]; -#else - INPUT0_TYPE src = ALIGNED_BLOCK_READ(input, input_offset + k*INPUT0_FEATURE_PITCH); -#endif - - // Load 32 OC x 32 ICP. 
Each WI has lid-th and (lid+16)-th channels - FILTER_TYPE wei = ALIGNED_BLOCK_READ(weights, filter_offset + k * OC_BLOCK_SIZE*2); - - // Shuffle 2 OC x 32 ICP x 16 X of src - const INPUT0_TYPE src0 = GET_SRC(src, 0); - const INPUT0_TYPE src1 = GET_SRC(src, 1); - const INPUT0_TYPE src2 = GET_SRC(src, 2); - const INPUT0_TYPE src3 = GET_SRC(src, 3); - const INPUT0_TYPE src4 = GET_SRC(src, 4); - const INPUT0_TYPE src5 = GET_SRC(src, 5); - const INPUT0_TYPE src6 = GET_SRC(src, 6); - const INPUT0_TYPE src7 = GET_SRC(src, 7); - const INPUT0_TYPE src8 = GET_SRC(src, 8); - const INPUT0_TYPE src9 = GET_SRC(src, 9); - const INPUT0_TYPE src10 = GET_SRC(src, 10); - const INPUT0_TYPE src11 = GET_SRC(src, 11); - const INPUT0_TYPE src12 = GET_SRC(src, 12); - const INPUT0_TYPE src13 = GET_SRC(src, 13); - const INPUT0_TYPE src14 = GET_SRC(src, 14); - const INPUT0_TYPE src15 = GET_SRC(src, 15); - -#if LEFTOVERS_IC - if (k == INPUT0_FEATURE_NUM_PACKED - 1) - { - dst_buf[0] += popcount((wei ^ src0) & FILTER_MASK); - dst_buf[1] += popcount((wei ^ src1) & FILTER_MASK); - dst_buf[2] += popcount((wei ^ src2) & FILTER_MASK); - dst_buf[3] += popcount((wei ^ src3) & FILTER_MASK); - dst_buf[4] += popcount((wei ^ src4) & FILTER_MASK); - dst_buf[5] += popcount((wei ^ src5) & FILTER_MASK); - dst_buf[6] += popcount((wei ^ src6) & FILTER_MASK); - dst_buf[7] += popcount((wei ^ src7) & FILTER_MASK); - dst_buf[8] += popcount((wei ^ src8) & FILTER_MASK); - dst_buf[9] += popcount((wei ^ src9) & FILTER_MASK); - dst_buf[10] += popcount((wei ^ src10) & FILTER_MASK); - dst_buf[11] += popcount((wei ^ src11) & FILTER_MASK); - dst_buf[12] += popcount((wei ^ src12) & FILTER_MASK); - dst_buf[13] += popcount((wei ^ src13) & FILTER_MASK); - dst_buf[14] += popcount((wei ^ src14) & FILTER_MASK); - dst_buf[15] += popcount((wei ^ src15) & FILTER_MASK); - break; - } -#endif - dst_buf[0] += popcount(wei ^ src0); - dst_buf[1] += popcount(wei ^ src1); - dst_buf[2] += popcount(wei ^ src2); - dst_buf[3] += popcount(wei ^ 
src3); - dst_buf[4] += popcount(wei ^ src4); - dst_buf[5] += popcount(wei ^ src5); - dst_buf[6] += popcount(wei ^ src6); - dst_buf[7] += popcount(wei ^ src7); - dst_buf[8] += popcount(wei ^ src8); - dst_buf[9] += popcount(wei ^ src9); - dst_buf[10] += popcount(wei ^ src10); - dst_buf[11] += popcount(wei ^ src11); - dst_buf[12] += popcount(wei ^ src12); - dst_buf[13] += popcount(wei ^ src13); - dst_buf[14] += popcount(wei ^ src14); - dst_buf[15] += popcount(wei ^ src15); - } - - // Load data for fused operations (scales, biases, quantization thresholds, etc) -#if CUSTOM_FUSED_OPS - FUSED_OPS_PREPARE_DATA; -#endif - - OUTPUT_TYPE dst[OC_BLOCK_SIZE]; - __attribute__((opencl_unroll_hint(OC_BLOCK_SIZE))) - for (int oc = 0; oc < OC_BLOCK_SIZE; oc++) - { - CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM - 2*dst_buf[oc]); -#if CUSTOM_FUSED_OPS - DO_ELTWISE_FUSED_OPS; - dst[oc] = res; -#elif HAS_FUSED_OPS - FUSED_OPS; - dst[oc] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT); -#endif - } - -#if LEFTOVERS_OC - bool in_fm = f_block*OC_BLOCK_SIZE + lid < OUTPUT_FEATURE_NUM; - __attribute__((opencl_unroll_hint(SUB_GROUP_SIZE))) - for (int ox = 0; ox < SUB_GROUP_SIZE; ox++) { - int xi = (xy * XY_BLOCK_SIZE+ox) % OUTPUT_SIZE_X; - int yi = (xy * XY_BLOCK_SIZE+ox) / OUTPUT_SIZE_X; - bool in_x = xi < OUTPUT_SIZE_X; - bool in_y = yi < OUTPUT_SIZE_Y; - if (in_x && in_y && in_fm) { - output[dst_index + yi*output_y_pitch + xi*output_x_pitch + lid] = dst[ox]; - } - } -#else - for (int ox = 0; ox < SUB_GROUP_SIZE; ox++) { - int xi = (xy * XY_BLOCK_SIZE+ox) % OUTPUT_SIZE_X; - int yi = (xy * XY_BLOCK_SIZE+ox) / OUTPUT_SIZE_X; - bool in_x = xi < OUTPUT_SIZE_X; - bool in_y = yi < OUTPUT_SIZE_Y; - if (in_x && in_y) - UNIT_BLOCK_WRITE(output, dst_index + yi*output_y_pitch + xi*output_x_pitch, dst[ox]); - } -#endif -} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_generic.cl 
b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_generic.cl deleted file mode 100644 index 9536247e721e8d..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_generic.cl +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/batch_headers/sub_group_block_read.cl" -#include "include/batch_headers/sub_group_shuffle.cl" -#include "include/batch_headers/fetch_data.cl" - -#define OC_BLOCK_SIZE 32 - -#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_uint(_sub_group_block_read((const __global uint*)(ptr) + (byte_offset))) -#define ALIGNED_BLOCK_READ2(ptr, byte_offset) as_uint2(_sub_group_block_read2((const __global uint*)(ptr) + (byte_offset))) - -#if BINARY_PACKED_OUTPUT - #define BUFFER_TYPE UNIT_TYPE -#else - #define BUFFER_TYPE OUTPUT_TYPE -#endif - -REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE) -__attribute__((reqd_work_group_size(SUB_GROUP_SIZE, 1, 1))) -KERNEL(binary_convolution_generic)(const __global INPUT0_TYPE* input, - __global OUTPUT_TYPE* output, - const __global FILTER_TYPE* weights -#if HAS_FUSED_OPS_DECLS - , FUSED_OPS_DECLS -#endif -) -{ - const int f_block = get_global_id(1); - const int lid = get_sub_group_local_id(); - const int b = get_global_id(2); - - const int xy = get_group_id(0); - const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE; - const int y = (xy / X_BLOCKS); - - const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X; - const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y; - - const uint input_offset = INPUT0_OFFSET - + b*INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH - + input_y*INPUT0_Y_PITCH - + input_x*INPUT0_X_PITCH; - - typedef MAKE_VECTOR_TYPE(FILTER_TYPE, 2) data_t; - -#if BINARY_PACKED_OUTPUT - const uint dst_index = OUTPUT_OFFSET - + b*OUTPUT_FEATURE_NUM_PACKED*OUTPUT_FEATURE_PITCH - + f_block*OUTPUT_FEATURE_PITCH - + y*OUTPUT_Y_PITCH - + x; -#else - const uint dst_index = 
OUTPUT_OFFSET - + b*OUTPUT_BATCH_PITCH - + f_block*OC_BLOCK_SIZE*OUTPUT_FEATURE_PITCH - + y*OUTPUT_Y_PITCH - + x; -#endif - const uint filter_offset = f_block*OC_BLOCK_SIZE*INPUT0_FEATURE_NUM_PACKED*FILTER_SIZE_Y*FILTER_SIZE_X; - - int dst_buf[SUB_GROUP_SIZE*2] = { 0 }; // 2 OC x 16 X - -#if EXCLUDE_PAD - int real_ks = 0; - // calc real kernel size for out_x = x+lid - for (int kh = 0; kh < FILTER_SIZE_Y; kh++) - { - for (int kw = 0; kw < FILTER_SIZE_X; kw++) - { - real_ks += ((input_x + kw + lid*STRIDE_SIZE_X >= 0) && - (input_x + kw + lid*STRIDE_SIZE_X < INPUT0_SIZE_X) && - (input_y + kh >= 0) && - (input_y + kh < INPUT0_SIZE_Y)) ? 1 : 0; - } - } -#endif - - for (int k = 0; k < INPUT0_FEATURE_NUM_PACKED; ++k) - { - for (int kh = 0; kh < FILTER_SIZE_Y; kh++) - { - INPUT0_TYPE line_cache[INPUT_ELEMENTS_PER_WI]; - for (int i = 0; i < INPUT_ELEMENTS_PER_WI; i++) - { - line_cache[i] = PAD_VALUE; - } - - if (input_y + kh >= 0 && input_y + kh < INPUT0_SIZE_Y) - { - for (int i = 0; i < INPUT_ELEMENTS_PER_WI; i++) - { - if (input_x + i*SUB_GROUP_SIZE >= 0 && input_x + (i+1)*SUB_GROUP_SIZE < INPUT0_SIZE_X) - line_cache[i] = ALIGNED_BLOCK_READ(input, input_offset + kh*INPUT0_Y_PITCH + k*INPUT0_FEATURE_PITCH + i*SUB_GROUP_SIZE); - else if (input_x + i*SUB_GROUP_SIZE + lid >= 0 && input_x + i*SUB_GROUP_SIZE + lid < INPUT0_SIZE_X) - line_cache[i] = input[input_offset + kh*INPUT0_Y_PITCH + k*INPUT0_FEATURE_PITCH + i*SUB_GROUP_SIZE + lid]; - } - } - - __attribute__((opencl_unroll_hint(FILTER_SIZE_X))) - for (int kw = 0; kw < FILTER_SIZE_X; kw++) - { - // Load 32 OC x 32 ICP. 
Each WI has lid-th and (lid+16)-th channels - data_t wei = ALIGNED_BLOCK_READ2(weights, filter_offset + OC_BLOCK_SIZE*(k*FILTER_SIZE_Y*FILTER_SIZE_X + kh*FILTER_SIZE_X + kw)); - - // Single WI in subgroup calcs 2 OC x 16 X elements - __attribute__((opencl_unroll_hint(SUB_GROUP_SIZE))) - for (int i = 0; i < SUB_GROUP_SIZE; i++) - { - INPUT0_TYPE src = _sub_group_shuffle(line_cache[(kw + i*STRIDE_SIZE_X) / SUB_GROUP_SIZE], - (kw + i*STRIDE_SIZE_X) % SUB_GROUP_SIZE); -#if EXCLUDE_PAD - int compute = ((input_x + kw + i*STRIDE_SIZE_X >= 0) && - (input_x + kw + i*STRIDE_SIZE_X < INPUT0_SIZE_X) && - (input_y + kh >= 0) && - (input_y + kh < INPUT0_SIZE_Y)) ? 1 : 0; - - if (!compute) - continue; -#endif - -#if LEFTOVERS_IC - if (k == INPUT0_FEATURE_NUM_PACKED - 1) - { - dst_buf[0*SUB_GROUP_SIZE + i] += popcount((wei.s0 ^ src) & FILTER_MASK); - dst_buf[1*SUB_GROUP_SIZE + i] += popcount((wei.s1 ^ src) & FILTER_MASK); - continue; - } -#endif - - dst_buf[0*SUB_GROUP_SIZE + i] += popcount(wei.s0 ^ src); - dst_buf[1*SUB_GROUP_SIZE + i] += popcount(wei.s1 ^ src); - } - } - } - } - -#if EXCLUDE_PAD - -#endif - // Load data for fused operations (scales, biases, quantization thresholds, etc) -#if CUSTOM_FUSED_OPS - FUSED_OPS_PREPARE_DATA; -#endif - - BUFFER_TYPE dst[SUB_GROUP_SIZE*2]; - - __attribute__((opencl_unroll_hint(SUB_GROUP_SIZE*2))) - for (int i = 0; i < SUB_GROUP_SIZE*2; i++) - { -#if EXCLUDE_PAD - CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM*_sub_group_shuffle(real_ks, i%SUB_GROUP_SIZE) - 2*dst_buf[i]); -#else - CONV_RESULT_TYPE res = TO_CONV_RESULT_TYPE(INPUT0_FEATURE_NUM*FILTER_SIZE_Y*FILTER_SIZE_X - 2*dst_buf[i]); -#endif - -#if CUSTOM_FUSED_OPS - DO_ELTWISE_FUSED_OPS; - dst[i] = res; -#elif HAS_FUSED_OPS - FUSED_OPS; - dst[i] = FUSED_OPS_RESULT; -#else - dst[i] = res; -#endif - - } - -#if BINARY_PACKED_OUTPUT - int packed_out[SUB_GROUP_SIZE]; - -#if CUSTOM_FUSED_OPS - DO_CHANNEL_PACK_OPS; -#else - #error "BINARY_PACKED_OUTPUT should be true only if 
node has fused quantize with bin output" -#endif - - bool in_x = (x + lid) < OUTPUT_SIZE_X; - bool in_y = y < OUTPUT_SIZE_Y; - if (in_x && in_y) - output[dst_index + lid] = packed_out[lid]; - -#else - - for (int oc = 0; oc < 2; oc++) - { - for (int ow = 0; ow < SUB_GROUP_SIZE; ow++) - { - bool in_x = (x + ow) < OUTPUT_SIZE_X; - bool in_y = y < OUTPUT_SIZE_Y; - bool in_fm = f_block*OC_BLOCK_SIZE + oc*SUB_GROUP_SIZE + lid < OUTPUT_FEATURE_NUM; - if (in_x && in_y && in_fm) - { - output[dst_index + (oc*SUB_GROUP_SIZE + lid)*OUTPUT_FEATURE_PITCH + ow] = TO_OUTPUT_TYPE(dst[oc*SUB_GROUP_SIZE + ow]); - } - } - } - -#endif -} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_ref.cl deleted file mode 100644 index d7d2c7363a9f4d..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/binary_convolution_gpu_ref.cl +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/batch_headers/fetch_data.cl" - -KERNEL(binary_convolution_ref)(const __global INPUT0_TYPE* input, - __global OUTPUT_TYPE* output, - const __global FILTER_TYPE* weights -#if HAS_FUSED_OPS_DECLS - , FUSED_OPS_DECLS -#endif -) -{ - const int b = get_global_id(0); - const int f = get_global_id(1); - const int yx = get_global_id(2); - const int y = yx / OUTPUT_SIZE_X; - const int x = yx % OUTPUT_SIZE_X; - - const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X; - const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y; - - const int output_index = OUTPUT_OFFSET - + b * OUTPUT_BATCH_PITCH - + f * OUTPUT_FEATURE_PITCH - + y * OUTPUT_Y_PITCH - + x * OUTPUT_X_PITCH; - - const int input_index = INPUT0_OFFSET - + b * INPUT0_FEATURE_NUM_PACKED*INPUT0_FEATURE_PITCH; - - const int weights_index = (f / OFM_BLOCK_SIZE) * INPUT0_FEATURE_NUM_PACKED*FILTER_SIZE_X*FILTER_SIZE_Y*OFM_BLOCK_SIZE - + (f % 
OFM_BLOCK_SIZE); -#if EXCLUDE_PAD - int ks = 0; -#endif - int res_popcnt = 0; - for (int icp = 0; icp < INPUT0_FEATURE_NUM_PACKED; icp++) - { - for (int kh = 0; kh < FILTER_SIZE_Y; kh++) - { - const int input_offset_y = input_y + kh * DILATION_SIZE_Y; - const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0; - - for (int kw = 0; kw < FILTER_SIZE_X; kw++) - { - const int input_offset_x = input_x + kw * DILATION_SIZE_X; - const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0; - FILTER_TYPE wei = weights[weights_index + icp*OFM_BLOCK_SIZE*FILTER_SIZE_X*FILTER_SIZE_Y + - kh*FILTER_SIZE_X*OFM_BLOCK_SIZE + kw*OFM_BLOCK_SIZE]; -#if EXCLUDE_PAD - if (!zero_y && !zero_x) - { - INPUT0_TYPE src = input[input_index + - icp * INPUT0_FEATURE_PITCH + - input_offset_y*INPUT0_Y_PITCH + - input_offset_x*INPUT0_X_PITCH]; // 32 packed input channels - -#if LEFTOVERS - if (icp == INPUT0_FEATURE_NUM_PACKED - 1) - res_popcnt += popcount((src ^ wei) & LEFTOVERS_MASK); - else -#endif - res_popcnt += popcount(src ^ wei); - if (icp == 0) - ks++; - } -#else - if (zero_y || zero_x) - { -#if LEFTOVERS - if (icp == INPUT0_FEATURE_NUM_PACKED - 1) - res_popcnt += popcount((PAD_VALUE ^ wei) & LEFTOVERS_MASK); - else -#endif - res_popcnt += popcount(PAD_VALUE ^ wei); - } - else - { - INPUT0_TYPE src = input[input_index + - icp * INPUT0_FEATURE_PITCH + - input_offset_y*INPUT0_Y_PITCH + - input_offset_x*INPUT0_X_PITCH]; // 32 packed input channels -#if LEFTOVERS - if (icp == INPUT0_FEATURE_NUM_PACKED - 1) - res_popcnt += popcount((src ^ wei) & LEFTOVERS_MASK); - else -#endif - res_popcnt += popcount(src ^ wei); - - } -#endif - } - } - } - - -#if EXCLUDE_PAD - UNIT_TYPE res = TO_OUTPUT_TYPE(INPUT0_FEATURE_NUM*ks - 2*res_popcnt); -#else - UNIT_TYPE res = TO_OUTPUT_TYPE(INPUT0_FEATURE_NUM*FILTER_SIZE_X*FILTER_SIZE_Y - 2*res_popcnt); -#endif - -#if HAS_FUSED_OPS - FUSED_OPS; - res = FUSED_OPS_RESULT; -#endif - - output[output_index] = res; -} diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/quantize_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/quantize_gpu_ref.cl index 8941cf83fb3ad8..7e89faba03ad8a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/quantize_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/quantize_gpu_ref.cl @@ -52,42 +52,6 @@ KERNEL(quantize_ref)( const int v = ((vuwzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z / OUTPUT_SIZE_W / OUTPUT_SIZE_U; #endif -#if PACKED_BINARY_OUTPUT - const int output_offset = OUTPUT_OFFSET - + b*OUTPUT_FEATURE_NUM_PACKED*OUTPUT_FEATURE_PITCH - + of*OUTPUT_FEATURE_PITCH - + y*OUTPUT_Y_PITCH - + x*OUTPUT_X_PITCH; - - const int threshold_offset = INPUT1_OFFSET - + (b % INPUT1_BATCH_NUM)*INPUT1_BATCH_PITCH - + (y % INPUT1_SIZE_Y)*INPUT1_Y_PITCH - + (x % INPUT1_SIZE_X)*INPUT1_X_PITCH; - - OUTPUT_TYPE res = 0x00000000; -#if SINGLE_OUT_VAL - int high_bit = output_high[0] == UNIT_VAL_ONE ? 1 : 0; - int low_bit = output_low[0] == UNIT_VAL_ONE ? 1 : 0; -#endif - int limit = min((int)OC_BLOCK_SIZE, (int)INPUT0_FEATURE_NUM); - for (int f = 0; f < limit; f++) - { - UNIT_TYPE val = input[INPUT0_GET_INDEX(b, of*OC_BLOCK_SIZE + f, y, x)]; - UNIT_TYPE threshold = input_low[threshold_offset + ((of*OC_BLOCK_SIZE + f) % INPUT1_FEATURE_NUM)*INPUT1_FEATURE_PITCH]; -#if PER_CHANNEL_OUT_VAL - int high_bit = output_high[of*OC_BLOCK_SIZE + f] == UNIT_VAL_ONE ? 1 : 0; - int low_bit = output_low[of*OC_BLOCK_SIZE + f] == UNIT_VAL_ONE ? 1 : 0; -#endif - res |= (((val > threshold) ? 
high_bit : low_bit) << f); - } - - if (x >= OUTPUT_SIZE_X || y >= OUTPUT_SIZE_Y) - return; - - output[output_offset] = res; - -#else - #if INPUT0_DIMS == 8 const int input_offset = INPUT0_GET_INDEX(b, of, v, u, w, z, y, x); #elif INPUT0_DIMS == 7 @@ -195,6 +159,4 @@ KERNEL(quantize_ref)( * (UNIT_VAL_ONE / (LEVELS-1) * (output_high_val - output_low_val)) + output_low_val)); #endif } - -#endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_binary.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_binary.cl deleted file mode 100644 index 6b9cb200cbe8a1..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data_binary.cl +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/reshape_dims.cl" -#include "include/batch_headers/fetch_data.cl" - - -#if !INPUT0_LAYOUT_BFYX && !INPUT0_LAYOUT_B_FS_YX_32FP -#error "Data binary reorder: unsupported input layout" -#endif - -#if !OUTPUT_LAYOUT_BFYX && !OUTPUT_LAYOUT_B_FS_YX_32FP -#error "Data binary reorder: unsupported output layout" -#endif - -#ifdef MEAN_SUBTRACT_IN_BUFFER -#error "Mean subtruction is not supported in binary reorder" -#endif - - -KERNEL (reorder_data_binary)(const __global INPUT_REORDER_TYPE* input, - __global OUTPUT_REORDER_TYPE* output) -{ - const uint b = get_global_id(0); - const uint f = get_global_id(1); - const uint y = ((uint)(get_global_id(2))) / INPUT0_SIZE_X; - const uint x = ((uint)(get_global_id(2))) % INPUT0_SIZE_X; - - -#if BINARY_INPUT && BINARY_OUTPUT - int input_index = INPUT0_OFFSET - + b * INPUT_PACKED_FEATURES_NUM * INPUT0_FEATURE_PITCH - + f * INPUT0_FEATURE_PITCH - + y * INPUT0_Y_PITCH - + x * INPUT0_X_PITCH; - int output_index = OUTPUT_OFFSET - + b * OUTPUT_PACKED_FEATURES_NUM * OUTPUT_FEATURE_PITCH - + f * OUTPUT_FEATURE_PITCH - + y * OUTPUT_Y_PITCH - + x * OUTPUT_X_PITCH; - - output[output_index] = 
ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(input[input_index]), NL_M, NL_N); -#elif BINARY_OUTPUT - int output_index = OUTPUT_OFFSET - + b * OUTPUT_PACKED_FEATURES_NUM * OUTPUT_FEATURE_PITCH - + f * OUTPUT_FEATURE_PITCH - + y * OUTPUT_Y_PITCH - + x * OUTPUT_X_PITCH; - - OUTPUT_TYPE res = 0x00000000; - int limit = min((int)IFM_PACK_SIZE, (int)(INPUT0_FEATURE_NUM - f*IFM_PACK_SIZE)); - for (int c = 0; c < limit; c++) - { - // index of required bit - int input_index = INPUT0_OFFSET - + b * INPUT0_BATCH_PITCH - + (f * IFM_PACK_SIZE + c) * INPUT0_FEATURE_PITCH - + y * INPUT0_Y_PITCH - + x * INPUT0_X_PITCH; - - int bit = input[input_index] > UNIT_VAL_ZERO ? 1 : 0; - res |= (bit << c); - } - output[output_index] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(res), NL_M, NL_N); -#elif BINARY_INPUT - int input_index = INPUT0_OFFSET - + b * INPUT_PACKED_FEATURES_NUM * INPUT0_FEATURE_PITCH - + f * INPUT0_FEATURE_PITCH - + y * INPUT0_Y_PITCH - + x * INPUT0_X_PITCH; - int res = input[input_index]; - int limit = min((int)IFM_PACK_SIZE, (int)(INPUT0_FEATURE_NUM - f*IFM_PACK_SIZE)); - for (int c = 0; c < limit; c++) - { - int output_index = OUTPUT_OFFSET - + b * OUTPUT_BATCH_PITCH - + (f*IFM_PACK_SIZE + c) * OUTPUT_FEATURE_PITCH - + y * OUTPUT_Y_PITCH - + x * OUTPUT_X_PITCH; - - int bit = (res >> c) & 0x00000001 > 0 ? 
1 : -1; - output[output_index] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(bit), NL_M, NL_N); - } -#else -#error "Binary reorder is used without binary tensors" -#endif - -} diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_binary.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_binary.cl deleted file mode 100644 index 63115f2fdbc1c0..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights_binary.cl +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "include/reshape_dims.cl" - -#define OFM_BLOCK_SIZE 32 -#define IFM_PACK_SIZE 32 - -// packed binary oihw to packed binary os_is_yx_osv32_isv32p -KERNEL (reorder_weights_binary)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) -{ - const unsigned o = get_global_id(0); - const unsigned i = get_global_id(1); - const unsigned y = (uint)get_global_id(2) / OUTPUT_SIZE_X; - const unsigned x = (uint)get_global_id(2) % OUTPUT_SIZE_X; - - int output_index = OUTPUT_OFFSET - + (o % OFM_BLOCK_SIZE) - + (o / OFM_BLOCK_SIZE) * ((OUTPUT_IFM_NUM + IFM_PACK_SIZE - 1) / IFM_PACK_SIZE) * OUTPUT_SIZE_Y * OUTPUT_SIZE_X * OFM_BLOCK_SIZE - + i * OFM_BLOCK_SIZE * OUTPUT_IFM_PITCH - + y * OFM_BLOCK_SIZE * OUTPUT_Y_PITCH - + x * OFM_BLOCK_SIZE * OUTPUT_X_PITCH; - - OUTPUT_TYPE res = 0x00000000; - int limit = min((int)IFM_PACK_SIZE, (int)(INPUT0_IFM_NUM - i*IFM_PACK_SIZE)); - for (int c = 0; c < limit; c++) - { - // index of required bit - int input_index = INPUT0_OFFSET - + o * INPUT0_OFM_PITCH - + (i * IFM_PACK_SIZE + c) * INPUT0_IFM_PITCH - + y * INPUT0_Y_PITCH - + x * INPUT0_X_PITCH; - - const int bit = input_index % IFM_PACK_SIZE; - const int element = input_index / IFM_PACK_SIZE; - res |= ((input[element] & (1 << bit)) >> bit) << c; - } - - output[output_index] = res; -} diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/common_tools.h b/src/plugins/intel_gpu/src/kernel_selector/common_tools.h index b9f3e69acf8c3e..e9673ca2e59c3f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_tools.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_tools.h @@ -23,7 +23,6 @@ inline uint32_t BytesPerElement(Datatype dt) { case Datatype::F32: case Datatype::INT32: case Datatype::UINT32: - case Datatype::BINARY: return 4; case Datatype::INT64: return 8; @@ -40,7 +39,6 @@ inline uint32_t BytesPerElement(WeightsType wt) { case WeightsType::F16: return 2; case WeightsType::F32: - case WeightsType::BINARY: case WeightsType::INT32: return 4; default: diff --git a/src/plugins/intel_gpu/src/kernel_selector/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common_types.h index 6d84aa65f378b8..e884cddefde0db 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common_types.h @@ -64,7 +64,6 @@ enum class KernelType { SLICE, STRIDED_SLICE, REVERSE_SEQUENCE, - BINARY_CONVOLUTION, QUANTIZE, LSTM_DYNAMIC_INPUT, LSTM_DYNAMIC_TIMELOOP, @@ -105,7 +104,6 @@ enum class KernelType { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// enum class Datatype { UNSUPPORTED, - BINARY, UINT4, INT4, INT8, @@ -124,7 +122,6 @@ enum class Datatype { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// enum class WeightsType { UNSUPPORTED, - BINARY, F16, F32, INT8, diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp index ac079ea448711c..9c94c9cc33e3de 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp @@ -113,8 +113,6 @@ namespace kernel_selector { std::string toCLType(WeightsType wType) { switch (wType) { - case 
WeightsType::BINARY: - return GetTypeName(); case WeightsType::INT4: case WeightsType::INT8: return GetTypeName(); @@ -134,8 +132,6 @@ std::string toCLType(WeightsType wType) { std::string toCLType(Datatype dType) { switch (dType) { - case Datatype::BINARY: - return GetTypeName(); case Datatype::INT8: return GetTypeName(); case Datatype::UINT8: @@ -1435,7 +1431,6 @@ JitConstants MakeTypeJitConstants(Datatype dataType, const std::string& macroNam is_fp = false; break; case Datatype::UINT32: - case Datatype::BINARY: type = "uint"; max_val = "UINT_MAX"; min_val = "0"; @@ -1539,8 +1534,6 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma return MakeTypeJitConstants(Datatype::INT4, macroName); case WeightsType::UINT4: return MakeTypeJitConstants(Datatype::UINT4, macroName); - case WeightsType::BINARY: - return MakeTypeJitConstants(Datatype::UINT32, macroName); case WeightsType::INT32: return MakeTypeJitConstants(Datatype::INT32, macroName); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 193d589daba44d..6f8e9f621c6959 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -103,7 +103,6 @@ std::string toString(DataLayout l) { case kernel_selector::DataLayout::bs_f_bsv8__af8: return "BS_F_BSV8__AF8"; case kernel_selector::DataLayout::bs_f_bsv16__af8: return "BS_F_BSV16__AF8"; case kernel_selector::DataLayout::winograd_2x3_s1_data: return "WINOGRAD_2x3_S1_DATA"; - case kernel_selector::DataLayout::b_fs_yx_32fp: return "B_FS_YX_32FP"; case kernel_selector::DataLayout::bfzyx: return "BFZYX"; case kernel_selector::DataLayout::bzyxf: return "BZYXF"; case kernel_selector::DataLayout::fs_b_yx_fsv32: return "FS_B_YX_FSV32"; @@ -141,7 +140,6 @@ std::string toString(DataLayout l) { std::string toString(Datatype dType) { switch (dType) 
{ - case Datatype::BINARY: return "BINARY"; case Datatype::UINT4: return "UINT4"; case Datatype::INT4: return "INT4"; case Datatype::INT8: return "INT8"; @@ -159,7 +157,6 @@ std::string toString(Datatype dType) { std::string toString(WeightsType wType) { switch (wType) { - case WeightsType::BINARY: return "BINARY"; case WeightsType::F16: return "F16"; case WeightsType::F32: return "F32"; case WeightsType::UINT4: return "UINT4"; @@ -186,8 +183,6 @@ std::string toString(KernelType kt) { case KernelType::ELTWISE: return "ELTWISE"; case KernelType::REORDER: return "REORDER"; case KernelType::SELECT: return "SELECT"; - case KernelType::BINARY_CONVOLUTION: - return "BINARY_CONVOLUTION"; case KernelType::NON_MAX_SUPPRESSION: return "NON_MAX_SUPPRESSION"; case KernelType::MATRIX_NMS: return "MATRIX_NMS"; @@ -361,7 +356,6 @@ std::string toString(WeightsLayout layout) { case WeightsLayout::os_is_yx_osv32_isv4: return "OS_IS_YX_OSV32_ISV4"; case WeightsLayout::os_is_zyx_osv32_isv4: return "OS_IS_ZYX_OSV32_ISV4"; case WeightsLayout::os_is_y_x8_osv8_isv4_swizzled_by_4: return "OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4"; - case WeightsLayout::os_is_yx_osv32_isv32p: return "OS_IS_YX_OSV32_ISV32P"; case WeightsLayout::oizyx: return "OIZYX"; case WeightsLayout::iozyx: return "IOZYX"; case WeightsLayout::os_is_zyx_isv16_osv16: return "OS_IS_ZYX_ISV16_OSV16"; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp index d7b3002bb6ffab..f458b560b3945e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.cpp @@ -80,9 +80,6 @@ void ParamsKey::EnableInputDataType(Datatype dt) { case Datatype::F32: key.inputType.val.F32 = 1; break; - case Datatype::BINARY: - key.inputType.val.binary = 1; - break; default: break; } @@ -125,9 +122,6 @@ void ParamsKey::EnableOutputDataType(Datatype dt) { case 
Datatype::F32: key.outputType.val.F32 = 1; break; - case Datatype::BINARY: - key.outputType.val.binary = 1; - break; default: break; } @@ -152,9 +146,6 @@ void ParamsKey::EnableInputWeightsType(WeightsType wt) { case WeightsType::UINT4: key.inputWeightsType.val.uint4 = 1; break; - case WeightsType::BINARY: - key.inputWeightsType.val.binary = 1; - break; case WeightsType::INT32: key.inputWeightsType.val.int32 = 1; default: @@ -181,9 +172,6 @@ void ParamsKey::EnableOutputWeightsType(WeightsType wt) { case WeightsType::UINT4: key.outputWeightsType.val.uint4 = 1; break; - case WeightsType::BINARY: - key.outputWeightsType.val.binary = 1; - break; case WeightsType::INT32: key.outputWeightsType.val.int32 = 1; default: diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h index 8fb50c1716633d..c8ba581f51c696 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h @@ -240,7 +240,6 @@ class ParamsKey { uint32_t cell : 1; } lstm_elt; struct quantize_t { - uint32_t packed_binary_output : 1; uint32_t scale_shift_opt : 1; } quantize; } dedicated; @@ -263,7 +262,6 @@ class ParamsKey { uint32_t int64 : 1; uint32_t F16 : 1; uint32_t F32 : 1; - uint32_t binary : 1; } val; uint32_t raw; } DataTypesKey; @@ -329,7 +327,6 @@ class ParamsKey { void EnableBilinearInterpolationPad() { key.restrict.val.dedicated.conv.bilinear_interpolation_pad = 1; } void EnableDeformableMask() { key.restrict.val.dedicated.conv.deformable_mask_enabled = 1; } - void EnableQuantizePackedBinaryOutput() { key.restrict.val.dedicated.quantize.packed_binary_output = 1; } void EnableQuantizeScaleShiftOpt() { key.restrict.val.dedicated.quantize.scale_shift_opt = 1; } void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; } @@ -606,8 +603,6 @@ struct dep_info { // - 
KernelBase::MakeFusedOpsDeclsJitConstants that creates arguments for kernel declaration and macro for all tensors used in // a fused op (requires FusedOpsConfiguration instance). // - fused_operation_desc contains a bunch of methods to generate variable/pointer names, type conversions, data loads -// If you need an example of custom code generation for fused ops, check BinaryConvolutionKernelGeneric::GetFusedPrimitivesJitConstants -// method in binary_convolution_kernel_generic.cpp. struct fused_operation_desc { std::shared_ptr op_params; int32_t dep_idx_start; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.cpp index bb3eb06e32b3b0..5530619fcf74fd 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_utils.cpp @@ -24,8 +24,6 @@ static WeightsType DataTypeToWeightsType(Datatype t) { return WeightsType::F16; case Datatype::F32: return WeightsType::F32; - case Datatype::BINARY: - return WeightsType::BINARY; case Datatype::INT32: return WeightsType::INT32; default: @@ -468,7 +466,6 @@ bool CheckInputsOutputNoPitchSameDims(const base_params& params) { {DataLayout::b_fs_yx_fsv8, {1, 8}}, {DataLayout::b_fs_zyx_fsv8, {1, 8}}, {DataLayout::fs_b_yx_fsv32, {1, 32}}, - {DataLayout::b_fs_yx_32fp, {1, 32}}, {DataLayout::bs_fs_yx_bsv32_fsv16, {32, 16}}, {DataLayout::bs_fs_zyx_bsv32_fsv16, {32, 16}}, {DataLayout::bs_fs_yx_bsv32_fsv32, {32, 32}}, diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.cpp deleted file mode 100644 index 12d9593d9a8ae9..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.cpp +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (C) 2018-2023 
Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "binary_convolution_kernel_1x1.h" -#include -#include -#include - -namespace kernel_selector { - -static const int sub_group_size = 16; -static const int ic_pack_size = 32; -static const int xy_block_size = 16; - -ParamsKey BinaryConvolutionKernel1x1::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::BINARY); - k.EnableInputWeightsType(WeightsType::BINARY); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::INT32); - k.EnableOutputDataType(Datatype::BINARY); - k.EnableInputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::b_fs_yx_32fp); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - k.EnableDifferentTypes(); - return k; -} - -DeviceFeaturesKey BinaryConvolutionKernel1x1::get_required_device_features_key(const Params& params, const optional_params& /*options*/) const { - DeviceFeaturesKey k; - k.requires_subgroup_shuffle(); - k.requires_blocked_read_write(); - - return k; -} - -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1::SetDefault(const binary_convolution_params& params, int) const { - DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); - - const auto& out = params.outputs[0]; - - auto x = out.X().v; - auto y = out.Y().v; - auto f = out.Feature().v; - auto b = out.Batch().v; - - dispatchData.gws[0] = Align(x * y, sub_group_size); - dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calcs 32 OC - dispatchData.gws[2] = b; - - dispatchData.lws[0] = sub_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - - return dispatchData; -} - -KernelsPriority BinaryConvolutionKernel1x1::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_1; -} - -bool 
BinaryConvolutionKernel1x1::Validate(const Params& p, const optional_params& o) const { - if (!BinaryConvolutionKernelBase::Validate(p, o) || !ConvolutionBinaryCheckInput(p, o)) - return false; - - const auto& params = static_cast(p); - - const auto& input = params.inputs[0]; - const auto& output = params.outputs[0]; - - const bool bOutputSizes = output.X().v != input.X().v || output.Y().v != input.Y().v; - const bool bFilterSize = params.filterSize.x != 1 || params.filterSize.y != 1; - const bool bStride = params.stride.x != 1 || params.stride.y != 1; - const bool bGroups = params.groups > 1; - - if (bOutputSizes || bFilterSize || bStride || bGroups) - return false; - - return true; -} - -JitConstants BinaryConvolutionKernel1x1::GetJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const { - auto jit = Parent::GetJitConstants(params, dispatchData); - - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); - jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.outputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("PADDED_INPUT", params.inputs[0].X().pad.Total() != 0)); - jit.AddConstant(MakeJitConstant("PADDED_OUTPUT", params.outputs[0].X().pad.Total() != 0)); - jit.AddConstant(MakeJitConstant("XY_BLOCK_SIZE", xy_block_size)); - if (params.inputs[0].Feature().v % ic_pack_size) { - jit.AddConstant(MakeJitConstant("LEFTOVERS_IC", params.inputs[0].Feature().v % ic_pack_size)); - jit.AddConstant(MakeJitConstant("FILTER_MASK", - (0xFFFFFFFF >> (ic_pack_size - params.inputs[0].Feature().v % ic_pack_size)))); - } - - if (params.outputs[0].GetDType() == Datatype::BINARY) { - jit.AddConstant(MakeJitConstant("BINARY_PACKED_OUTPUT", 1)); - } - - return jit; -} - -JitConstants BinaryConvolutionKernel1x1::GetFusedPrimitivesJitConstants(const 
binary_convolution_params& params, - const DispatchData& /*kd*/) const { - JitConstants jit = {}; - - auto input_dt = GetUnitType(params); - FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + oc)", "y", "x"}, "res", input_dt, 1 }; - jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf})); - - size_t op_id = 0; - std::string input_decls = ""; - std::string eltwise_fused_ops = ""; - std::string prepare_data = ""; - for (auto& fused_dep : params.fused_ops) { - auto fused_dep_codegen = FusedOpsCodeGenerator(fused_dep); - auto get_aligned_load2 = [&](std::string ptr, std::string byte_offset) -> std::string { - if (fused_dep.tensors[0].GetDType() == Datatype::F32) - return "(_sub_group_block_read2((const __global uint*)(" + ptr + ") + (" + byte_offset + ")))"; - else - return "(_sub_group_block_read_us2((const __global ushort*)(" + ptr + ") + (" + byte_offset + - ")))"; - }; - - auto get_shuffle = [&](std::string var, std::string lid) -> std::string { - return "(_sub_group_shuffle(" + var + ", " + lid + "))"; - }; - - std::string data_type = fused_dep_codegen.GetInputTypeName(0, 1); - std::string vec_data_type = fused_dep_codegen.GetInputTypeName(0, 2); - std::string sc = "sc" + toCodeString(op_id); - std::string sh = "sh" + toCodeString(op_id); - std::string e_add = "e_add" + toCodeString(op_id); - std::string e_mul = "e_mul" + toCodeString(op_id); - - switch (fused_dep.GetType()) { - case KernelType::QUANTIZE: { - std::string var_name_in = fused_dep_codegen.GetInputVarName(0); - std::string var_name_out = fused_dep_codegen.GetInputVarName(3); - std::string cast_type_vec = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2"; - std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? 
"as_float" : "as_half"; - - prepare_data += "\\\n\tint packed_res = 0;"; - if (fused_dep.tensors[0].Feature().v == params.outputs[0].Feature().v) { - prepare_data += "\\\n\t" + vec_data_type + " " + var_name_in + " = " + cast_type_vec + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";"; - eltwise_fused_ops += "\\\n\t" + data_type + " thresh = (oc < 16) ? " + get_shuffle(var_name_in + ".s0", "oc") + - " : " + get_shuffle(var_name_in + ".s1", "oc") + ";"; - } else { - prepare_data += "\\\n\t" + data_type + " " + var_name_in + " = " + cast_type + - + "(" + fused_dep_codegen.GetInputPtrName(0) + "[0]);"; - eltwise_fused_ops += "\\\n\t" + data_type + " thresh = " + var_name_in + ";"; - } - - - if (fused_dep.tensors[2].Feature().v == params.outputs[0].Feature().v) { - // Per-channel output value - prepare_data += "\\\n\t" + vec_data_type + " " + var_name_out + " = " + cast_type_vec + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(3), "f_block*OC_BLOCK_SIZE") + ";"; - eltwise_fused_ops +="\\\n\t" + data_type + " out_val = (oc < 16) ? 
" + get_shuffle(var_name_out + ".s0", "oc") + - " : " + get_shuffle(var_name_out + ".s1", "oc") + ";"; - } else { - // Per-tensor output value - prepare_data += "\\\n\t" + data_type + " " + var_name_out + " = " + cast_type + - + "(" + fused_dep_codegen.GetInputPtrName(3) + "[0]);"; - eltwise_fused_ops += "\\\n\t" + data_type + " out_val = " + var_name_out + ";"; - } - eltwise_fused_ops += "\\\n\tif (out_val == 1) "; - eltwise_fused_ops += "\\\n\t\tpacked_res |= (res > thresh) << oc;"; - eltwise_fused_ops += "\\\n\telse "; - eltwise_fused_ops += "\\\n\t\tpacked_res |= (res <= thresh) << oc;"; - - break; - } - - case KernelType::ACTIVATION: { - auto p = fused_dep.GetOpParams(); - base_activation_params activation = p->param; - if (activation.function != ActivationFunction::NONE) { - auto suffix = "_FUSED_OP" + toCodeString(op_id); - - jit.Merge(MakeActivationJitConstants(activation, fused_dep.output_tensor.GetDType(), suffix)); - eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");"; - } - - break; - } - - case KernelType::ELTWISE: { - std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2"; - std::string var_name = fused_dep_codegen.GetInputVarName(0); - prepare_data += "\\\n\t" + vec_data_type + " " + var_name + " = " + cast_type + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";"; - - auto eltwise_p = std::dynamic_pointer_cast(fused_dep.op_params); - - if (eltwise_p->mode == EltwiseMode::ADD) { - eltwise_fused_ops += "\\\n\t" + data_type + " " + e_add + " = (oc < 16) ? " + - get_shuffle(var_name + ".s0", "oc") + " : " + get_shuffle(var_name + ".s1", "oc") + ";"; - eltwise_fused_ops += "\\\n\tres = res+" + e_add + ";"; - } else if (eltwise_p->mode == EltwiseMode::MUL) { - eltwise_fused_ops += "\\\n\t" + data_type + " " + e_mul + " = (oc < 16) ? 
" + - get_shuffle(var_name + ".s0", "oc") + " : " + get_shuffle(var_name + ".s1", "oc") + ";"; - eltwise_fused_ops += "\\\n\tres = res*" + e_mul + ";"; - } else { - throw std::invalid_argument("Not supported eltwise fusing op in binary_convolution_1x1 kernel: " + params.layerID); - } - - break; - } - - default: - throw std::invalid_argument("Invalid fused op in binary_convolution_1x1 kernel: " + params.layerID); - } - - op_id++; - } - jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops)); - jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data)); - jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true)); - - return jit; -} - -KernelsData BinaryConvolutionKernel1x1::GetKernelsData(const Params& params, const optional_params& options) const { - return GetTunedKernelsDataByIndex(params, options); -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.h deleted file mode 100644 index 03e30737e6bd7f..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "binary_convolution_kernel_base.h" -#include - -namespace kernel_selector { - -class BinaryConvolutionKernel1x1 : public BinaryConvolutionKernelBase { -public: - using Parent = BinaryConvolutionKernelBase; - - BinaryConvolutionKernel1x1() : BinaryConvolutionKernelBase("binary_convolution_gpu_1x1") {} - virtual ~BinaryConvolutionKernel1x1() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() 
const override; - DeviceFeaturesKey get_required_device_features_key(const Params& params, const optional_params& /*options*/) const override; - -protected: - WeightsLayout GetPreferredWeightLayout(const binary_convolution_params &) const override { - return WeightsLayout::os_is_yx_osv32_isv32p; - } - JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const override; - bool Validate(const Params& p, const optional_params& o) const override; - DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp deleted file mode 100644 index 282e97f5b4a5fd..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.cpp +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "binary_convolution_kernel_1x1_b_fs_yx_fsv16.h" -#include -#include -#include - -namespace kernel_selector { - -static const int sub_group_size = 16; -static const int ic_pack_size = 32; -static const int xy_block_size = 16; - -ParamsKey BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::BINARY); - k.EnableInputWeightsType(WeightsType::BINARY); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::UINT8); - k.EnableOutputDataType(Datatype::INT8); - k.EnableOutputDataType(Datatype::INT32); - 
k.EnableInputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - k.EnableDifferentTypes(); - return k; -} - -DeviceFeaturesKey BinaryConvolutionKernel1x1_b_fs_yx_fsv16::get_required_device_features_key(const Params& params, const optional_params& /*options*/) const { - DeviceFeaturesKey k; - k.requires_subgroup_shuffle(); - k.requires_blocked_read_write(); - - return k; -} - -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::SetDefault( - const binary_convolution_params& params, - int) const { - DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); - - const auto& out = params.outputs[0]; - - auto x = out.X().v; - auto y = out.Y().v; - auto f = out.Feature().v; - auto b = out.Batch().v; - - dispatchData.gws[0] = Align(x * y, sub_group_size); - dispatchData.gws[1] = CeilDiv(f, sub_group_size); // 1 WI calcs 16 OC - dispatchData.gws[2] = b; - - dispatchData.lws = { static_cast(sub_group_size), 1, 1 }; - - return dispatchData; -} - -KernelsPriority BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_1; -} - -bool BinaryConvolutionKernel1x1_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const { - if (!BinaryConvolutionKernelBase::Validate(p, o) || !ConvolutionBinaryCheckInput(p, o)) - return false; - - const auto& params = static_cast(p); - - const auto& input = params.inputs[0]; - const auto& output = params.outputs[0]; - - const bool bOutputSizes = output.X().v != input.X().v || output.Y().v != input.Y().v; - const bool bFilterSize = params.filterSize.x != 1 || params.filterSize.y != 1; - const bool bStride = params.stride.x != 1 || params.stride.y != 1; - const bool bGroups = params.groups > 1; - - if (bOutputSizes || bFilterSize || bStride || bGroups) - 
return false; - - return true; -} - -JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const { - auto jit = Parent::GetJitConstants(params, dispatchData); - - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); - jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.outputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("PADDED_INPUT", params.inputs[0].X().pad.Total() != 0)); - jit.AddConstant(MakeJitConstant("PADDED_OUTPUT", params.outputs[0].X().pad.Total() != 0)); - jit.AddConstant(MakeJitConstant("XY_BLOCK_SIZE", xy_block_size)); - if (params.inputs[0].Feature().v % ic_pack_size) { - jit.AddConstant(MakeJitConstant("LEFTOVERS_IC", params.inputs[0].Feature().v % ic_pack_size)); - jit.AddConstant(MakeJitConstant("FILTER_MASK", - (0xFFFFFFFF >> (ic_pack_size - params.inputs[0].Feature().v % ic_pack_size)))); - } - - if (params.outputs[0].Feature().v % 32 != 0) { - jit.AddConstant(MakeJitConstant("LEFTOVERS_OC", true)); - } - - if (params.outputs[0].GetDType() == Datatype::BINARY) { - jit.AddConstant(MakeJitConstant("BINARY_PACKED_OUTPUT", 1)); - } - - return jit; -} - -JitConstants BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& /*kd*/) const { - JitConstants jit = {}; - - auto input_dt = GetUnitType(params); - FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + oc)", "y", "x"}, "res", input_dt, 1 }; - jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf})); - - size_t op_id = 0; - std::string input_decls = ""; - std::string eltwise_fused_ops = ""; - std::string prepare_data = ""; - for (auto& fused_dep : params.fused_ops) { - auto fused_dep_codegen = FusedOpsCodeGenerator(fused_dep); - - auto 
get_aligned_load = [&](std::string ptr, std::string byte_offset) -> std::string { - if (fused_dep.tensors[0].GetDType() == Datatype::F32) - return "(_sub_group_block_read((const __global uint*)(" + ptr + ") + (" + byte_offset + ")))"; - else - return "(_sub_group_block_read_us((const __global ushort*)(" + ptr + ") + (" + byte_offset + - ")))"; - }; - - auto get_shuffle = [&](std::string var, std::string lid) -> std::string { - return "(_sub_group_shuffle(" + var + ", " + lid + "))"; - }; - - std::string data_type = fused_dep_codegen.GetInputTypeName(0, 1); - std::string vec_data_type = fused_dep_codegen.GetInputTypeName(0, 1); - std::string sc = "sc" + toCodeString(op_id); - std::string e_add = "e_add" + toCodeString(op_id); - std::string e_mul = "e_mul" + toCodeString(op_id); - - switch (fused_dep.GetType()) { - case KernelType::ACTIVATION: { - auto p = fused_dep.GetOpParams(); - base_activation_params activation = p->param; - if (activation.function != ActivationFunction::NONE) { - auto suffix = "_FUSED_OP" + toCodeString(op_id); - - jit.Merge(MakeActivationJitConstants(activation, fused_dep.output_tensor.GetDType(), suffix)); - eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");"; - } - - break; - } - - case KernelType::ELTWISE: { - std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? 
"as_float" : "as_half"; - std::string var_name = fused_dep_codegen.GetInputVarName(0); - prepare_data += "\\\n\t" + vec_data_type + " " + var_name + " = " + cast_type + - get_aligned_load(fused_dep_codegen.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";"; - - auto eltwise_p = std::dynamic_pointer_cast(fused_dep.op_params); - - if (eltwise_p->mode == EltwiseMode::ADD) { - eltwise_fused_ops += "\\\n\t" + data_type + " " + e_add + " = " + get_shuffle(var_name, "oc") + ";"; - eltwise_fused_ops += "\\\n\tres = res+" + var_name + ";"; - } else if (eltwise_p->mode == EltwiseMode::MUL) { - eltwise_fused_ops += "\\\n\t" + data_type + " " + e_mul + " = " + get_shuffle(var_name, "oc") + ";"; - eltwise_fused_ops += "\\\n\tres = res*" + var_name + ";"; - } else { - throw std::invalid_argument("Not supported eltwise fusing op in binary_convolution_1x1_fsv16 kernel: " + params.layerID); - } - - break; - } - - default: - throw std::invalid_argument("Invalid fused op in binary_convolution_1x1_fsv16 kernel: " + params.layerID); - } - - op_id++; - } - jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops)); - jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data)); - jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true)); - - return jit; -} - -KernelsData BinaryConvolutionKernel1x1_b_fs_yx_fsv16::GetKernelsData(const Params& params, const optional_params& options) const { - return GetTunedKernelsDataByIndex(params, options); -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h deleted file mode 100644 index d7eb023f5b1176..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_1x1_b_fs_yx_fsv16.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2018-2023 
Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "binary_convolution_kernel_base.h" -#include - -namespace kernel_selector { - -class BinaryConvolutionKernel1x1_b_fs_yx_fsv16 : public BinaryConvolutionKernelBase { -public: - using Parent = BinaryConvolutionKernelBase; - - BinaryConvolutionKernel1x1_b_fs_yx_fsv16() : BinaryConvolutionKernelBase("binary_convolution_gpu_1x1_b_fs_yx_fsv16") {} - virtual ~BinaryConvolutionKernel1x1_b_fs_yx_fsv16() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - DeviceFeaturesKey get_required_device_features_key(const Params& params, const optional_params& /*options*/) const override; - -protected: - WeightsLayout GetPreferredWeightLayout(const binary_convolution_params &) const override { - return WeightsLayout::os_is_yx_osv32_isv32p; - } - JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const override; - bool Validate(const Params& p, const optional_params& o) const override; - DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.cpp deleted file mode 100644 index acc55df224fff9..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.cpp +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// 
SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "binary_convolution_kernel_base.h" -#include "kernel_selector_utils.h" -#include "common_tools.h" -#include -#include -#include - -namespace kernel_selector { -bool BinaryConvolutionKernelBase::Validate(const Params& p, const optional_params& o) const { - if (p.GetType() != KernelType::BINARY_CONVOLUTION || o.GetType() != KernelType::BINARY_CONVOLUTION) { - return false; - } - - const binary_convolution_params& params = static_cast(p); - const binary_convolution_optional_params& optParams = static_cast(o); - - bool bSupportedWeightsLayout = params.weights.GetLayout() == GetPreferredWeightLayout(params); - - const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering; - - if (!bWeightsOK) { - return false; - } - - return true; -} - -JitConstants BinaryConvolutionKernelBase::GetJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const { - JitConstants jit = WeightBiasKernelBase::GetJitConstants(params); - jit.Merge(GetFusedPrimitivesJitConstants(params, dispatchData)); - - jit.AddConstants({ - MakeJitConstant("STRIDE", params.stride), - MakeJitConstant("PADDING", params.padding), - MakeJitConstant("DILATION", params.dilation), - }); - - jit.Merge(MakeTypeJitConstants(params.out_dt, "CONV_RESULT")); - - return jit; -} - -JitConstants BinaryConvolutionKernelBase::GetFusedPrimitivesJitConstants(const binary_convolution_params& /*params*/, - const DispatchData& /*kd*/) const { - return {}; -} - -bool BinaryConvolutionKernelBase::CheckWorkGroups(const BinaryConvolutionKernelBase::DispatchData& dispatchData) { - if (dispatchData.gws.size() != 3 || dispatchData.lws.size() != 3) - return false; - - for (size_t i = 0; i < dispatchData.gws.size(); i++) { - if (dispatchData.gws[i] == 0 || dispatchData.lws[i] == 0) - return false; - if ((dispatchData.gws[i] % dispatchData.lws[i]) != 0) - return false; - } - - return true; -} - 
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelBase::SetDefault(const binary_convolution_params& params, - int) const { - DispatchData dispatchData; - auto in_layout = params.inputs[0].GetLayout(); - auto out_layout = params.outputs[0].GetLayout(); - std::vector> dims_by_gws; - - const auto& out = params.outputs[0]; - std::vector global; - if (out_layout == DataLayout::bfyx || out_layout == DataLayout::byxf) { - global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v}; - dims_by_gws = {{Tensor::DataChannelName::X}, - {Tensor::DataChannelName::Y}, - {Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}}; - } else { - global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v}; - dims_by_gws = {{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}, - {Tensor::DataChannelName::X}, - {Tensor::DataChannelName::Y}}; - } - - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo, in_layout, out_layout, dims_by_gws); - - dispatchData.gws = global; - dispatchData.lws = local; - - dispatchData.cldnnStyle.blockWidth = 1; - dispatchData.cldnnStyle.blockHeight = 1; - dispatchData.cldnnStyle.prefetch = 0; - dispatchData.cldnnStyle.inputBlockArraySize = 0; - dispatchData.cldnnStyle.inputBlockWidth = 0; - - dispatchData.gemmStyle.globalWorkSizeDX = 1; - dispatchData.gemmStyle.globalWorkSizeDY = 1; - dispatchData.gemmStyle.globalWorkSizeDZ = 1; - dispatchData.gemmStyle.subBlockDimK = 1; - dispatchData.gemmStyle.subBlockDimM = 0; - dispatchData.gemmStyle.subBlockDimN = 0; - return dispatchData; -} - -KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& params, - const optional_params& options, - const std::string exeMode, - int autoTuneIndex) const { - if (!Validate(params, options)) { - return {}; - } - - KernelData kd = KernelData::Default(params); - binary_convolution_params& newParams = *static_cast(kd.params.get()); - - if (NeedPaddedInput()) { - kd.reorderInput = 
ConvolutionBinaryUpdateInputParams(newParams); - } - DispatchData dispatchData = SetDefault(newParams, autoTuneIndex); - - if (!CheckWorkGroups(dispatchData)) { - // Internal Error - wrong calculation of global/local work group sizes - return {}; - } - - bool succeed = UpdateWeightsParams(newParams, - options, - GetPreferredWeightLayout(newParams), - kd.weightsReorderParams, - GetSupportedKey()); - - if (!succeed) { - return {}; - } - - auto finalKernelName = GetKernelName(newParams); - auto cldnnJit = GetJitConstants(newParams, dispatchData); - auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options); - auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint); - - auto& kernel = kd.kernels[0]; - uint32_t fused_deps_total = 0; - for (auto& fused_dep : newParams.fused_ops) { - for (int i = 0; i < static_cast(fused_dep.dep_size); i++) { - kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT_OF_FUSED_PRIMITIVE, fused_deps_total}); - fused_deps_total++; - } - } - - FillCLKernelData(kernel, - dispatchData, - params.engineInfo, - finalKernelName, - jit, - entryPoint, - exeMode, - true, - !newParams.bias.empty(), - 1, - fused_deps_total); - - kd.autoTuneIndex = autoTuneIndex; - - return {kd}; -} - -bool CheckConvolutionBinaryPaddedInputDesc(const binary_convolution_params& params, const DataTensor& reqDesc) { - assert(params.inputs.size() == 1); - - bool properPadding = reqDesc.X().pad.before <= params.inputs[0].X().pad.before && - reqDesc.Y().pad.before <= params.inputs[0].Y().pad.before && - reqDesc.Feature().pad.before <= params.inputs[0].Feature().pad.before && - reqDesc.Batch().pad.before <= params.inputs[0].Batch().pad.before; - - properPadding &= reqDesc.X().pad.after <= params.inputs[0].X().pad.after && - reqDesc.Y().pad.after <= params.inputs[0].Y().pad.after && - reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after && - reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after; - - return 
properPadding; -} - -static DataTensor GetConvolutionBFYXPaddedTensor(const binary_convolution_params& cp) { - assert(cp.inputs.size() == 1); - assert(cp.inputs[0].GetDims().size() == 4U); - - DataTensor t = cp.inputs[0]; - std::vector pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}}; - - pad[0].before = cp.padding.x; - pad[1].before = cp.padding.y; - - const auto inputLimitX = (cp.outputs[0].X().v - 1) * cp.stride.x + (cp.filterSize.x - 1) * cp.dilation.x + 1; - const auto inputLimitY = (cp.outputs[0].Y().v - 1) * cp.stride.y + (cp.filterSize.y - 1) * cp.dilation.y + 1; - - pad[0].after = (size_t)std::max(static_cast(inputLimitX) - static_cast(t.X().v) - static_cast(pad[0].before), static_cast(0)); - pad[1].after = (size_t)std::max(static_cast(inputLimitY) - static_cast(t.Y().v) - static_cast(pad[1].before), static_cast(0)); - - Tensor::NDims dims(4); - const Tensor::NDims& orgDims = cp.inputs[0].GetDims(); - size_t pitch = 1; - for (size_t i = 0; i < dims.size(); i++) { - dims[i].pad = pad[i]; - dims[i].v = orgDims[i].v; - dims[i].pitch = pitch; - pitch *= dims[i].LogicalDimPadded(); - } - - return {dims, t.GetDType(), t.GetLayout()}; -} - -bool ConvolutionBinaryCheckInput(const Params& p, const optional_params& o) { - const binary_convolution_params& params = static_cast(p); - - if (params.padding.x == 0 && params.padding.y == 0) { - const auto req_input = GetConvolutionBFYXPaddedTensor(params); - const bool bProperInputDesc = CheckConvolutionBinaryPaddedInputDesc(params, req_input); - - return bProperInputDesc; - } - - return true; -} - -bool ConvolutionBinaryUpdateInputParams(binary_convolution_params& params) { - const auto req_input = GetConvolutionBFYXPaddedTensor(params); - const bool bProperInputDesc = CheckConvolutionBinaryPaddedInputDesc(params, req_input); - - if (!bProperInputDesc) { - params.inputs[0] = req_input; - return true; - } - - return false; -} - -std::string BinaryConvolutionKernelBase::GetAutoTuneOptions(int autoTuneIndex) const { - if ((autoTuneIndex 
>= 0) && (autoTuneIndex < static_cast(autoTuneOptions.size()))) { - return autoTuneOptions[autoTuneIndex]; - } - - return EXE_MODE_DEFAULT; -} - -KernelsData BinaryConvolutionKernelBase::GetTunedKernelsDataByIndex(const Params& params, - const optional_params& options, - const int autoTuneIndex) const { - return GetCommonKernelsData(params, options, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex); -} - -KernelsData BinaryConvolutionKernelBase::GetKernelsDataForAutoTune(const Params& params, - const optional_params& options) const { - if (!Validate(params, options)) { - return {}; - } - - KernelsData res = {}; - - for (size_t i = 0; i < autoTuneOptions.size(); i++) { - KernelsData kd = GetTunedKernelsDataByIndex(params, options, static_cast(i)); - if (!kd.empty()) { - res.emplace_back(kd[0]); - } - } - - return res; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.h deleted file mode 100644 index 686b81d7e33f27..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_base.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "weight_bias_kernel_base.h" -#include "binary_convolution_params.h" -#include -#include - -namespace kernel_selector { -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// BinaryConvolutionKernelBase -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class BinaryConvolutionKernelBase : public WeightBiasKernelBase { -public: - using WeightBiasKernelBase::WeightBiasKernelBase; - virtual ~BinaryConvolutionKernelBase() {} - - struct DispatchData : 
public CommonDispatchData { - struct CLDNNStyle { - size_t blockWidth, blockHeight; // used for kernels processing blocks - size_t prefetch; - size_t inputBlockArraySize; // Number of elements in array of UNIT_TYPE that must be specified in kernel to - // store/cache input block. - size_t inputBlockWidth; // Number of elements in X dimension stored/cached in input block. - }; - - struct GEMMStyle { - size_t subBlockDimM; - size_t subBlockDimK; - size_t subBlockDimN; - size_t globalWorkSizeDX; - size_t globalWorkSizeDY; - size_t globalWorkSizeDZ; - }; - - union { - CLDNNStyle cldnnStyle; - GEMMStyle gemmStyle; - }; - }; - - std::string GetAutoTuneOptions(int autoTuneIndex) const; - std::vector autoTuneOptions = {EXE_MODE_DEFAULT, EXE_MODE_NO_PRERA_SCH, EXE_MODE_AGE_BASED}; - KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override; - KernelsData GetTunedKernelsDataByIndex(const Params& params, - const optional_params& options, - int autoTuneIndex = -1) const override; - -protected: - virtual WeightsLayout GetPreferredWeightLayout(const binary_convolution_params &) const = 0; - virtual std::string GetKernelName(const binary_convolution_params&) const { return kernelName; } - virtual bool NeedPaddedInput() const { return false; } - bool Validate(const Params& p, const optional_params& o) const override; - using WeightBiasKernelBase::GetJitConstants; - virtual JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const; - virtual JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const; - virtual DispatchData SetDefault(const binary_convolution_params& params, int autoTuneIndex = -1) const; - static bool CheckWorkGroups(const DispatchData&); - KernelsData GetCommonKernelsData(const Params& params, - const optional_params& options, - const std::string exeMode = EXE_MODE_DEFAULT, - int autoTuneIndex = 
-1) const; -}; - -bool ConvolutionBinaryCheckInput(const Params& p, const optional_params& o); -bool CheckConvolutionBinaryPaddedInputDesc(const binary_convolution_params& params, const DataTensor& reqDesc); -bool ConvolutionBinaryUpdateInputParams(binary_convolution_params& params); - -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.cpp deleted file mode 100644 index 9acd73f38b8cd9..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.cpp +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "binary_convolution_kernel_generic.h" -#include -#include -#include - -namespace kernel_selector { - -static const int sub_group_size = 16; -static const int ic_pack_size = 32; -static const int x_block_size = 16; - -ParamsKey BinaryConvolutionKernelGeneric::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::BINARY); - k.EnableInputWeightsType(WeightsType::BINARY); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::INT32); - k.EnableOutputDataType(Datatype::BINARY); - k.EnableInputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::b_fs_yx_32fp); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - k.EnableDifferentTypes(); - return k; -} - -DeviceFeaturesKey BinaryConvolutionKernelGeneric::get_required_device_features_key(const Params& params, const optional_params& /*options*/) const { - DeviceFeaturesKey k; - k.requires_subgroup_shuffle(); - k.requires_blocked_read_write(); - - return k; -} - 
-BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelGeneric::SetDefault(const binary_convolution_params& params, - int) const { - DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); - - const auto& out = params.outputs[0]; - - auto x = out.X().v; - auto y = out.Y().v; - auto f = out.Feature().v; - auto b = out.Batch().v; - - dispatchData.gws[0] = Align(x, sub_group_size) * y; - dispatchData.gws[1] = CeilDiv(f, 2 * sub_group_size); // 1 WI calc 2 OC x 16 X - dispatchData.gws[2] = b; - - dispatchData.lws[0] = sub_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - - return dispatchData; -} - -KernelsPriority BinaryConvolutionKernelGeneric::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_2; -} - -bool BinaryConvolutionKernelGeneric::Validate(const Params& p, const optional_params& o) const { - if (!BinaryConvolutionKernelBase::Validate(p, o) || !ConvolutionBinaryCheckInput(p, o)) - return false; - - const auto& params = static_cast(p); - - if (params.groups > 1) - return false; - - return true; -} - -JitConstants BinaryConvolutionKernelGeneric::GetJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const { - auto jit = Parent::GetJitConstants(params, dispatchData); - - auto input = params.inputs[0]; - auto output = params.outputs[0]; - size_t input_line_size = params.stride.x * (x_block_size - 1) + params.weights.X().v; - - int pad_physical_val = params.pad_value == -1.0f ? 
0x00000000 : 0xFFFFFFFF; - jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); - jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.outputs[0].Feature().v, ic_pack_size))); - jit.AddConstant(MakeJitConstant("PAD_VALUE", pad_physical_val)); - jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", x_block_size)); - jit.AddConstant(MakeJitConstant("INPUT_ELEMENTS_PER_WI", CeilDiv(input_line_size, sub_group_size))); - jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(output.X().v, x_block_size))); - jit.AddConstant(MakeJitConstant("EXCLUDE_PAD", params.pad_value == 0.0f)); - if (params.inputs[0].Feature().v % ic_pack_size) { - jit.AddConstant(MakeJitConstant("LEFTOVERS_IC", params.inputs[0].Feature().v % ic_pack_size)); - jit.AddConstant(MakeJitConstant("FILTER_MASK", - (0xFFFFFFFF >> (ic_pack_size - params.inputs[0].Feature().v % ic_pack_size)))); - } - - if (params.outputs[0].GetDType() == Datatype::BINARY) { - jit.AddConstant(MakeJitConstant("BINARY_PACKED_OUTPUT", 1)); - } - - return jit; -} - -JitConstants BinaryConvolutionKernelGeneric::GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& /*kd*/) const { - JitConstants jit = {}; - - auto input_dt = GetUnitType(params); - FusedOpsConfiguration conf = {"", {"b", "(f_block*16 + i)", "y", "x"}, "res", input_dt, 1 }; - jit.Merge(MakeFusedOpsDeclsJitConstants(params, {conf})); - - size_t op_id = 0; - std::string eltwise_fused_ops = ""; - std::string channel_pack_fused_ops = ""; - std::string prepare_data = ""; - for (auto& fused_dep : params.fused_ops) { - auto fused_dep_codegen = FusedOpsCodeGenerator(fused_dep); - auto get_aligned_load2 = [&](std::string ptr, std::string byte_offset) -> std::string { - if (fused_dep.tensors[0].GetDType() == Datatype::F32) - return "(_sub_group_block_read2((const __global uint*)(" + ptr 
+ ") + (" + byte_offset + ")))"; - else - return "(_sub_group_block_read_us2((const __global ushort*)(" + ptr + ") + (" + byte_offset + - ")))"; - }; - std::string data_type = fused_dep_codegen.GetInputTypeName(0, 1); - std::string vec_data_type = fused_dep_codegen.GetInputTypeName(0, 2); - std::string sc = "sc" + toCodeString(op_id); - std::string sh = "sh" + toCodeString(op_id); - std::string e_add = "e_add" + toCodeString(op_id); - std::string e_mul = "e_mul" + toCodeString(op_id); - - switch (fused_dep.GetType()) { - case KernelType::QUANTIZE: { - std::string var_name_in = fused_dep_codegen.GetInputVarName(0); - std::string var_name_out = fused_dep_codegen.GetInputVarName(3); - std::string cast_type_vec = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2"; - std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float" : "as_half"; - - if (fused_dep.tensors[0].Feature().v == params.outputs[0].Feature().v) { - prepare_data += vec_data_type + " " + var_name_in + " = " + cast_type_vec + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";"; - } else { - prepare_data += data_type + " " + var_name_in + " = " + cast_type + - + "(" + fused_dep_codegen.GetInputPtrName(0) + "[0]);"; - } - - if (fused_dep.tensors[2].Feature().v == params.outputs[0].Feature().v) { - prepare_data += vec_data_type + " " + var_name_out + " = " + cast_type_vec + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(3), "f_block*OC_BLOCK_SIZE") + ";"; - } else { - prepare_data += data_type + " " + var_name_out + " = " + cast_type + - "(" + fused_dep_codegen.GetInputPtrName(3)+"[0]);"; - } - - std::string var_in_s0 = fused_dep.tensors[0].Feature().v == params.outputs[0].Feature().v ? var_name_in + ".s0" : var_name_in; - std::string var_in_s1 = fused_dep.tensors[0].Feature().v == params.outputs[0].Feature().v ? 
var_name_in + ".s1" : var_name_in; - - std::string var_out_s0 = fused_dep.tensors[3].Feature().v == params.outputs[0].Feature().v ? var_name_out + ".s0" : var_name_out; - std::string var_out_s1 = fused_dep.tensors[3].Feature().v == params.outputs[0].Feature().v ? var_name_out + ".s1" : var_name_out; - - channel_pack_fused_ops += "\\\n\tfor (int i = 0; i < 16; i++) {"; - channel_pack_fused_ops += "\\\n\tint ch0, ch1;"; - if (fused_dep.tensors[2].Feature().v == params.outputs[0].Feature().v) { - channel_pack_fused_ops += "\\\n\tif ("+ var_out_s0 + " == UNIT_VAL_ONE) "; - channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_in_s0 + " ? (1 << lid) : 0;"; - channel_pack_fused_ops += "\\\n\telse "; - channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] <= " + var_in_s0 + " ? (1 << lid) : 0;"; - channel_pack_fused_ops += "\\\n\tif ("+ var_out_s1 + " == UNIT_VAL_ONE) "; - channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_in_s1 + " ? " - "(1 << (SUB_GROUP_SIZE + lid)) : 0;"; - channel_pack_fused_ops += "\\\n\telse "; - channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] <= " + var_in_s1 + " ? " - "(1 << (SUB_GROUP_SIZE + lid)) : 0;"; - } else { - channel_pack_fused_ops += "\\\n\tif ("+ var_out_s0 + " == UNIT_VAL_ONE) {"; - channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] > " + var_in_s0 + " ? (1 << lid) : 0;"; - channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] > " + var_in_s1 + " ? " - "(1 << (SUB_GROUP_SIZE + lid)) : 0;"; - channel_pack_fused_ops += "\\\n\t} else {"; - channel_pack_fused_ops += "\\\n\t\tch0 = dst[0*SUB_GROUP_SIZE + i] <= " + var_in_s0 + " ? (1 << lid) : 0;"; - channel_pack_fused_ops += "\\\n\t\tch1 = dst[1*SUB_GROUP_SIZE + i] <= " + var_in_s1 + " ? 
" - "(1 << (SUB_GROUP_SIZE + lid)) : 0;"; - channel_pack_fused_ops += "\\\n\t}"; - } - channel_pack_fused_ops += "\\\n\tint packed = ch0 + ch1;"; - channel_pack_fused_ops += "\\\n\tpacked_out[i] = sub_group_reduce_add(packed);"; - channel_pack_fused_ops += "\\\n\t}"; - - break; - } - - case KernelType::ACTIVATION: { - auto p = fused_dep.GetOpParams(); - base_activation_params activation = p->param; - if (activation.function != ActivationFunction::NONE) { - auto suffix = "_FUSED_OP" + toCodeString(op_id); - - jit.Merge(MakeActivationJitConstants(activation, fused_dep.output_tensor.GetDType(), suffix)); - eltwise_fused_ops += "\\\n\tres = ACTIVATION" + suffix + "((OUTPUT_TYPE)res, ACTIVATION_PARAMS" + suffix + ");"; - } - - break; - } - - case KernelType::ELTWISE: { - std::string cast_type = (fused_dep.tensors[0].GetDType() == Datatype::F32) ? "as_float2" : "as_half2"; - std::string var_name = fused_dep_codegen.GetInputVarName(0); - prepare_data += vec_data_type + " " + var_name + " = " + cast_type + - get_aligned_load2(fused_dep_codegen.GetInputPtrName(0), "f_block*OC_BLOCK_SIZE") + ";"; - - auto eltwise_p = std::dynamic_pointer_cast(fused_dep.op_params); - - if (eltwise_p->mode == EltwiseMode::ADD) { - eltwise_fused_ops += data_type + " " + e_add + " = (i < 16) ? " + var_name + ".s0" + " : " + var_name + ".s1;"; - eltwise_fused_ops += "res = res+" + e_add +";"; - } else if (eltwise_p->mode == EltwiseMode::MUL) { - eltwise_fused_ops += data_type + " " + e_mul + " = (i < 16) ? 
" + var_name + ".s0" + " : " + var_name + ".s1;"; - eltwise_fused_ops += "res = res*" + e_mul +";"; - } else { - throw std::invalid_argument("Not supported eltwise fusing op in binary_convolution_generic kernel: " + params.layerID); - } - - break; - } - - default: - throw std::invalid_argument("Invalid fused op in binary_convolution_generic kernel: " + params.layerID); - } - - op_id++; - } - jit.AddConstant(MakeJitConstant("DO_ELTWISE_FUSED_OPS", eltwise_fused_ops)); - jit.AddConstant(MakeJitConstant("DO_CHANNEL_PACK_OPS", channel_pack_fused_ops)); - jit.AddConstant(MakeJitConstant("FUSED_OPS_PREPARE_DATA", prepare_data)); - jit.AddConstant(MakeJitConstant("CUSTOM_FUSED_OPS", true)); - - return jit; -} - -KernelsData BinaryConvolutionKernelGeneric::GetKernelsData(const Params& params, const optional_params& options) const { - return GetTunedKernelsDataByIndex(params, options); -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.h deleted file mode 100644 index 82653b2d779f0d..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_generic.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "binary_convolution_kernel_base.h" -#include - -namespace kernel_selector { - -class BinaryConvolutionKernelGeneric : public BinaryConvolutionKernelBase { -public: - using Parent = BinaryConvolutionKernelBase; - - BinaryConvolutionKernelGeneric() : BinaryConvolutionKernelBase("binary_convolution_gpu_generic") {} - virtual ~BinaryConvolutionKernelGeneric() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const 
optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - DeviceFeaturesKey get_required_device_features_key(const Params& params, const optional_params& /*options*/) const override; - -protected: - WeightsLayout GetPreferredWeightLayout(const binary_convolution_params &) const override { - return WeightsLayout::os_is_yx_osv32_isv32p; - } - JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const override; - bool Validate(const Params& p, const optional_params& o) const override; - DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.cpp deleted file mode 100644 index 87a61d64c6e7b7..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "binary_convolution_kernel_ref.h" -#include - -namespace kernel_selector { - -ParamsKey BinaryConvolutionKernelRef::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::BINARY); - k.EnableInputWeightsType(WeightsType::BINARY); - k.EnableOutputDataType(Datatype::F16); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::INT32); - k.EnableOutputDataType(Datatype::BINARY); - k.EnableInputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::b_fs_yx_32fp); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - 
k.EnableDilation(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - return k; -} - -BinaryConvolutionKernelBase::DispatchData BinaryConvolutionKernelRef::SetDefault(const binary_convolution_params& params, - int) const { - DispatchData dispatchData = BinaryConvolutionKernelBase::SetDefault(params); - - const auto& out = params.outputs[0]; - - auto b = out.Batch().v; - auto f = out.Feature().v; - auto y = out.Y().v; - auto x = out.X().v; - - dispatchData.gws[0] = b; - dispatchData.gws[1] = f; - dispatchData.gws[2] = x * y; - - dispatchData.lws[0] = 1; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; - - return dispatchData; -} - -KernelsPriority BinaryConvolutionKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return DONT_USE_IF_HAVE_SOMETHING_ELSE; -} - -JitConstants BinaryConvolutionKernelRef::GetJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const { - auto jit = Parent::GetJitConstants(params, dispatchData); - - int pad_physical_val = params.pad_value == -1.0f ? 
0x00000000 : 0xFFFFFFFF; - int leftovers_mask = (0xFFFFFFFF >> (32 - params.inputs[0].Feature().v % 32)); - jit.AddConstant(MakeJitConstant("INPUT0_FEATURE_NUM_PACKED", CeilDiv(params.inputs[0].Feature().v, 32))); - jit.AddConstant(MakeJitConstant("FEATURE_PACK_SIZE", 32)); - jit.AddConstant(MakeJitConstant("OFM_BLOCK_SIZE", 32)); - jit.AddConstant(MakeJitConstant("EXCLUDE_PAD", params.pad_value == 0.0f)); - jit.AddConstant(MakeJitConstant("PAD_VALUE", pad_physical_val)); - jit.AddConstant(MakeJitConstant("LEFTOVERS", params.inputs[0].Feature().v % 32 != 0)); - jit.AddConstant(MakeJitConstant("LEFTOVERS_MASK", leftovers_mask)); - - return jit; -} - -KernelsData BinaryConvolutionKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { - return GetTunedKernelsDataByIndex(params, options); -} - -bool BinaryConvolutionKernelRef::Validate(const Params& p, const optional_params& o) const { - if (!BinaryConvolutionKernelBase::Validate(p, o) || !ConvolutionBinaryCheckInput(p, o)) - return false; - - const auto& params = static_cast(p); - - if (!params.fused_ops.empty()) - return false; - - return true; -} - -JitConstants BinaryConvolutionKernelRef::GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& /*kd*/) const { - JitConstants jit = {}; - - auto input_dt = GetUnitType(params); - FusedOpsConfiguration conf = {"", {"b", "f", "y", "x"}, "res", input_dt, 1 }; - jit.Merge(MakeFusedOpsJitConstants(params, {conf})); - - return jit; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.h deleted file mode 100644 index f993acd1810b91..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_ref.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2023 
Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "binary_convolution_kernel_base.h" -#include - -namespace kernel_selector { - -class BinaryConvolutionKernelRef : public BinaryConvolutionKernelBase { -public: - using Parent = BinaryConvolutionKernelBase; - - BinaryConvolutionKernelRef() : BinaryConvolutionKernelBase("binary_convolution_gpu_ref") {} - virtual ~BinaryConvolutionKernelRef() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - -protected: - WeightsLayout GetPreferredWeightLayout(const binary_convolution_params &) const override { - return WeightsLayout::os_is_yx_osv32_isv32p; - } - JitConstants GetFusedPrimitivesJitConstants(const binary_convolution_params& params, - const DispatchData& dispatchData) const override; - bool Validate(const Params& p, const optional_params& o) const override; - DispatchData SetDefault(const binary_convolution_params& arg, int autoTuneIndex = -1) const override; - JitConstants GetJitConstants(const binary_convolution_params& params, const DispatchData& dispatchData) const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp deleted file mode 100644 index 5bb0a615002a25..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "binary_convolution_kernel_selector.h" -#include "binary_convolution_kernel_ref.h" -#include "binary_convolution_kernel_generic.h" 
-#include "binary_convolution_kernel_1x1.h" -#include "binary_convolution_kernel_1x1_b_fs_yx_fsv16.h" - -namespace kernel_selector { -binary_convolution_kernel_selector::binary_convolution_kernel_selector() { - Attach(); - Attach(); - Attach(); - Attach(); -} - -KernelsData binary_convolution_kernel_selector::GetBestKernels(const Params& params, - const optional_params& options) const { - return GetNaiveBestKernel(params, options, KernelType::BINARY_CONVOLUTION); -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h deleted file mode 100644 index 65ed563de5b2de..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_kernel_selector.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "kernel_selector.h" - -namespace kernel_selector { -class binary_convolution_kernel_selector : public kernel_selector_base { -public: - static binary_convolution_kernel_selector& Instance() { - static binary_convolution_kernel_selector instance_; - return instance_; - } - - binary_convolution_kernel_selector(); - - virtual ~binary_convolution_kernel_selector() {} - - KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.cpp deleted file mode 100644 index 070d02e7118909..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2018-2023 
Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "binary_convolution_params.h" -#include -#include - -namespace kernel_selector { -std::string binary_convolution_params::to_string() const { - std::stringstream s; - - s << base_params::to_string() << "_"; - s << filterSize.x << "_" << filterSize.y << "_"; - s << stride.x << "_" << stride.y << "_"; - s << dilation.x << "_" << dilation.y << "_"; - s << padding.x << "_" << padding.y << "_"; - s << 1; - s << groups; - - return s.str(); -} - -std::string binary_convolution_params::to_cache_string_v2() const { - std::stringstream s; - - s << weight_bias_params::to_cache_string_v2() << ";"; - s << filterSize.x << "_" << filterSize.y << "_" << filterSize.z << ";"; - s << stride.x << "_" << stride.y << "_" << stride.z << ";"; - s << dilation.x << "_" << dilation.y << "_" << dilation.z << ";"; - s << padding.x << "_" << padding.y << "_" << padding.z << ";"; - s << 1 << ";"; - s << groups; - - return s.str(); -} - -ParamsKey binary_convolution_params::GetParamsKey() const { - ParamsKey k = weight_bias_params::GetParamsKey(); - - if (dilation.x != 1 || - dilation.y != 1) { - k.EnableDilation(); - } - - if (groups > 1) { - k.EnableGroupedConvolution(); - } - - return k; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.h deleted file mode 100644 index 9f9d9b38f39bf0..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/binary_convolution/binary_convolution_params.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "weight_bias_params.h" -#include -#include - -namespace kernel_selector { - 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// binary_convolution_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct binary_convolution_params : public weight_bias_params { - binary_convolution_params() : weight_bias_params(KernelType::BINARY_CONVOLUTION) {} - - uSize filterSize; - uSize stride; - uSize dilation; - uSize padding; - Datatype out_dt = Datatype::UNSUPPORTED; - float pad_value = 0.0f; - uint32_t groups = 1; - - std::string to_string() const override; - std::string to_cache_string_v2() const override; - ParamsKey GetParamsKey() const override; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// convolution_optional_params -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct binary_convolution_optional_params : weight_bias_optional_params { - binary_convolution_optional_params() : weight_bias_optional_params(KernelType::BINARY_CONVOLUTION) {} -}; - -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_base.cpp index 694799398342a1..da10f037941ba8 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_base.cpp @@ -14,34 +14,12 @@ bool QuantizeKernelBase::Validate(const Params& p, const optional_params&) const if (params.inputs.size() != 5) return false; - // Binary packed output is possible only with bfyx input and b_fs_yx_32fp output - if (params.outputs[0].GetDType() == Datatype::BINARY && - (params.outputs[0].GetLayout() != DataLayout::b_fs_yx_32fp || 
params.inputs[0].GetLayout() != DataLayout::bfyx)) - return false; - return true; } JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { JitConstants jit = MakeBaseParamsJitConstants(params); - if (params.packed_binary_output) { - jit.AddConstant(MakeJitConstant("PACKED_BINARY_OUTPUT", params.packed_binary_output)); - jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_PACKED", CeilDiv(params.outputs[0].Feature().v, 32))); - jit.AddConstant(MakeJitConstant("OC_BLOCK_SIZE", 32)); - if ((params.inputs[3].LogicalSize() == 1 && params.inputs[4].LogicalSize() == 1) || - (params.inputs[3].LogicalSize() == params.inputs[3].Batch().v && - params.inputs[4].LogicalSize() == params.inputs[4].Batch().v)) { - jit.AddConstant(MakeJitConstant("SINGLE_OUT_VAL", 1)); - - } else if (params.inputs[3].LogicalSize() == params.outputs[0].Feature().v && - params.inputs[4].LogicalSize() == params.outputs[0].Feature().v) { - jit.AddConstant(MakeJitConstant("PER_CHANNEL_OUT_VAL", 1)); - } else { - throw std::runtime_error("Unsupported const blob shape in node " + params.layerID); - } - } - jit.AddConstant(MakeJitConstant("LEVELS", static_cast(params.levels))); jit.AddConstant(MakeJitConstant("LWS_0", dispatchData.lws[0])); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_params.h index 692acc7b9d49b1..d781ac397d0180 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_params.h @@ -14,7 +14,6 @@ struct quantize_params : public base_params { quantize_params() : base_params(KernelType::QUANTIZE) , levels(0) - , packed_binary_output(false) , scale_shift_opt(false) , has_post_scale(true) , has_post_shift(true) @@ -38,7 +37,6 @@ struct quantize_params : public base_params { , 
out_shift(0.0f) { } int levels; - bool packed_binary_output; bool scale_shift_opt; bool has_post_scale; bool has_post_shift; @@ -65,8 +63,6 @@ struct quantize_params : public base_params { ParamsKey GetParamsKey() const override { auto k = base_params::GetParamsKey(); - if (packed_binary_output) - k.EnableQuantizePackedBinaryOutput(); if (scale_shift_opt) k.EnableQuantizeScaleShiftOpt(); return k; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_ref.cpp index 131cbb5f9e751a..8b5c16c9518ef7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/quantize/quantize_kernel_ref.cpp @@ -17,14 +17,12 @@ ParamsKey QuantizeKernelRef::GetSupportedKey() const { k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::INT8); - k.EnableOutputDataType(Datatype::BINARY); k.EnableAllInputLayout(); k.EnableAllOutputLayout(); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); k.EnableDifferentTypes(); - k.EnableQuantizePackedBinaryOutput(); k.EnableDynamicShapesSupport(); return k; } @@ -34,7 +32,7 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params) auto output = params.outputs[0]; - if (output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + if (output.GetLayout() == DataLayout::b_fs_yx_fsv16) { dispatchData.gws[0] = output.Batch().v; dispatchData.gws[1] = Align(output.Feature().v, sub_group_size); dispatchData.gws[2] = output.Y().v * output.X().v * output.Z().v; @@ -44,7 +42,7 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params) dispatchData.lws[2] = 1; } else { dispatchData.gws[0] = output.Batch().v; - dispatchData.gws[1] = params.packed_binary_output ? 
CeilDiv(output.Feature().v, 32) : output.Feature().v; + dispatchData.gws[1] = output.Feature().v; dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v * output.W().v * output.U().v * output.V().v, 16); dispatchData.lws[0] = 1; @@ -57,7 +55,7 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params) JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { JitConstants jit = Parent::GetJitConstants(params, dispatchData); - if (params.outputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + if (params.outputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16) { jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); } return jit; @@ -68,15 +66,6 @@ bool QuantizeKernelRef::Validate(const Params& p, const optional_params&) const if (params.inputs.size() != 5) return false; - // Binary packed output is possible only with b_fs_yx_32fp output layout and some input layouts - if (params.outputs[0].GetDType() == Datatype::BINARY && - (params.outputs[0].GetLayout() != DataLayout::b_fs_yx_32fp || - (params.inputs[0].GetLayout() != DataLayout::bfyx && - params.inputs[0].GetLayout() != DataLayout::bfzyx && - params.inputs[0].GetLayout() != DataLayout::b_fs_zyx_fsv16 && - params.inputs[0].GetLayout() != DataLayout::b_fs_yx_fsv16 && - params.inputs[0].GetLayout() != DataLayout::fs_b_yx_fsv32))) - return false; return true; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.cpp index 7fecbc14345cd0..7aea6878e13213 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.cpp @@ -22,7 +22,6 @@ inline uint32_t SubGroupSize(WeightsLayout l) { case WeightsLayout::os_i_osv16__ai8: case 
WeightsLayout::i_yxs_os_yxsv2_osv16: case WeightsLayout::iy_xs_os_xsv2_osv16__ao32: - case WeightsLayout::os_is_yx_osv32_isv32p: case WeightsLayout::os_is_yx_isv16_osv16: case WeightsLayout::os_is_zyx_isv16_osv16: case WeightsLayout::is_os_zyx_isv16_osv16: diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.cpp deleted file mode 100644 index 41e922d8efe8dd..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "reorder_kernel_binary.h" -#include "kernel_selector_utils.h" -#include - -namespace kernel_selector { -ParamsKey ReorderKernelBinary::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::F16); - k.EnableInputDataType(Datatype::F32); - k.EnableInputDataType(Datatype::BINARY); - k.EnableOutputDataType(Datatype::BINARY); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::F16); - k.EnableDifferentTypes(); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableInputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::b_fs_yx_32fp); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBatching(); - return k; -} - -JitConstants ReorderKernelBinary::GetJitConstants(const reorder_params& params) const { - auto jit = ReorderKernelBase::GetJitConstants(params); - KernelData kd = KernelData::Default(params); - reorder_params& newParams = *static_cast(kd.params.get()); - - const auto& input = newParams.inputs[0]; - jit.AddConstant(MakeJitConstant("ELEMENTS_COUNT", input.LogicalSize())); - jit.AddConstant(MakeJitConstant("IFM_PACK_SIZE", 32)); - - if (input.GetDType() == Datatype::BINARY) { - jit.AddConstant(MakeJitConstant("BINARY_INPUT", 1)); - 
jit.AddConstant(MakeJitConstant("INPUT_PACKED_FEATURES_NUM", CeilDiv(input.Feature().v, 16))); - } - - if (params.outputs[0].GetDType() == Datatype::BINARY) { - jit.AddConstant(MakeJitConstant("BINARY_OUTPUT", 1)); - jit.AddConstant(MakeJitConstant("OUTPUT_PACKED_FEATURES_NUM", CeilDiv(params.outputs[0].Feature().v, 32))); - } - - return jit; -} - -ReorderKernelBinary::DispatchData ReorderKernelBinary::SetDefault(const reorder_params& params) const { - DispatchData dispatchData; - auto in_layout = params.inputs[0].GetLayout(); - auto out_layout = params.outputs[0].GetLayout(); - std::vector> dims_by_gws = {{ Tensor::DataChannelName::BATCH }, - { Tensor::DataChannelName::FEATURE }, - { Tensor::DataChannelName::X, Tensor::DataChannelName::Y }}; - - const auto& input = params.inputs[0]; - - dispatchData.gws = { input.Batch().v, CeilDiv(input.Feature().v, 32), input.Y().v * input.X().v }; - dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws); - - return dispatchData; -} - -KernelsData ReorderKernelBinary::GetKernelsData(const Params& params, const optional_params& options) const { - assert(params.GetType() == KernelType::REORDER); - - const reorder_params& orgParams = static_cast(params); - - if (orgParams.inputs[0].GetDType() != Datatype::BINARY && - orgParams.outputs[0].GetDType() != Datatype::BINARY) - return {}; - - if (orgParams.inputs[0].GetDType() == Datatype::BINARY && - orgParams.inputs[0].GetLayout() != DataLayout::b_fs_yx_32fp) - return {}; - - if (orgParams.outputs[0].GetDType() == Datatype::BINARY && - orgParams.outputs[0].GetLayout() != DataLayout::b_fs_yx_32fp) - return {}; - - return GetCommonKernelsData(orgParams, options); -} - -KernelsPriority ReorderKernelBinary::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_6; -} -} // namespace kernel_selector diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.h deleted file mode 100644 index 511c161196f216..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_binary.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "reorder_kernel_base.h" - -namespace kernel_selector { -class ReorderKernelBinary : public ReorderKernelBase { -public: - ReorderKernelBinary() : ReorderKernelBase("reorder_data_binary") {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const reorder_params& params) const override; - DispatchData SetDefault(const reorder_params& arg) const override; - -protected: - ParamsKey GetSupportedKey() const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_selector.cpp index 80dffe7bcd0dc6..0a56ae7b803dc5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_selector.cpp @@ -8,7 +8,6 @@ #include "reorder_from_winograd_2x3_kernel.h" #include "reorder_to_winograd_2x3_kernel.h" #include "reorder_kernel_to_yxfb_batched.h" -#include "reorder_kernel_binary.h" #include "reorder_biplanar_nv12.h" #include "reorder_kernel_fs_b_yx_fsv32_to_bfyx.h" #include "reorder_kernel_bfyx_to_blocked_format.h" @@ -18,7 +17,6 @@ namespace kernel_selector { reorder_kernel_selector::reorder_kernel_selector() { Attach(); - Attach(); Attach(); Attach(); Attach(); diff 
--git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.cpp deleted file mode 100644 index 5fa9a2f56ad4e6..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "reorder_weights_binary_kernel.h" -#include "kernel_selector_utils.h" -#include - -namespace kernel_selector { -ParamsKey ReorderWeightsBinaryKernel::GetSupportedKey() const { - ParamsKey k; - k.EnableInputWeightsType(WeightsType::BINARY); - k.EnableOutputWeightsType(WeightsType::BINARY); - k.EnableInputWeightsLayout(WeightsLayout::oiyx); - k.EnableOutputWeightsLayout(WeightsLayout::os_is_yx_osv32_isv32p); - k.EnableDifferentTypes(); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - return k; -} - -ReorderWeightsBinaryKernel::DispatchData ReorderWeightsBinaryKernel::SetDefault( - const reorder_weights_params& params) const { - const auto& out = params.output; - - DispatchData dispatchData; - - dispatchData.gws = { out.OFM().v, CeilDiv(out.IFM().v, 32), out.X().v * out.Y().v }; - dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); - - return dispatchData; -} - -KernelsData ReorderWeightsBinaryKernel::GetKernelsData(const Params& params, const optional_params& options) const { - const reorder_weights_params& orgParams = static_cast(params); - return GetCommonKernelsData(orgParams, options); -} - -KernelsPriority ReorderWeightsBinaryKernel::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { - return FORCE_PRIORITY_4; -} -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.h 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.h deleted file mode 100644 index 766ad443fcae1e..00000000000000 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_binary_kernel.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "reorder_kernel_base.h" - -namespace kernel_selector { -class ReorderWeightsBinaryKernel : public ReorderKernelBase { -public: - ReorderWeightsBinaryKernel() : ReorderKernelBase("reorder_weights_binary") {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; - DispatchData SetDefault(const reorder_weights_params& arg) const override; - -protected: - ParamsKey GetSupportedKey() const override; -}; -} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp index fbe010f111bb76..c43fd014212818 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_weights_kernel_selector.cpp @@ -8,7 +8,6 @@ #include "reorder_weights_winograd_6x3_kernel.h" #include "reorder_weights_image_fyx_b_kernel.h" #include "reorder_weights_image_winograd_6x3_kernel.h" -#include "reorder_weights_binary_kernel.h" #include "reorder_weights_opt.h" #include "reorder_weights_int4.h" @@ -20,7 +19,6 @@ ReorderWeightsKernelSelector::ReorderWeightsKernelSelector() { Attach(); Attach(); Attach(); - Attach(); Attach(); Attach(); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp index 
3fcd03bdece0db..e4314d9cc11f3d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp @@ -61,7 +61,6 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{ { DataLayout::bfzyx, { 0, 1, 2, -1, -1, -1, 3, 4 } }, { DataLayout::bzyxf, { 1, 2, 3, -1, -1, -1, 0, 4 } }, { DataLayout::fs_b_yx_fsv32, { 0, 1, -1, -1, -1, -1, 3, 2 } }, - { DataLayout::b_fs_yx_32fp, { 0, 1, -1, -1, -1, -1, 2, 3 } }, { DataLayout::bfwzyx, { 0, 1, 2, 3, -1, -1, 4, 5 } }, { DataLayout::bfuwzyx, { 0, 1, 2, 3, 4, -1, 5, 6 } }, { DataLayout::bfvuwzyx, { 0, 1, 2, 3, 4, 5, 6, 7 } }, @@ -151,7 +150,6 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{ { WeightsLayout::os_is_zyx_osv32_isv4, { 0, 1, 2, 3, 4, -1 } }, { WeightsLayout::oizyx, { 0, 1, 2, 3, 4, -1 } }, { WeightsLayout::iozyx, { 0, 1, 2, 4, 3, -1 } }, - { WeightsLayout::os_is_yx_osv32_isv32p, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::os_is_zyx_isv16_osv16, { 0, 1, 2, 3, 4, -1 } }, { WeightsLayout::os_is_yx_isv16_osv16, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::is_os_yx_osv8_isv4, { 0, 1, -1, 3, 2, -1 } }, @@ -261,10 +259,6 @@ NDims DataTensor::GetSimpleDims(const std::vector& d, DataLayout l) { assert(newDims.size() == 5); newDims[3] = RoundUp(newDims[3], 32); break; - case b_fs_yx_32fp: - assert(newDims.size() == 4); - newDims[3] = RoundUp(newDims[3], 32); - break; case fs_b_yx_fsv32: assert(newDims.size() == 4); newDims[3] = RoundUp(newDims[3], 32); @@ -788,11 +782,6 @@ NDims WeightsTensor::GetSimpleDims(const std::vector& d, WeightsLayout l newDims[3] = RoundUp(newDims[3], 4); newDims[4] = RoundUp(newDims[4], 32); break; - case os_is_yx_osv32_isv32p: - assert(newDims.size() == 4); - newDims[2] = RoundUp(newDims[2], 32); // ic - newDims[3] = RoundUp(newDims[3], 32); // oc - break; case os_is_yx_isv16_osv16: assert(newDims.size() == 4); newDims[2] = RoundUp(newDims[2], 16); diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h index 205b3198a7a103..23a56729f1ea74 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h @@ -72,7 +72,6 @@ enum DataLayout { bfzyx, // batch+feature+3D spatial bzyxf, fs_b_yx_fsv32, // for FP16 kernels, 32 features to avoid partial writes - b_fs_yx_32fp, // bfyx with blocks of 16 packed binary input channels bfwzyx, // batch, feature, 4D spatial bfuwzyx, // batch, feature, 5D spatial bfvuwzyx, // batch, feature, 6D spatial @@ -191,7 +190,6 @@ enum WeightsLayout { os_is_yx_osv4_isv16, oizyx, iozyx, - os_is_yx_osv32_isv32p, // 2 blocks: 32 packed binary in channels and 32 output channels os_is_osv32_isv32_swizzled_by_4, // for weights for 1x1 IMAD convolution os_i_yxs_osv4_yxsv4, // for weights for depthwise IMAD convolution os_y_is_x_osv8_isv2, diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index e152e0367fea8c..0eeeb3555a0900 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -158,7 +158,6 @@ std::shared_ptr Graph::get_runtime_model(std::vectorget_bilinear_interpolation_pad()); } -static void CreateBinaryConvolutionOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {2}); - auto inputs = p.GetInputInfo(op); - std::string layerName = layer_type_name_ID(op); - - auto outDims = op->get_output_shape(0); - - std::vector weights = {inputs[1].pid}; - cldnn::data_types calc_precision = cldnn::element_type_to_data_type(op->get_output_element_type(0)); - - auto strides = op->get_strides(); - auto pads_begin = op->get_pads_begin(); - auto dilations = op->get_dilations(); - - // Extend 1d vectors to 2d as 1d can't be handled properly by the graph optimizer for now - strides.resize(std::max(2, strides.size()), 1); - pads_begin.resize(std::max(2, 
pads_begin.size()), 0); - dilations.resize(std::max(2, dilations.size()), 1); - - auto convPrim = cldnn::binary_convolution(layerName, - inputs[0], - weights, - strides, - pads_begin, - dilations, - tensor_from_dims(outDims), - 1, - op->get_pad_value(), - calc_precision); - - p.add_primitive(*op, convPrim); -} - REGISTER_FACTORY_IMPL(v1, GroupConvolution); REGISTER_FACTORY_IMPL(v1, Convolution); REGISTER_FACTORY_IMPL(v1, ConvolutionBackpropData); REGISTER_FACTORY_IMPL(v1, GroupConvolutionBackpropData); REGISTER_FACTORY_IMPL(v1, DeformableConvolution); REGISTER_FACTORY_IMPL(v8, DeformableConvolution); -REGISTER_FACTORY_IMPL(v1, BinaryConvolution); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.cpp b/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.cpp new file mode 100644 index 00000000000000..40c4dfa781eed2 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "binary_conv_to_conv.hpp" +#include + +#include "openvino/core/coordinate_diff.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/core/type/float16.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/binary_convolution.hpp" +#include "openvino/op/convolution.hpp" +#include "openvino/op/fake_quantize.hpp" +#include "openvino/op/pad.hpp" +#include "openvino/op/util/attr_types.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gpu { + +namespace { +template +void convert_packed_bin_to_fp(const uint8_t* src_ptr, DST_T* dst_ptr, size_t size) { + for (size_t i = 0; i < size; i++) { + auto val = (src_ptr[i / 8] >> (i % 8)) & 0x01; + dst_ptr[i] = static_cast(val 
== 0 ? -1.0f : 1.0f); + } +} +} // namespace + +ConvertBinaryConvolutionToConvolution::ConvertBinaryConvolutionToConvolution() { + using namespace ov::pass::pattern; + + auto binary_fq = [](const Output& node) { + auto fq = std::dynamic_pointer_cast(node.get_node_shared_ptr()); + if (!fq) + return false; + + return fq->get_levels() == 2; + }; + + auto activations_input_m = any_input(); + auto in_lo_m = wrap_type(); + auto in_hi_m = wrap_type(); + auto out_lo_m = wrap_type(); + auto out_hi_m = wrap_type(); + auto fq_m = wrap_type({activations_input_m, in_lo_m, in_hi_m, out_lo_m, out_hi_m}, binary_fq); + auto weights_input_m = wrap_type(type_matches(ov::element::u1)); + auto binary_conv_m = wrap_type({fq_m, weights_input_m}); + + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto binary_conv = std::dynamic_pointer_cast(pattern_map.at(binary_conv_m).get_node_shared_ptr()); + auto activations = pattern_map.at(activations_input_m); + auto weights = std::dynamic_pointer_cast(pattern_map.at(weights_input_m).get_node_shared_ptr()); + auto fp_element_type = activations.get_element_type(); + + ov::Tensor new_weights_data(fp_element_type, weights->get_output_shape(0)); + auto src_ptr = static_cast(weights->get_data_ptr()); + auto size = ov::shape_size(weights->get_shape()); + switch (fp_element_type) { + case ov::element::f16: convert_packed_bin_to_fp(src_ptr, static_cast(new_weights_data.data()), size); break; + case ov::element::f32: convert_packed_bin_to_fp(src_ptr, static_cast(new_weights_data.data()), size); break; + default: return false; + } + + auto new_weights_const = std::make_shared(new_weights_data); + auto rank = activations.get_partial_shape().size(); + + auto in_lo = pattern_map.at(in_lo_m); + auto in_hi = pattern_map.at(in_hi_m); + auto out_lo = std::make_shared(fp_element_type, ov::Shape(rank, 1), std::vector{-1.0f}); + auto out_hi = std::make_shared(fp_element_type, 
ov::Shape(rank, 1), std::vector{1.0f}); + + auto new_fq = std::make_shared(activations, in_lo, in_hi, out_lo, out_hi, 2); + std::vector> result_nodes = { new_fq }; + + std::shared_ptr conv_input = new_fq; + auto pb = binary_conv->get_pads_begin(); + auto pe = binary_conv->get_pads_end(); + if (binary_conv->get_pad_value() != 0.0f) { + pb.insert(pb.begin(), rank - pb.size(), 0); + pe.insert(pe.begin(), rank - pe.size(), 0); + auto pad_b = std::make_shared(ov::element::i32, ov::Shape{pb.size()}, pb); + auto pad_e = std::make_shared(ov::element::i32, ov::Shape{pe.size()}, pe); + auto pad_v = std::make_shared(fp_element_type, ov::Shape{}, std::vector{binary_conv->get_pad_value()}); + auto pad = std::make_shared(new_fq, pad_b, pad_e, pad_v, ov::op::PadMode::CONSTANT); + conv_input = pad; + + pb = ov::CoordinateDiff(binary_conv->get_pads_begin().size(), 0); + pe = ov::CoordinateDiff(binary_conv->get_pads_end().size(), 0); + result_nodes.push_back(pad); + } + auto convolution = std::make_shared(conv_input, + new_weights_const, + binary_conv->get_strides(), + pb, + pe, + binary_conv->get_dilations(), + ov::op::PadType::EXPLICIT); + + result_nodes.push_back(convolution); + convolution->set_friendly_name(binary_conv->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), result_nodes); + ov::replace_node(binary_conv, convolution); + return true; + }; + + auto m = std::make_shared(binary_conv_m, "ConvertBinaryConvolutionToConvolution"); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.hpp b/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.hpp new file mode 100644 index 00000000000000..780d3b19d345c5 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/binary_conv_to_conv.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
"openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +class ConvertBinaryConvolutionToConvolution: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertBinaryConvolutionToConvolution", "0"); + ConvertBinaryConvolutionToConvolution(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 68268790c62860..9d1bdbe01d275c 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -116,6 +116,7 @@ #include "plugin/transformations/move_fc_reshape_to_weights.hpp" #include "plugin/transformations/convert_fc_to_compressed.hpp" #include "plugin/transformations/rms_fusion.hpp" +#include "plugin/transformations/binary_conv_to_conv.hpp" #include "transformations/low_precision/mark_dequantization_subgraph.hpp" #include "low_precision/pull_reshape_through_dequantization.hpp" @@ -260,6 +261,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); } + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/plugins/intel_gpu/src/runtime/format.cpp b/src/plugins/intel_gpu/src/runtime/format.cpp index 095bda14bbf97d..db229fe2abe2b0 100644 --- a/src/plugins/intel_gpu/src/runtime/format.cpp +++ b/src/plugins/intel_gpu/src/runtime/format.cpp @@ -48,7 +48,6 @@ static const std::map format_traits_map { FMT_TRAITS(bfuwzyx, 1, 1, 5, 0, {0, 1, 2, 3, 4, 5, 6}, "bfuwzyx", "bfxyzwu", {}), FMT_TRAITS(bfvuwzyx, 1, 1, 6, 0, {0, 1, 2, 3, 4, 5, 6, 7}, "bfvuwzyx", "bfxyzwuv", {}), FMT_TRAITS(fs_b_yx_fsv32, 1, 1, 2, 0, {1, 0, 2, 3}, "fbyx", "bfxy", {{1, 32}}), - FMT_TRAITS(b_fs_yx_32fp, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy", {{1, 32}}), FMT_TRAITS(b_fs_zyx_fsv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{1, 16}}), 
FMT_TRAITS(bs_fs_zyx_bsv16_fsv32, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 16 }, {1, 32}}), FMT_TRAITS(bs_fs_zyx_bsv16_fsv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}), @@ -132,7 +131,6 @@ static const std::map format_traits_map { FMT_TRAITS(os_is_yx_osv32_isv4_swizzled_by_2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 4}}), FMT_TRAITS(os_is_yx_osv32_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 4}}), FMT_TRAITS(os_is_zyx_osv32_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 4}}), - FMT_TRAITS(os_is_yx_osv32_isv32p, 1, 1, 1, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 32}}), FMT_TRAITS(os_is_zyx_isv16_osv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 16}, {0, 16}}), FMT_TRAITS(is_os_zyx_isv16_osv16, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "oixyz", {{1, 16}, {0, 16}}), FMT_TRAITS(is_os_yx_osv8_isv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "oixy", {{0, 8}, {1, 4}}), @@ -294,8 +292,7 @@ format format::adjust_to_rank(format fmt, size_t new_rank) { auto is_adjustable = [](const format& fmt) -> bool { return !format::is_weights_format(fmt) && !format::is_image_2d(fmt) && - !format::is_winograd(fmt) && - fmt != format::b_fs_yx_32fp; + !format::is_winograd(fmt); }; // Skip special formats as order + blocking desc may be not enough to properly match them diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index a18c5bd73210f1..5534e9a1248285 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -151,8 +151,6 @@ static format to_weights_format(format f, bool is_grouped) { return format::o_is_yx_isv16; case format::bs_fs_fsv8_bsv8: return format::os_i_osv8__ai8; - case format::b_fs_yx_32fp: - return format::os_is_yx_osv32_isv32p; default: throw std::invalid_argument("Unable to convert data format " + f.to_string() + " to weights format"); } @@ -377,11 +375,6 @@ size_t 
layout::get_linear_size() const { sizes[1] = align_to(sizes[1], 4); sizes[0] = align_to(sizes[0], 8); sizes[2] = align_to(sizes[2], 8); - } else if (this->format == cldnn::format::b_fs_yx_32fp) { - sizes[1] = align_to(sizes[1], 32); - } else if (this->format == cldnn::format::os_is_yx_osv32_isv32p) { - sizes[0] = align_to(sizes[0], 32); - sizes[1] = align_to(sizes[1], 32); } else if (this->format == cldnn::format::image_2d_rgba) { sizes[1] = 4; } else if (this->format == cldnn::format::gs_oi_yxs_gsv4_yxsv4 || diff --git a/src/plugins/intel_gpu/tests/unit/fusions/binary_convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/binary_convolution_fusion_test.cpp deleted file mode 100644 index 5079e06e0e8704..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/fusions/binary_convolution_fusion_test.cpp +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "fusion_test_common.hpp" - -#include -#include -#include -#include -#include -#include - -#include - -using namespace cldnn; -using namespace ::tests; - -namespace { - -struct binary_convolution_test_params { - tensor in_shape; - tensor out_shape; - tensor kernel; - ov::Strides stride; - ov::CoordinateDiff pad; - ov::Strides dilation; - uint32_t groups; - data_types data_type; - format input_format; - data_types weights_type; - format weights_format; - data_types default_type; - format default_format; - size_t expected_fused_primitives; - size_t expected_not_fused_primitives; -}; - -class BinaryConvolutionFusingTest : public BaseFusingTest { -public: - void execute(binary_convolution_test_params& p) { - auto input_prim = get_mem(get_input_layout(p)); - network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused); - network network_fused(this->engine, this->topology_fused, cfg_fused); - network_fused.set_input_data("input", input_prim); - network_not_fused.set_input_data("input", input_prim); - - 
compare(network_not_fused, network_fused, p); - auto find_conv = [](primitive_info& p) -> bool { - if (p.original_id == "conv_prim") - return true; - return false; - }; - - auto pi_fused = network_fused.get_primitives_info(); - auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_conv); - if (info_fused != pi_fused.end()) - std::cout << "kernel: " << info_fused->kernel_id << std::endl; - } - - layout get_input_layout(binary_convolution_test_params& p) { - auto pad = p.pad; - std::vector pad_ = { 0, 0, static_cast(pad[1]), static_cast(pad[0]) }; - return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } }; - } - - layout get_per_channel_layout(binary_convolution_test_params& p) { - return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} }; - } -}; - -} // namespace - -#define CASE_BIN_CONV1 { 1, 16, 4, 5 }, { 1, 16, 4, 5 }, { 1, 1, 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, 1, data_types::u1, format::b_fs_yx_32fp, data_types::u1, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx -#define CASE_BIN_CONV2 { 1, 16, 4, 5 }, { 1, 30, 4, 5 }, { 1, 1, 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::u1, format::b_fs_yx_32fp, data_types::u1, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx -#define CASE_BIN_CONV3 { 1, 184, 12, 21 }, { 1, 224, 12, 21 }, { 1, 1, 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::u1, format::b_fs_yx_32fp, data_types::u1, format::os_is_yx_osv32_isv32p, data_types::f32, format::bfyx - -/* ----------------------------------------------------------------------------------------------------- */ -/* -------------------------------------- binary convolution cases ------------------------------------- */ -/* ----------------------------------------------------------------------------------------------------- */ - -class conv_bin_activation : public BinaryConvolutionFusingTest {}; -TEST_P(conv_bin_activation, basic) { - auto p = GetParam(); - create_topologies( - 
input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - activation("activation", input_info("bin_conv_prim"), activation_func::relu), - reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_bin_activation, ::testing::ValuesIn(std::vector{ - binary_convolution_test_params{ CASE_BIN_CONV1, 2, 3 }, -})); - -class conv_bin_scale_activation : public BinaryConvolutionFusingTest {}; -TEST_P(conv_bin_scale_activation, basic) { - auto p = GetParam(); - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - eltwise("scale", { input_info("bin_conv_prim"), input_info("scale_data") }, eltwise_mode::prod, p.default_type), - activation("activation", input_info("scale"), activation_func::relu), - reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_bin_scale_activation, ::testing::ValuesIn(std::vector{ - binary_convolution_test_params{ CASE_BIN_CONV1, 2, 4 }, - binary_convolution_test_params{ CASE_BIN_CONV2, 2, 4 }, -})); - -class conv_bin_quantize_bin : public BinaryConvolutionFusingTest {}; -TEST_P(conv_bin_quantize_bin, channel_wise_quantize) { - auto p = GetParam(); - auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random); - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - 
data("in_lo", in_thresh), - data("in_hi", in_thresh), - data("out_lo", get_mem(get_per_channel_layout(p), -1)), - data("out_hi", get_mem(get_per_channel_layout(p), 1)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - quantize("quantize_data", input_info("bin_conv_prim"), input_info("in_lo"), input_info("in_hi"), - input_info("out_lo"), input_info("out_hi"), 2, data_types::u1), - reorder("reorder_bfyx", input_info("quantize_data"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -TEST_P(conv_bin_quantize_bin, blob_wise_quantize) { - auto p = GetParam(); - auto in_thresh = get_mem(get_single_element_layout(p), min_random, max_random); - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("in_lo", in_thresh), - data("in_hi", in_thresh), - data("out_lo", get_mem(get_single_element_layout(p), -1)), - data("out_hi", get_mem(get_single_element_layout(p), 1)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - quantize("quantize_data", input_info("bin_conv_prim"), input_info("in_lo"), input_info("in_hi"), - input_info("out_lo"), input_info("out_hi"), 2, data_types::u1), - reorder("reorder_bfyx", input_info("quantize_data"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_bin_quantize_bin, ::testing::ValuesIn(std::vector{ - binary_convolution_test_params{ CASE_BIN_CONV1, 2, 3 }, - binary_convolution_test_params{ CASE_BIN_CONV2, 2, 3 }, -})); - -class conv_bin_scale_conv_dw : public BinaryConvolutionFusingTest {}; -TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride2) { - auto p = GetParam(); - auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3)); - auto dw_weights_layout = layout{ 
p.default_type, format::goiyx, dw_tensor }; - - ov::Strides dw_stride = {2, 2}; - ov::Strides dw_dilation = {1, 1}; - ov::CoordinateDiff dw_pad = p.pad; - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("weights_dw", get_mem(dw_weights_layout, -127, 127)), - data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - eltwise("scale", { input_info("bin_conv_prim"), input_info("scale_data") }, eltwise_mode::prod, p.default_type), - convolution("conv_dw", input_info("scale"), "weights_dw", "", p.out_shape.feature[0], dw_stride, dw_dilation, dw_pad, dw_pad, true), - reorder("reorder_bfyx", input_info("conv_dw"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride1) { - auto p = GetParam(); - auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3)); - auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor }; - - ov::Strides dw_stride = {1, 1}; - ov::Strides dw_dilation = {1, 1}; - ov::CoordinateDiff dw_pad = p.pad; - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("weights_dw", get_mem(dw_weights_layout, -127, 127)), - data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - eltwise("scale", { input_info("bin_conv_prim"), input_info("scale_data") }, eltwise_mode::prod, p.default_type), - convolution("conv_dw", input_info("scale"), "weights_dw", "", p.out_shape.feature[0], dw_stride, dw_dilation, dw_pad, dw_pad, true), - reorder("reorder_bfyx", input_info("conv_dw"), p.default_format, 
data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_bin_scale_conv_dw, ::testing::ValuesIn(std::vector{ - binary_convolution_test_params{ CASE_BIN_CONV2, 3, 4 }, - binary_convolution_test_params{ CASE_BIN_CONV3, 3, 4 }, -})); - -class conv_bin_scale_conv_dw_prelu : public BinaryConvolutionFusingTest {}; -TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride2) { - auto p = GetParam(); - auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3)); - auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor }; - - ov::Strides dw_stride = {2, 2}; - ov::Strides dw_dilation = {1, 1}; - ov::CoordinateDiff dw_pad = p.pad; - auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random); - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("weights_dw", get_mem(dw_weights_layout, -127, 127)), - data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - eltwise("scale", { input_info("bin_conv_prim"), input_info("scale_data") }, eltwise_mode::prod, p.default_type), - convolution("conv_dw", input_info("scale"), "weights_dw", "", p.out_shape.feature[0], dw_stride, dw_dilation, dw_pad, dw_pad, true), - data("slope_data", get_mem(get_per_channel_layout(p))), - activation("activation", input_info("conv_dw"), "slope_data", activation_func::relu_negative_slope), - reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride1) { - auto p = GetParam(); - auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3)); - auto dw_weights_layout = layout{ p.default_type, format::goiyx, 
dw_tensor }; - - ov::Strides dw_stride = {1, 1}; - ov::Strides dw_dilation = {1, 1}; - ov::CoordinateDiff dw_pad = p.pad; - auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random); - create_topologies( - input_layout("input", get_input_layout(p)), - data("weights", get_mem(get_weights_layout(p), -127, 127)), - data("weights_dw", get_mem(dw_weights_layout, -127, 127)), - data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)), - binary_convolution("bin_conv_prim", input_info("input"), { "weights" }, p.stride, p.pad, p.dilation, p.out_shape, p.groups), - eltwise("scale", { input_info("bin_conv_prim"), input_info("scale_data") }, eltwise_mode::prod, p.default_type), - convolution("conv_dw", input_info("scale"), "weights_dw", "", p.out_shape.feature[0], dw_stride, dw_dilation, dw_pad, dw_pad, true), - data("slope_data", get_mem(get_per_channel_layout(p))), - activation("activation", input_info("conv_dw"), "slope_data", activation_func::relu_negative_slope), - reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32) - ); - - tolerance = 1e-5f; - execute(p); -} - -INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_bin_scale_conv_dw_prelu, ::testing::ValuesIn(std::vector{ - binary_convolution_test_params{ CASE_BIN_CONV2, 3, 5 }, - binary_convolution_test_params{ CASE_BIN_CONV3, 3, 5 }, -})); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp index 58694f91d4c3ab..06c6f7aac1c6a1 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp @@ -114,10 +114,7 @@ class BaseFusingTest : public ::testing::TestWithParam { cldnn::memory::ptr get_mem(cldnn::layout l) { auto prim = engine.allocate_memory(l); tensor s = l.get_tensor(); - if (l.data_type == data_types::u1) { - VF rnd_vec = rg.generate_random_1d(s.count() / 32, min_random, max_random); - 
set_values(prim, rnd_vec); - } else if (l.data_type == data_types::i8 || l.data_type == data_types::u8) { + if (l.data_type == data_types::i8 || l.data_type == data_types::u8) { VF rnd_vec = rg.generate_random_1d(s.count(), min_random, max_random); set_values(prim, rnd_vec); } else if (l.data_type == data_types::f16) { @@ -134,10 +131,7 @@ class BaseFusingTest : public ::testing::TestWithParam { cldnn::memory::ptr get_mem(cldnn::layout l, float fill_value) { auto prim = engine.allocate_memory(l); tensor s = l.get_tensor(); - if (l.data_type == data_types::u1) { - VF rnd_vec(s.count() / 32, static_cast(fill_value)); - set_values(prim, rnd_vec); - } else if (l.data_type == data_types::f16) { + if (l.data_type == data_types::f16) { VF rnd_vec(s.count(), ov::float16(fill_value).to_bits()); set_values(prim, rnd_vec); } else if (l.data_type == data_types::f32) { @@ -169,10 +163,6 @@ class BaseFusingTest : public ::testing::TestWithParam { VF rnd_vec = rg.generate_random_norepetitions(s.count(), min, max); set_values(prim, rnd_vec); } - else if (l.data_type == data_types::u1) { - VF rnd_vec = rg.generate_random_norepetitions(s.count(), min, max); - set_values(prim, rnd_vec); - } return prim; } @@ -192,9 +182,6 @@ class BaseFusingTest : public ::testing::TestWithParam { } else if (l.data_type == data_types::u8) { VF rnd_vec = rg.generate_random_1d(s.count(), min, max); set_values(prim, rnd_vec); - } else if (l.data_type == data_types::u1) { - VF rnd_vec = rg.generate_random_1d(s.count() / 32, min, max); - set_values(prim, rnd_vec); } return prim; diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/quantize_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/quantize_si_test.cpp index 342386aae479c1..c095f486c93173 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/quantize_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/quantize_si_test.cpp @@ -72,10 +72,6 @@ INSTANTIATE_TEST_SUITE_P(smoke, quantize_test, layout{ov::PartialShape{1, 2, 3, 
4}, data_types::f32, format::bfyx}, layout{ov::PartialShape{1, 2, 3, 4}, data_types::i8, format::bfyx} }, - { - layout{ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx}, - layout{ov::PartialShape{1, 2, 3, 4}, data_types::u1, format::b_fs_yx_32fp} - }, { layout{ov::PartialShape{1, 2, 3, 4, 5}, data_types::f32, format::bfzyx}, layout{ov::PartialShape{1, 2, 3, 4, 5}, data_types::u8, format::bfzyx} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/binary_convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/binary_convolution_gpu_test.cpp deleted file mode 100644 index 9561f7a94f4941..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/test_cases/binary_convolution_gpu_test.cpp +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_utils.h" - -#include -#include -#include -#include - -#include - -using namespace cldnn; -using namespace ::tests; - -// Batch, groups, IC, IW, IH, OC, OW, OH, KH, KW, SH, SW, PH, PW -struct TestParams { - int b; - int g; - - int ic; - int ih; - int iw; - - int oc; - int oh; - int ow; - - int kh; - int kw; - - int sh; - int sw; - - int ph; - int pw; - - float pad_value; - data_types dt; - std::string name; - bool is_caching_test; - - bool isConsistent() const - { - bool res = true; - - res &= (((iw - kw + 2*pw) / sw + 1) == ow); - res &= (((ih - kh + 2*ph) / sh + 1) == oh); - return res; - } - - friend ::std::ostream& operator<<(::std::ostream& os, const TestParams& p) { - return os << "Params: [ b=" << p.b - << "; g=" << p.g - << "; src=[" << p.ic << "; " << p.ih << "; " << p.iw << "]" - << "; dst=[" << p.oc << "; " << p.oh << "; " << p.ow << "]" - << "; k=[" << p.kh << "; " << p.kw << "]" - << "; stride=[" << p.sh << "; " << p.sw << "]" - << "; pad=[" << p.ph << "; " << p.pw << "]" - << "; pad_value=" << p.pad_value - << "; name=" << p.name - << "; is_caching_test=" << p.is_caching_test - << "]"; - } - friend void 
PrintTo(const TestParams& p, ::std::ostream* os) { - *os << p; - } -}; - -static void fill(cldnn::memory::ptr mem) { - cldnn::mem_lock ptr(mem, get_test_stream()); - for (size_t i = 0; i < div_up(mem->get_layout().count(), 32); i++) { - ptr[i] = (uint32_t)rand() % (1 << 31); - } -} - -template -void compute_ref_conv_bin(const cldnn::memory::ptr src, - const cldnn::memory::ptr weights, - cldnn::memory::ptr dst, - TestParams &p) { - - cldnn::mem_lock src_data(src, get_test_stream()); - cldnn::mem_lock weights_data(weights, get_test_stream()); - cldnn::mem_lock dst_data(dst, get_test_stream()); - - int pack_size = sizeof(data_t_src) * 8; - - int B = p.b; - int NG = p.g; - int IC = p.ic; - int IH = p.ih; - int IW = p.iw; - - int OC = p.oc; - int OH = p.oh; - int OW = p.ow; - - int KH = p.kh; - int KW = p.kw; - - int SH = p.sh; - int SW = p.sw; - - int PH = p.ph; - int PW = p.pw; - - auto extract_bit = [&](data_t_src val, data_t_src bit) -> data_t_src { - return (data_t_src)((val >> bit) & 0x1); - }; - - auto ker = [&](data_t_acc &d, int g, int mb, int oc,int oh, int ow, int& ks) { - for (int ic = 0; ic < IC / NG; ++ic) { - for (int kh = 0; kh < KH; ++kh) - for (int kw = 0; kw < KW; ++kw) { - const int ih = oh * SH - PH + kh; - const int iw = ow * SW - PW + kw; - - int widx = g * OC / NG *IC / NG * KH * KW - + oc * IC / NG * KH * KW - + ic * KH * KW - + kh * KW - + kw; - int iidx = -1; - uint8_t w = extract_bit(weights_data[widx / pack_size], widx % pack_size); - uint8_t s = 0; - - if ((ih < 0 || ih >= IH || iw < 0 || iw >= IW)) { - if (p.pad_value == 0.0f) - continue; - else - s = (p.pad_value == -1.0f) ? 
0 : 1; - } else { - if (ic == 0) ks++; - iidx = mb * div_up(IC, pack_size) * IH * IW - + g * div_up(IC, pack_size) / NG * IH * IW - + (ic/pack_size) * IH * IW - + ih * IW - + iw; - - s = extract_bit(src_data[iidx], ic % pack_size); - } - d += (data_t_acc)(s ^ w); - } - } - }; - - for (int g = 0; g < NG; g++) { - for (int b = 0; b < B; b++) { - for (int oc = 0; oc < OC / NG; oc++) { - for (int oh = 0; oh < OH; oh++) { - for (int ow = 0; ow < OW; ow++) { - data_t_acc a = 0; - int ks = 0; - ker(a, g, b, oc, oh, ow, ks); - int dst_off = b * OC * OH* OW - + g * OC / NG * OH * OW - + oc * OH * OW - + oh * OW - + ow; - if (p.pad_value == 0.0f) - dst_data[dst_off] =(data_t_dst)(IC*ks - 2*a); - else - dst_data[dst_off] = (data_t_dst)(IC*KH*KW - 2*a); - } - } - } - } - } -} - -class binary_convolution_test : public ::testing::TestWithParam { - void SetUp() override { - std::cout << GetParam() << std::endl; - ASSERT_TRUE(GetParam().isConsistent()); - } -}; - -TEST_P(binary_convolution_test, conv) { - auto& engine = get_test_engine(); - - // DG2 is not validated for binary convolution: https://github.com/openvinotoolkit/openvino/pull/12486 - if(engine.get_device_info().supports_immad) - return; - - ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - topology topology_bin; - - std::string weights_suffix = "_w_"; - - std::string input_name = "input"; - std::string output_name = "conv"; - - TestParams p = GetParam(); - - ov::Strides stride = {static_cast(p.sh), static_cast(p.sw)}; - ov::CoordinateDiff pad = {p.ph, p.pw}; - ov::Strides dilation = {1,1}; - - cldnn::tensor is_size{ cldnn::batch(p.b), - cldnn::feature(p.ic), - cldnn::spatial(p.iw, p.ih) }; - cldnn::tensor wei_size{ cldnn::batch(p.oc), - cldnn::feature(p.ic), - cldnn::spatial(p.kw, p.kh) }; - cldnn::tensor os_size{ cldnn::batch(p.b), - cldnn::feature(p.oc), - cldnn::spatial(p.ow, p.oh)}; - - auto input = engine.allocate_memory({ 
cldnn::data_types::u1, cldnn::format::b_fs_yx_32fp, is_size }); - auto weights = engine.allocate_memory({ cldnn::data_types::u1, cldnn::format::bfyx, wei_size }); - auto output_ref = engine.allocate_memory({ cldnn::data_types::f32, cldnn::format::bfyx, os_size }); - - fill(input); - fill(weights); - - compute_ref_conv_bin(input, weights, output_ref, p); - -// print_bin_blob(input,"input"); -// print_bin_blob_packed(input,"input"); -// print_bin_blob(weights, "weights"); -// print_blob(output_ref, "ref_out"); - - topology_bin.add(input_layout(input_name, input->get_layout())); - topology_bin.add(data(output_name + weights_suffix, weights)); - - topology_bin.add(binary_convolution(output_name, input_info(input_name), {output_name + weights_suffix}, - stride, pad, dilation, os_size, 1, p.pad_value, p.dt)); - - cldnn::network::ptr network_bin = get_network(engine, topology_bin, config, get_test_stream_ptr(), p.is_caching_test); - - network_bin->set_input_data(input_name, input); - - std::map outputs = network_bin->execute(); - auto outputMemory = outputs.at(output_name).get_memory(); - - for (size_t i = 0; i < output_ref->count(); i++) { - if (p.dt == data_types::f32) { - cldnn::mem_lock ref(output_ref, get_test_stream()); - cldnn::mem_lock opt(outputMemory, get_test_stream()); - - ASSERT_EQ(ref[i], opt[i]) << i; - } else if (p.dt == data_types::f16) { - cldnn::mem_lock ref(output_ref, get_test_stream()); - cldnn::mem_lock opt(outputMemory, get_test_stream()); - - ASSERT_EQ(ref[i], half_to_float(opt[i])) << i; - } - } -} - -// Batch, groups, IC, IW, IH, OC, OW, OH, KH, KW, SH, SW, PH, PW -INSTANTIATE_TEST_SUITE_P(BinaryConvTest, binary_convolution_test, ::testing::Values( - TestParams{1, 1, 16,2,2, 4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 3,3, 1,1, 1,1, 0.0f, data_types::f32, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 
3,3, 1,1, 1,1, 1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 16,2,2, 16,2,2, 3,3, 1,1, 1,1, 1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 32,2,2, 32,2,2, 3,3, 1,1, 1,1, 1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 32,2,2, 32,2,2, 1,1, 1,1, 0,0, 1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 128,2,2, 128,2,2, 1,1, 1,1, 0,0, -1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 16,4,3, 4,4,3, 1,1, 1,1, 0,0, -1.0f, data_types::f32, "small", false}, - TestParams{1, 1, 16,2,2, 4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 3,3, 1,1, 1,1, 0.0f, data_types::f16, "small", false}, - TestParams{1, 1, 17,2,2, 4,2,2, 3,3, 1,1, 1,1, 1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 16,2,2, 16,2,2, 3,3, 1,1, 1,1, 1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 32,2,2, 32,2,2, 3,3, 1,1, 1,1, 1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 32,2,2, 32,2,2, 1,1, 1,1, 0,0, 1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 128,2,2, 128,2,2, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 16,4,3, 4,4,3, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 9,16,32, 17,8,16, 7,7, 2,2, 3,3, -1.0f, data_types::f16, "small", false}, - TestParams{1, 1, 9,16,32, 17,8,16, 7,7, 2,2, 3,3, 1.0f, data_types::f16, "small", false}, - - // Resnet-18 3x3 - TestParams{1, 1, 64,56,56, 64,56,56, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_0", false}, - TestParams{1, 1, 64,56,56, 128,28,28, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_1", false}, - TestParams{1, 1, 128,28,28, 128,28,28, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_2", false}, - TestParams{1, 1, 128,28,28, 256,14,14, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_3", false}, - TestParams{1, 1, 256,14,14, 256,14,14, 3,3, 1,1, 1,1, -1.0f, 
data_types::f16, "resnet18_4", false}, - TestParams{1, 1, 256,14,14, 512, 7, 7, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_5", false}, - TestParams{1, 1, 512, 7, 7, 512, 7, 7, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_6", false}, - // Resnet-50 - TestParams{1, 1, 64,56,56, 64,56,56, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_0", false}, - TestParams{1, 1, 64,56,56, 256,56,56, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_1", false}, - TestParams{1, 1, 256,56,56, 128,28,28, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_2", false}, - TestParams{1, 1, 128,28,28, 512,28,28, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_3", false}, - TestParams{1, 1, 512,28,28, 128,28,28, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_4", false}, - TestParams{1, 1, 512,28,28, 256,14,14, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_5", false}, - TestParams{1, 1, 256,14,14, 1024,14,14, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_6", false}, - TestParams{1, 1, 1024,14,14, 256,14,14, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_7", false}, - TestParams{1, 1, 1024,14,14, 512,7,7, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_8", false}, - TestParams{1, 1, 512,7,7, 2048,7,7, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_9", false}, - TestParams{1, 1, 2048,7,7, 512,7,7, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_10", false}, - // Mobilenet-ssd-vd - TestParams{1, 1, 56,96,168, 112,96,168, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv2_2_sep_BIN", false}, // back_bone_seq_conv2_2_sep_BIN - TestParams{1, 1, 112,96,168, 112,96,168, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv3_1_sep_BIN", false}, // back_bone_seq_conv3_1_sep_BIN - TestParams{1, 1, 112,48,84, 208,48, 84, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv3_2_sep_BIN", false}, // back_bone_seq_conv3_2_sep_BIN - TestParams{1, 1, 208,48,84, 216,48, 84, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv4_1_sep_BIN", false}, // back_bone_seq_conv4_1_sep_BIN - TestParams{1, 1, 216,24,42, 328,24, 
42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv4_2_sep_BIN", false}, // back_bone_seq_conv4_2_sep_BIN - TestParams{1, 1, 328,24,42, 288,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_1_sep_BIN", false}, // back_bone_seq_conv5_1_sep_BIN - TestParams{1, 1, 288,24,42, 288,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_2_sep_BIN", false}, // back_bone_seq_conv5_2_sep_BIN - TestParams{1, 1, 288,24,42, 240,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_3_sep_BIN", false}, // back_bone_seq_conv5_3_sep_BIN - TestParams{1, 1, 240,24,42, 264,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_4_sep_BIN", false}, // back_bone_seq_conv5_4_sep_BIN - TestParams{1, 1, 264,24,42, 192,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_5_sep_BIN", false}, // back_bone_seq_conv5_5_sep_BIN - TestParams{1, 1, 192,12,21, 208,12, 21, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_6_sep_BIN", false}, // back_bone_seq_conv5_6_sep_BIN - TestParams{1, 1, 208,12,21, 88,12, 21, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv6_sep_BN", false} // back_bone_seq_conv6_sep_BN -)); - -INSTANTIATE_TEST_SUITE_P(export_import, binary_convolution_test, ::testing::Values( - TestParams{1, 1, 208,12,21, 88,12, 21, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv6_sep_BN", true} -)); - -template -static void set_binary_values(cldnn::memory::ptr mem, std::vector args) { - cldnn::mem_lock ptr(mem, get_test_stream()); - - auto it = ptr.begin(); - for (auto x : args) - *it++ = x; -} - -TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) { - auto& engine = get_test_engine(); - // DG2 is not validated for binary convolution: https://github.com/openvinotoolkit/openvino/pull/12486 - if(engine.get_device_info().supports_immad) - return; - - auto input = engine.allocate_memory({ data_types::u1, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); - auto weights = engine.allocate_memory({ data_types::u1, format::bfyx, { 4, 16, 1, 1 } }); - - // 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 - // 1 0 0 0 0 1 
1 0 0 1 1 0 1 0 1 0 - // 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - set_binary_values(input, { 21796, 22113, 24531, 32768 }); - - // 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - // 0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 - // 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - set_binary_values(weights, { 65535, 21930, 43605, 0 }); - - // 16 - 2*popcount(1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 1) = -4 - // 16 - 2*popcount(0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 1) = -2 - // 16 - 2*popcount(0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 1) = 6 - // 16 - 2*popcount(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0) = -14 - - // 16 - 2*popcount(0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0) = 8 - // 16 - 2*popcount(1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 0) = 2 - // 16 - 2*popcount(1 0 0 1 1 1 1 0 0 1 0 1 0 0 0 0) = 2 - // 16 - 2*popcount(0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 1) = -2 - - // 16 - 2*popcount(1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1) = -8 - // 16 - 2*popcount(0 0 1 0 1 1 0 0 0 0 1 1 1 1 1 1) = -2 - // 16 - 2*popcount(0 1 1 0 0 0 0 1 1 0 1 0 1 1 1 1) = -2 - // 16 - 2*popcount(1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 0) = 2 - - // 16 - 2*popcount(0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0) = 4 - // 16 - 2*popcount(1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0) = 2 - // 16 - 2*popcount(1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 0) = -6 - // 16 - 2*popcount(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1) = 14 - VF output_vec = { - -4.0f, -2.0f, 6.0f, -14.0f, - 8.0f, 2.0f, 2.0f, -2.0f, - -8.0f, -2.0f, -2.0f, 2.0f, - 4.0f, 2.0f, -6.0f, 14.0f }; - - topology topology( - input_layout("input", input->get_layout()), - data("weights", weights), - binary_convolution("binary_conv", input_info("input"), { "weights" }, - { 1,1 }, - { 0,0 }, - { 1,1 }, - { 1,4,2,2 }, - 0, 0.0f, - data_types::f32, - padding{ { 0,0,0,0 }, 0 }) - ); - - ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - - network network(engine, topology, config); - network.set_input_data("input", input); - - auto outputs = network.execute(); - 
ASSERT_EQ(outputs.size(), size_t(1)); - ASSERT_EQ(outputs.begin()->first, "binary_conv"); - - auto output_memory = outputs.at("binary_conv").get_memory(); - auto output_layout = output_memory->get_layout(); - cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - - ASSERT_EQ(output_layout.format, format::bfyx); - ASSERT_EQ(output_layout.data_type, data_types::f32); - ASSERT_EQ(output_layout.batch(), 1); - ASSERT_EQ(output_layout.feature(), 4); - ASSERT_EQ(output_layout.spatial(1), 2); - ASSERT_EQ(output_layout.spatial(0), 2); - - for (size_t i = 0; i < output_layout.count(); i++) - { - ASSERT_EQ(output_ptr[i], output_vec[i]) << "index="<< i; - } -} - -TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { - auto& engine = get_test_engine(); - // DG2 is not validated for binary convolution: https://github.com/openvinotoolkit/openvino/pull/12486 - if(engine.get_device_info().supports_immad) - return; - - auto input = engine.allocate_memory({ data_types::u1, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); - auto weights = engine.allocate_memory({ data_types::u1, format::bfyx, { 4, 16, 1, 1 } }); - - // 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 - // 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 - // 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - set_binary_values(input, { 21796, 22113, 24531, 32768 }); - - // 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - // 0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 - // 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - set_binary_values(weights, { 65535, 21930, 43605, 0 }); - - // 16 - 2*popcount(1 1 0 1 1 0 1 1 0 1 0 1 0 1 0 1) = -4 - // 16 - 2*popcount(0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 1) = -2 - // 16 - 2*popcount(0 0 1 1 0 1 0 0 0 0 0 0 0 1 0 1) = 6 - // 16 - 2*popcount(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0) = -14 - - // 16 - 2*popcount(0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0) = 8 - // 16 - 2*popcount(1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 0) = 2 - // 16 - 2*popcount(1 0 0 1 1 1 1 0 0 1 0 1 0 0 0 0) = 2 - // 16 - 2*popcount(0 1 0 1 0 1 
0 1 1 0 1 0 1 0 1 1) = -2 - - // 16 - 2*popcount(1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1) = -8 - // 16 - 2*popcount(0 0 1 0 1 1 0 0 0 0 1 1 1 1 1 1) = -2 - // 16 - 2*popcount(0 1 1 0 0 0 0 1 1 0 1 0 1 1 1 1) = -2 - // 16 - 2*popcount(1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 0) = 2 - - // 16 - 2*popcount(0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0) = 4 - // 16 - 2*popcount(1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0) = 2 - // 16 - 2*popcount(1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 0) = -6 - // 16 - 2*popcount(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1) = 14 - VF output_vec = { - -4.0f, -2.0f, 6.0f, -14.0f, - 8.0f, 2.0f, 2.0f, -2.0f, - -8.0f, -2.0f, -2.0f, 2.0f, - 4.0f, 2.0f, -6.0f, 14.0f }; - - topology topology( - input_layout("input", input->get_layout()), - data("weights", weights), - binary_convolution("binary_conv", input_info("input"), { "weights" }, - { 1,1 }, - { 0,0 }, - { 1,1 }, - { 1,4,2,2 }, - 0, 0.0f, - data_types::f16, - padding{ { 0,0,0,0 }, 0 }) - ); - - ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - - network network(engine, topology, config); - network.set_input_data("input", input); - - auto outputs = network.execute(); - ASSERT_EQ(outputs.size(), size_t(1)); - ASSERT_EQ(outputs.begin()->first, "binary_conv"); - - auto output_memory = outputs.at("binary_conv").get_memory(); - auto output_layout = output_memory->get_layout(); - cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - - ASSERT_EQ(output_layout.format, format::bfyx); - ASSERT_EQ(output_layout.data_type, data_types::f16); - ASSERT_EQ(output_layout.batch(), 1); - ASSERT_EQ(output_layout.feature(), 4); - ASSERT_EQ(output_layout.spatial(1), 2); - ASSERT_EQ(output_layout.spatial(0), 2); - - for (size_t i = 0; i < output_layout.count(); i++) { - ASSERT_EQ(half_to_float(output_ptr[i]), output_vec[i]) << "index="<< i; - } -} diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp index 3058d3b389ee89..42ad04775d36dd 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/quantize_gpu_test.cpp @@ -167,72 +167,6 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { } } -TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) { - auto& engine = get_test_engine(); - auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 8, 2, 2}}); - auto input_thresh = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 8, 1, 1 } }); - auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - - set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 2.0f, 2.0f, 3.0f, - 4.0f, 6.0f, 3.0f, 3.0f, - 3.0f, 5.0f, 1.0f, 1.0f, - - 1.0f, 1.0f, 1.0f, 1.0f, - 4.0f, 6.0f, 3.0f, 3.0f, - 3.0f, 5.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f }); - - set_values(input_thresh, { 0.0f, 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, 7.0f }); - set_values(output_low, { -1.0f }); - set_values(output_high, { 1.0f }); - - // 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 - // 1 1 1 1 0 1 0 0 0 0 1 1 0 1 1 1 - // 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 - // 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 - std::vector ref_data = { -1, 1, 1, 1, - 1, 1, 1, 1, - 1, 1, 1, 1, - -1, 1, -1, -1, - -1, -1, -1, -1, - -1, 1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1 }; - - topology topology; - topology.add( - input_layout("input", input->get_layout()), - data("input_low", input_thresh), - data("input_high", input_thresh), - data("output_low", output_low), - data("output_high", output_high), - quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::u1), - reorder("reorder", input_info("quantize"), 
layout{data_types::f32, format::bfyx, tensor{1,8,2,2}}) - ); - - ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - network network(engine, topology, config); - network.set_input_data("input", input); - auto outputs = network.execute(); - - auto output = outputs.at("reorder").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - - // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output->count(), (size_t)32); - ASSERT_EQ(output->get_layout().count(), (size_t)32); - - ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint32_t)); - - for (size_t i = 0; i < ref_data.size(); ++i) { - ASSERT_EQ(output_ptr[i], ref_data[i]) << " index = " << i; - } -} - TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { cldnn::engine& engine = get_test_engine(); auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 16, 2, 2}}); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 3d71384fca884b..6f4dcda23fea90 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -2067,101 +2067,6 @@ TEST(reorder_gpu_i64, basic) ASSERT_EQ(*(a_ptr++), val); } -TEST(reorder_gpu_binary, binary_output) -{ - auto& engine = get_test_engine(); - - ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - layout output_layout(data_types::u1, format::b_fs_yx_32fp, { 2, 2, 2, 2 }); - - // Data is supposed to be quantized to {0,1} values - set_values(input, { - 1.f, 0.f, 1.f, 1.f, - 0.f, 1.f, 1.f, 0.f, - - 1.f, 1.f, 0.f, 1.f, - 0.f, 0.f, 0.f, 1.f - }); - - topology topology( - input_layout("input", 
input->get_layout()), - reorder("reorder", input_info("input"), output_layout)); - - network network(engine, topology, get_test_default_config(engine)); - network.set_input_data("input", input); - - auto outputs = network.execute(); - ASSERT_EQ(outputs.size(), size_t(1)); - ASSERT_EQ(outputs.begin()->first, "reorder"); - - auto output = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - - std::vector answers = { 1, 2, 3, 1, - 1, 1, 0, 3 }; - - // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output->count(), input->get_layout().count()); - ASSERT_EQ(output->get_layout().count(), input->get_layout().count()); - - // Check that memory physical size consider binary pack - ASSERT_EQ(output->size(), answers.size() * sizeof(uint32_t)); - - for (size_t i = 0; i < answers.size(); ++i) { - ASSERT_EQ(answers[i], output_ptr[i]) << "index: " << i; - } -} - -TEST(reorder_gpu_binary, binary_input) -{ - auto& engine = get_test_engine(); - - ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::optimize_data(true)); - - auto input = engine.allocate_memory({ data_types::u1, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); - layout output_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 }); - - // Data is supposed to be quantized to {0,1} values - std::vector answers = { - 1.f, -1.f, 1.f, 1.f, - -1.f, 1.f, 1.f, -1.f, - - 1.f, 1.f, -1.f, 1.f, - -1.f, -1.f, -1.f, 1.f - }; - - set_values(input, { 1, 2, 3, 1, - 1, 1, 0, 3 }); - - topology topology( - input_layout("input", input->get_layout()), - reorder("reorder", input_info("input"), output_layout)); - - network network(engine, topology, get_test_default_config(engine)); - network.set_input_data("input", input); - - auto outputs = network.execute(); - ASSERT_EQ(outputs.size(), size_t(1)); - ASSERT_EQ(outputs.begin()->first, "reorder"); - - auto output = outputs.begin()->second.get_memory(); - cldnn::mem_lock 
output_ptr(output, get_test_stream()); - - // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output->count(), input->get_layout().count()); - ASSERT_EQ(output->get_layout().count(), input->get_layout().count()); - - ASSERT_EQ(output->size(), answers.size() * sizeof(float)); - - for (size_t i = 0; i < answers.size(); ++i) { - ASSERT_EQ(answers[i], output_ptr[i]) << "index: " << i; - } -} - TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) { // Topology: @@ -2859,10 +2764,7 @@ class ReorderTest : public ::testing::TestWithParam { cldnn::memory::ptr get_mem(cldnn::layout l) { auto prim = engine.allocate_memory(l); tensor s = l.get_tensor(); - if (l.data_type == data_types::u1) { - VF rnd_vec = rg.generate_random_1d(s.count() / 32, min_random, max_random); - set_values(prim, rnd_vec); - } else if (l.data_type == data_types::i8 || l.data_type == data_types::u8) { + if (l.data_type == data_types::i8 || l.data_type == data_types::u8) { VF rnd_vec = rg.generate_random_1d(s.count(), min_random, max_random); set_values(prim, rnd_vec); } else if (l.data_type == data_types::f16) { diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h index 680a87283fcbfd..b63fc2f5fa8db6 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.h @@ -578,131 +578,6 @@ inline std::vector get_output_values_to_float(cldnn::network& net, const } double default_tolerance(cldnn::data_types dt); -// inline void print_bin_blob(cldnn::memory& mem, std::string name) -// { -// auto&& size = mem.get_layout().get_tensor(); - -// std::cerr << name; -// std::cerr << " shape: "; -// std::cerr << size.batch[0] << " "; -// std::cerr << size.feature[0] << " "; -// std::cerr << size.spatial[1] << " "; -// std::cerr << size.spatial[0] << " "; -// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; 
- -// auto mem_ptr = mem.pointer(); - -// bool packed_ic = mem.get_layout().format == cldnn::format::b_fs_yx_32fp ? 1 : 0; -// int B = size.batch[0]; -// int C = size.feature[0]; -// int H = size.spatial[1]; -// int W = size.spatial[0]; - -// for (cldnn::tensor::value_type b = 0; b < B; ++b) -// { -// for (cldnn::tensor::value_type f = 0; f < C; ++f) -// { -// for (cldnn::tensor::value_type y = 0; y < H; ++y) -// { -// for (cldnn::tensor::value_type x = 0; x < W; ++x) -// { -// if (!packed_ic) -// { -// size_t input_it = b * C*H*W + f * W*H + y * W + x; -// size_t elem = input_it / 32; -// size_t bit = input_it % 32; -// std::cerr << ((mem_ptr[elem] & (1 << bit)) >> bit) << " "; -// } -// else -// { -// size_t input_it = b * (C / 32)*W*H + (f / 32)*W*H + y * W + x; -// size_t bit = f % 32; -// std::cerr << ((mem_ptr[input_it] & (1 << bit)) >> bit) << " "; -// } -// } -// std::cerr << std::endl; -// } -// std::cerr << std::endl; -// } -// std::cerr << "==============" << std::endl; -// } -// } - -// inline void print_bin_blob_packed(cldnn::memory& mem, std::string name) -// { -// auto&& size = mem.get_layout().get_tensor(); - -// std::cerr << name; -// std::cerr << " shape: "; -// std::cerr << size.batch[0] << " "; -// std::cerr << size.feature[0] << " "; -// std::cerr << size.spatial[1] << " "; -// std::cerr << size.spatial[0] << " "; -// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - -// auto mem_ptr = mem.pointer(); - -// int B = size.batch[0]; -// int C = size.feature[0]; -// int H = size.spatial[1]; -// int W = size.spatial[0]; - -// for (cldnn::tensor::value_type b = 0; b < B; ++b) -// { -// for (cldnn::tensor::value_type f = 0; f < div_up(C, 32); ++f) -// { -// for (cldnn::tensor::value_type y = 0; y < H; ++y) -// { -// for (cldnn::tensor::value_type x = 0; x < W; ++x) -// { -// size_t input_it = b * div_up(C, 32)*W*H + f * W*H + y * W + x; -// std::cerr << mem_ptr[input_it] << " "; -// } -// 
std::cerr << std::endl; -// } -// std::cerr << std::endl; -// } -// std::cerr << "==============" << std::endl; -// } -// } - -// inline void print_blob(cldnn::memory& mem, std::string name) -// { -// auto&& size = mem.get_layout().get_tensor(); - -// std::cerr << name; -// std::cerr << " shape: "; -// std::cerr << size.batch[0] << " "; -// std::cerr << size.feature[0] << " "; -// std::cerr << size.spatial[1] << " "; -// std::cerr << size.spatial[0] << " "; -// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - -// auto mem_ptr = mem.pointer(); - -// int B = size.batch[0]; -// int C = size.feature[0]; -// int H = size.spatial[1]; -// int W = size.spatial[0]; - -// for (cldnn::tensor::value_type b = 0; b < B; ++b) -// { -// for (cldnn::tensor::value_type f = 0; f < C; ++f) -// { -// for (cldnn::tensor::value_type y = 0; y < H; ++y) -// { -// for (cldnn::tensor::value_type x = 0; x < W; ++x) -// { -// size_t input_it = b * C*W*H + f * W*H + y * W + x; -// std::cerr << std::setw(4) << mem_ptr[input_it] << " "; -// } -// std::cerr << std::endl; -// } -// std::cerr << std::endl; -// } -// std::cerr << "==============" << std::endl; -// } -// } inline cldnn::network::ptr get_network(cldnn::engine& engine, cldnn::topology& topology, diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_binary_conv_to_conv_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_binary_conv_to_conv_test.cpp new file mode 100644 index 00000000000000..23c30f0e95e28a --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_binary_conv_to_conv_test.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/core/coordinate_diff.hpp" +#include 
"openvino/core/type/element_type.hpp" + +using namespace testing; +using namespace ov::intel_gpu; + +TEST_F(TransformationTestsF, ConvertBinaryConvolutionToConvolutionTest1) { + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 56, 56 }); + auto in_lo = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 1, 1 }); + auto in_hi = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 1, 1 }); + auto out_lo = std::make_shared(ov::element::f32, ov::Shape{ 1, 1, 1, 1 }, std::vector{0.0f}); + auto out_hi = std::make_shared(ov::element::f32, ov::Shape{ 1, 1, 1, 1 }, std::vector{1.0f}); + auto fq = std::make_shared(input, in_lo, in_hi, out_lo, out_hi, 2); + auto weights = std::make_shared(ov::element::u1, ov::Shape{ 32, 256, 3, 3 }); + auto binary_conv = std::make_shared(fq, + weights, + ov::Strides{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::Strides{1, 1}, + ov::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT, + -1.0f); + + model = std::make_shared(ov::NodeVector{ binary_conv }, ov::ParameterVector{ input }); + manager.register_pass(); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 56, 56 }); + auto in_lo = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 1, 1 }); + auto in_hi = std::make_shared(ov::element::f32, ov::Shape{ 1, 256, 1, 1 }); + auto out_lo = std::make_shared(ov::element::f32, ov::Shape{ 1, 1, 1, 1 }, std::vector{-1.0f}); + auto out_hi = std::make_shared(ov::element::f32, ov::Shape{ 1, 1, 1, 1 }, std::vector{1.0f}); + auto fq = std::make_shared(input, in_lo, in_hi, out_lo, out_hi, 2); + auto weights = std::make_shared(ov::element::f32, ov::Shape{ 32, 256, 3, 3 }); + + auto pb = std::make_shared(ov::element::i32, ov::Shape{ 4 }, std::vector{0, 0, 1, 1}); + auto pe = std::make_shared(ov::element::i32, ov::Shape{ 4 }, std::vector{0, 0, 1, 1}); + auto pv = std::make_shared(ov::element::f32, ov::Shape{ }, std::vector{1.0f}); + + auto pad = std::make_shared(fq, pb, 
pe, pv, ov::op::PadMode::CONSTANT); + + auto conv = std::make_shared(pad, + weights, + ov::Strides{1, 1}, + ov::CoordinateDiff{0, 0}, + ov::CoordinateDiff{0, 0}, + ov::Strides{1, 1}); + + model_ref = std::make_shared(ov::NodeVector{ conv }, ov::ParameterVector{ input }); + } +} From bcb38796cee6913b220116b4b7ea88e70e72ec53 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Thu, 9 Nov 2023 10:51:00 +0400 Subject: [PATCH 246/275] Ngraph helpers/builders cleaning (#20819) * Delete `getNodeSharedPtr()` * Remove `makeRoll` ng::builder * Delete `makeSelect` ng::builder * Delete `makeDepthToSpace` ng::builder * Remove `CompareFunctions` and `getConstData` from ng::helpers * Return `makeSelect` for compatibility with NPU * Port `QuantizationGranularity`, `MemoryTransformation` * Restore ng::helpers::QuantGranularity for BWD CMP --- .../transpose_sinking_test.cpp | 16 ++- .../quantized_convolution_backprop_data.cpp | 2 +- .../quantized_group_convolution.cpp | 2 +- ...ntized_group_convolution_backprop_data.cpp | 2 +- .../subgraph_tests/quantized_mat_mul.cpp | 2 +- .../single_layer_tests/depth_to_space.cpp | 2 +- .../functional/single_layer_tests/roll.cpp | 3 +- .../quantized_convolution_backprop_data.cpp | 2 +- ...ntized_group_convolution_backprop_data.cpp | 2 +- .../dynamic/deth_to_space.cpp | 6 +- .../single_layer_tests/dynamic/select.cpp | 3 +- .../src/base/layer_test_utils.cpp | 27 +++- .../src/single_layer/depth_to_space.cpp | 3 +- .../src/single_layer/roll.cpp | 3 +- .../src/single_layer/select.cpp | 4 +- .../quantized_convolution_backprop_data.cpp | 4 +- .../subgraph/quantized_group_convolution.cpp | 4 +- ...ntized_group_convolution_backprop_data.cpp | 4 +- .../src/subgraph/quantized_mat_mul.cpp | 2 +- .../ov_models/include/ov_models/builders.hpp | 8 -- .../include/ov_models/utils/ov_helpers.hpp | 24 +--- .../ov_models/src/depth_to_space.cpp | 18 --- src/tests/ov_helpers/ov_models/src/roll.cpp | 21 --- .../ov_models/src/utils/ov_helpers.cpp | 132 
------------------ .../include/common_test_utils/test_enums.hpp | 6 + .../common_test_utils/src/test_enums.cpp | 14 ++ 26 files changed, 84 insertions(+), 232 deletions(-) delete mode 100644 src/tests/ov_helpers/ov_models/src/depth_to_space.cpp delete mode 100644 src/tests/ov_helpers/ov_models/src/roll.cpp diff --git a/src/common/transformations/tests/common_optimizations/transpose_sinking_test.cpp b/src/common/transformations/tests/common_optimizations/transpose_sinking_test.cpp index 6188628246a0e4..417f90ee89e863 100644 --- a/src/common/transformations/tests/common_optimizations/transpose_sinking_test.cpp +++ b/src/common/transformations/tests/common_optimizations/transpose_sinking_test.cpp @@ -195,7 +195,21 @@ class TransposeSinking : public ov::test::TestsCommon, private: std::shared_ptr get_reduction(NodeTypeInfo reduction_type_info, const OutputVector& inputs, bool keep_dims) { - auto reduction = ngraph::helpers::getNodeSharedPtr(reduction_type_info, inputs); + std::shared_ptr reduction; + for (const auto& it : get_available_opsets()) { + const auto& opset = it.second(); + if (opset.contains_type(reduction_type_info)) { + reduction = std::shared_ptr(opset.create(reduction_type_info.name)); + reduction->set_arguments(inputs); + reduction->validate_and_infer_types(); + } + } + OPENVINO_ASSERT(reduction, + "supported opsets does not contain op with name: ", + reduction_type_info.name, + " version: ", + reduction_type_info.version_id); + if (auto arithmetic_reduce = std::dynamic_pointer_cast(reduction)) arithmetic_reduce->set_keep_dims(keep_dims); else if (auto logical_reduce = std::dynamic_pointer_cast(reduction)) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp index 6f51fde892c464..bce22a2a7e598c 100644 --- 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp @@ -19,7 +19,7 @@ const std::vector netPrecisions = { const std::vector numOutChannels = {16, 32}; const std::vector levels = {256}; -const std::vector granularity = {Pertensor, Perchannel}; +const std::vector granularity = {QuantizationGranularity::Pertensor, QuantizationGranularity::Perchannel}; /* ============= 2D GroupConvolutionBackpropData ============= */ const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution.cpp index ec45146e05ef5a..384709bfd69c34 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution.cpp @@ -21,7 +21,7 @@ const std::vector numOutChannels = {3, 24, 48}; const std::vector numGroups = {3}; const std::vector levels = {256}; -const std::vector granularity = {Pertensor, Perchannel}; +const std::vector granularity = {QuantizationGranularity::Pertensor, QuantizationGranularity::Perchannel}; const std::vector quantizeWeights = {false, true}; /* ============= 2D GroupConvolution ============= */ diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp index da96f2b28d26de..833cad7e1a21bb 100644 --- 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp @@ -20,7 +20,7 @@ const std::vector numOutChannels = {16, 32}; const std::vector numGroups = {2, 8, 16}; const std::vector levels = {256}; -const std::vector granularity = {Pertensor, Perchannel}; +const std::vector granularity = {QuantizationGranularity::Pertensor, QuantizationGranularity::Perchannel}; /* ============= 2D GroupConvolutionBackpropData ============= */ const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp index 563cb7adc579f0..2c28b919af12ec 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp @@ -43,7 +43,7 @@ const std::vector ranges_i32 = { const std::vector levels_8 = {256}; const std::vector levels_16 = {65536}; const std::vector levels_32 = {4294967296}; -const std::vector granularity = {Pertensor}; +const std::vector granularity = {QuantizationGranularity::Pertensor}; const auto quantParams_i8 = ::testing::Combine( ::testing::ValuesIn(levels_8), diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp index 30f1617d3cfd68..0ad2af8b85ad4d 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp @@ -75,7 +75,7 @@ class DepthToSpaceLayerCPUTest : public 
testing::WithParamInterface(inType, shape)); } - auto d2s = ngraph::builder::makeDepthToSpace(params[0], mode, blockSize); + auto d2s = std::make_shared(params[0], mode, blockSize); function = makeNgraphFunction(inType, params, d2s, "DepthToSpace"); } }; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/roll.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/roll.cpp index 60a1645ddca039..c66384281982ef 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/roll.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/roll.cpp @@ -61,8 +61,7 @@ class RollLayerCPUTest : public testing::WithParamInterface, auto shiftNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{shift.size()}, shift)->output(0); auto axesNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{axes.size()}, axes)->output(0); - const auto paramsOut = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramsIn)); - const auto roll = std::dynamic_pointer_cast(ngraph::builder::makeRoll(paramsOut[0], shiftNode, axesNode)); + const auto roll = std::make_shared(paramsIn[0], shiftNode, axesNode); const ngraph::ResultVector results{std::make_shared(roll)}; function = std::make_shared(results, paramsIn, "roll"); } diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp index 29b217c4681882..5d77a7c3ae4505 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp @@ -19,7 +19,7 @@ const std::vector netPrecisions = { const std::vector numOutChannels = {16, 32}; const std::vector levels = {256}; -const std::vector 
granularity = {Pertensor, Perchannel}; +const std::vector granularity = {QuantizationGranularity::Pertensor, QuantizationGranularity::Perchannel}; /* ============= 2D GroupConvolutionBackpropData ============= */ const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp index 5f51b3f88cf8bb..bfd987be299b10 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp @@ -20,7 +20,7 @@ const std::vector numOutChannels = {16, 32}; const std::vector numGroups = {2, 8, 16}; const std::vector levels = {256}; -const std::vector granularity = {Pertensor, Perchannel}; +const std::vector granularity = {QuantizationGranularity::Pertensor, QuantizationGranularity::Perchannel}; /* ============= 2D GroupConvolutionBackpropData ============= */ const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/deth_to_space.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/deth_to_space.cpp index d39008127d8fa2..3f0ea75534ebd1 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/deth_to_space.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/deth_to_space.cpp @@ -65,10 +65,10 @@ class DepthToSpaceLayerGPUTest : public testing::WithParamInterface(inType, shape)); - } - auto d2s = ngraph::builder::makeDepthToSpace(params[0], mode, blockSize); + + auto d2s = std::make_shared(params[0], mode, blockSize); ngraph::ResultVector results; for (size_t i = 0; i < 
d2s->get_output_size(); i++) diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp index ed65d68d70a798..c8503052f316ef 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp @@ -65,8 +65,7 @@ class SelectLayerGPUTest : public testing::WithParamInterface(netType, inputDynamicShapes[2]), }; - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); - auto select = builder::makeSelect(paramOuts, broadcast); + auto select = std::make_shared(params[0], params[1], params[2], broadcast); auto makeFunction = [](ParameterVector ¶ms, const std::shared_ptr &lastNode) { ResultVector results; diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index 5e325150357b4d..13b30787755769 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -19,6 +19,31 @@ namespace LayerTestsUtils { +namespace { +std::vector>> getConstData( + const std::shared_ptr& function) { + size_t numOutputs = function->get_output_size(); + std::vector>> outputs(numOutputs); + auto funcResults = function->get_results(); + for (size_t i = 0; i < numOutputs; i++) { + outputs[i].first = funcResults[i]->get_element_type(); + const auto& output = function->output(i).get_node_shared_ptr(); + OPENVINO_ASSERT(output->inputs().size() == 1); + auto parrentNode = output->input_value(0).get_node_shared_ptr(); + OPENVINO_ASSERT(ov::op::util::is_constant(parrentNode), + "Function was not fully folded to constant state!\n", + "Parent node of one of results is not constant and has type ", + parrentNode->get_type_name()); + + const auto data = 
std::dynamic_pointer_cast(parrentNode)->get_data_ptr(); + const auto dataSize = ov::shape_size(parrentNode->get_shape()) * parrentNode->get_element_type().size(); + outputs[i].second.resize(dataSize); + std::copy(data, data + dataSize, outputs[i].second.data()); + } + return outputs; +} +} // namespace + LayerTestsCommon::LayerTestsCommon() : threshold(1e-2f), abs_threshold(-1.f) { core = PluginCache::get().ie(targetDevice); } @@ -460,7 +485,7 @@ std::vector>> LayerTe } case CONSTANT_FOLDING: { const auto &foldedFunc = ngraph::helpers::foldFunction(functionRefs, referenceInputs, refInputsTypes); - expectedOutputs = ngraph::helpers::getConstData(foldedFunc); + expectedOutputs = getConstData(foldedFunc); break; } case IE: { diff --git a/src/tests/functional/shared_test_classes/src/single_layer/depth_to_space.cpp b/src/tests/functional/shared_test_classes/src/single_layer/depth_to_space.cpp index fbff80b33cab7a..ce7f04d7935c8e 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/depth_to_space.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/depth_to_space.cpp @@ -46,8 +46,7 @@ void DepthToSpaceLayerTest::SetUp() { std::tie(inShape, inputPrecision, mode, blockSize, targetDevice) = this->GetParam(); auto inPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); ov::ParameterVector params {std::make_shared(inPrc, ov::Shape(inShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto d2s = ngraph::builder::makeDepthToSpace(paramOuts[0], mode, blockSize); + auto d2s = std::make_shared(params[0], mode, blockSize); ngraph::ResultVector results{std::make_shared(d2s)}; function = std::make_shared(results, params, "DepthToSpace"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/roll.cpp b/src/tests/functional/shared_test_classes/src/single_layer/roll.cpp index be1e6cd0ad4c6c..10771dbb86b3ab 100644 --- 
a/src/tests/functional/shared_test_classes/src/single_layer/roll.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/roll.cpp @@ -37,8 +37,7 @@ void RollLayerTest::SetUp() { auto shiftNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{shift.size()}, shift)->output(0); auto axesNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{axes.size()}, axes)->output(0); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto roll = std::dynamic_pointer_cast(ngraph::builder::makeRoll(paramOuts[0], shiftNode, axesNode)); + auto roll = std::make_shared(paramVector[0], shiftNode, axesNode); ngraph::ResultVector results{std::make_shared(roll)}; function = std::make_shared(results, paramVector, "roll"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/select.cpp b/src/tests/functional/shared_test_classes/src/single_layer/select.cpp index 0f3ef550e4fb06..58e1bd3b46ebcc 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/select.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/select.cpp @@ -36,9 +36,7 @@ namespace LayerTestsDefinitions { paramNode = std::make_shared(inType, ngraph::Shape(inputShapes[i])); paramNodesVector.push_back(paramNode); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramNodesVector)); - - auto select = std::dynamic_pointer_cast(ngraph::builder::makeSelect(paramOuts, broadcast)); + auto select = std::make_shared(paramNodesVector[0], paramNodesVector[1], paramNodesVector[2], broadcast); ngraph::ResultVector results{std::make_shared(select)}; function = std::make_shared(results, paramNodesVector, "select"); } diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp index e9fd7e7f837093..70ca3a12de9c36 100644 --- 
a/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_convolution_backprop_data.cpp @@ -55,7 +55,7 @@ void QuantConvBackpropDataLayerTest::SetUp() { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; std::vector dataFqConstShapes(inputShape.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) dataFqConstShapes[1] = inputShape[1]; auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, dataFqConstShapes); @@ -66,7 +66,7 @@ void QuantConvBackpropDataLayerTest::SetUp() { auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty()); std::vector weightsFqConstShapes(weightsShapes.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) weightsFqConstShapes[0] = weightsShapes[0]; auto weightsFq = ngraph::builder::makeFakeQuantize(weightsNode, ngPrc, quantLevels, weightsFqConstShapes); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp index 1f9a505e83f689..da0d304a378215 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution.cpp @@ -60,7 +60,7 @@ void QuantGroupConvLayerTest::SetUp() { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; std::vector dataFqConstShapes(inputShape.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) dataFqConstShapes[1] = inputShape[1]; auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, 
dataFqConstShapes); @@ -76,7 +76,7 @@ void QuantGroupConvLayerTest::SetUp() { auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty()); std::vector weightsFqConstShapes(weightsShapes.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) weightsFqConstShapes[0] = weightsShapes[0]; std::shared_ptr weights; diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp index 42022f1ec483a2..3c399b0335dd1f 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_group_convolution_backprop_data.cpp @@ -56,7 +56,7 @@ void QuantGroupConvBackpropDataLayerTest::SetUp() { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; std::vector dataFqConstShapes(inputShape.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) dataFqConstShapes[1] = inputShape[1]; auto dataFq = ngraph::builder::makeFakeQuantize(params[0], ngPrc, quantLevels, dataFqConstShapes); @@ -72,7 +72,7 @@ void QuantGroupConvBackpropDataLayerTest::SetUp() { auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty()); std::vector weightsFqConstShapes(weightsShapes.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) weightsFqConstShapes[0] = weightsShapes[0]; auto weightsFq = ngraph::builder::makeFakeQuantize(weightsNode, ngPrc, quantLevels, weightsFqConstShapes); diff --git a/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp 
b/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp index 77413c4f8abe13..8221f451b2485d 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp @@ -78,7 +78,7 @@ void QuantMatMulTest::SetUp() { QuantizationGranularity quantGranularity, const ngraph::Output &in, std::vector inputShape, InferenceEngine::Precision prec) -> std::shared_ptr { std::vector dataFqConstShapes(inputShape.size(), 1); - if (quantGranularity == ngraph::helpers::Perchannel) + if (quantGranularity == QuantizationGranularity::Perchannel) dataFqConstShapes[1] = inputShape[1]; size_t constDataSize = ngraph::shape_size(dataFqConstShapes); std::vector inputLowData(constDataSize), inputHighData(constDataSize), outputLowData(constDataSize), outputHighData(constDataSize); diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp index 15588fe27465fb..026ab37135b93c 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp @@ -427,10 +427,6 @@ std::shared_ptr makeEmbeddingSegmentsSum(const element::Type& dataType bool with_weights, bool with_default_index); -std::shared_ptr makeDepthToSpace(const ov::Output& in, - ov::op::v0::DepthToSpace::DepthToSpaceMode mode, - size_t blockSize); - std::shared_ptr makeSpaceToDepth(const ov::Output& in, ov::op::v0::SpaceToDepth::SpaceToDepthMode mode, size_t blockSize); @@ -628,10 +624,6 @@ std::shared_ptr makeOneHot(const ov::Output& indices, const float& off_val, const int64_t& axis); -std::shared_ptr makeRoll(const ov::Output& dataNode, - const ov::Output& shiftNode, - const ov::Output& axesNode); - std::shared_ptr makeDFT(const ov::Output& dataNode, const std::vector& axes, const std::vector& signalSize, diff --git 
a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp index 862ff798efcf30..0fb1c8f159483d 100644 --- a/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp +++ b/src/tests/ov_helpers/ov_models/include/ov_models/utils/ov_helpers.hpp @@ -91,13 +91,7 @@ using ov::test::utils::DFTOpType; using ov::test::utils::InputLayerType; using ov::test::utils::PadMode; using ov::test::utils::SequenceTestsMode; - -enum class MemoryTransformation { - NONE, - LOW_LATENCY_V2, - LOW_LATENCY_V2_REGULAR_API, - LOW_LATENCY_V2_ORIGINAL_INIT -}; +using ov::test::utils::MemoryTransformation; // clang-format on bool is_tensor_iterator_exist(const std::shared_ptr& func); @@ -138,31 +132,15 @@ std::vector>> interpr std::vector interpretFunction(const std::shared_ptr& function, const std::map, ov::Tensor>& inputs); -// -// This function compares two nGraph functions and requires them to have exactly one output -// Check nodes types -// Check number of inputs -// Check shapes of each Node -// -void CompareFunctions(const Function& actual, const Function& expected); - std::shared_ptr foldFunction(const std::shared_ptr& function, const std::vector>& inputs, const std::vector& inputTypes = {}); -std::vector>> getConstData( - const std::shared_ptr& function); - -std::shared_ptr getNodeSharedPtr(const ngraph::NodeTypeInfo& type_info, - const ngraph::OutputVector& outputVector); - std::vector convertOutputPrecision(const std::vector& output, const element::Type_t& fromPrecision, const element::Type_t& toPrecision, const size_t elementsCount); -std::ostream& operator<<(std::ostream& os, MemoryTransformation type); - // todo: remove the following function from the source code after cleaning up VPU repo void resize_function(std::shared_ptr function, const std::vector& targetInputStaticShapes); diff --git a/src/tests/ov_helpers/ov_models/src/depth_to_space.cpp 
b/src/tests/ov_helpers/ov_models/src/depth_to_space.cpp deleted file mode 100644 index 39dd90e7b970de..00000000000000 --- a/src/tests/ov_helpers/ov_models/src/depth_to_space.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ov_models/builders.hpp" - -namespace ngraph { -namespace builder { - -std::shared_ptr makeDepthToSpace(const ov::Output& in, - ov::op::v0::DepthToSpace::DepthToSpaceMode mode, - size_t blockSize) { - auto dtsNode = std::make_shared(in, mode, blockSize); - return dtsNode; -} - -} // namespace builder -} // namespace ngraph diff --git a/src/tests/ov_helpers/ov_models/src/roll.cpp b/src/tests/ov_helpers/ov_models/src/roll.cpp deleted file mode 100644 index 72de6b4089be69..00000000000000 --- a/src/tests/ov_helpers/ov_models/src/roll.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "openvino/op/roll.hpp" - -#include - -#include "openvino/core/node.hpp" - -namespace ngraph { -namespace builder { - -std::shared_ptr makeRoll(const ov::Output& in, - const ov::Output& shift, - const ov::Output& axes) { - return std::make_shared(in, shift, axes); -} - -} // namespace builder -} // namespace ngraph diff --git a/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp b/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp index ab19b01563ab0c..d195dfeaefcedf 100644 --- a/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp +++ b/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp @@ -240,118 +240,6 @@ std::shared_ptr foldFunction(const std::shared_ptr& function return foldedFunc; } -std::vector>> getConstData( - const std::shared_ptr& function) { - size_t numOutputs = function->get_output_size(); - std::vector>> outputs(numOutputs); - auto funcResults = function->get_results(); - for (size_t i = 0; i < numOutputs; i++) { - outputs[i].first = funcResults[i]->get_element_type(); 
- const auto& output = function->output(i).get_node_shared_ptr(); - OPENVINO_ASSERT(output->inputs().size() == 1); - auto parrentNode = output->input_value(0).get_node_shared_ptr(); - OPENVINO_ASSERT(op::is_constant(parrentNode), - "Function was not fully folded to constant state!\n", - "Parent node of one of results is not constant and has type ", - parrentNode->get_type_name()); - - const auto data = std::dynamic_pointer_cast(parrentNode)->get_data_ptr(); - const auto dataSize = shape_size(parrentNode->get_shape()) * parrentNode->get_element_type().size(); - outputs[i].second.resize(dataSize); - std::copy(data, data + dataSize, outputs[i].second.data()); - } - return outputs; -} - -namespace { - -std::string toString(const NodeTypeInfo& typeInfo) { - return std::string(typeInfo.name) + " ver. " + std::string(typeInfo.version_id); -} - -void CompareShapes(const PartialShape& actual, const PartialShape& expected) { - OPENVINO_ASSERT(actual.relaxes(expected) && actual.refines(expected), - "Functions compare: Different shape detected ", - actual, - " and ", - expected); -} - -void CompareNodes(const Node& actual, const Node& expected) { - const auto& actualType = actual.get_type_info(); - const auto& expectedType = expected.get_type_info(); - OPENVINO_ASSERT(actualType == expectedType, - "Functions compare: data types must be equal ", - toString(actualType), - " != ", - toString(expectedType)); - - const auto& numActualInputs = actual.inputs().size(); - const auto& numExpectedInputs = expected.inputs().size(); - OPENVINO_ASSERT(numActualInputs == numExpectedInputs, - "Functions compare: numbers of inputs are different: ", - numActualInputs, - " and ", - numExpectedInputs); - - const auto& numActualOutputs = actual.outputs().size(); - const auto& numExpectedOutputs = expected.outputs().size(); - OPENVINO_ASSERT(numActualOutputs == numExpectedOutputs, - "Functions compare: numbers of outputs are different: ", - numActualOutputs, - " and ", - numExpectedOutputs); -} - 
-} // namespace - -void CompareFunctions(const Function& actual, const Function& expected) { - const auto& actualOrderedOps = actual.get_ordered_ops(); - const auto& expectedOrderedOps = expected.get_ordered_ops(); - - OPENVINO_ASSERT(expectedOrderedOps.size() == actualOrderedOps.size(), - "Functions compare: expected and actual ops number should be equal " - "but got ", - expectedOrderedOps.size(), - " and ", - actualOrderedOps.size(), - " respectively"); - - for (std::size_t i = 0; i < expectedOrderedOps.size(); i++) { - const auto& expectedOp = expectedOrderedOps[i]; - const auto& actualOp = actualOrderedOps[i]; - - CompareNodes(*actualOp, *expectedOp); - for (std::size_t i = 0; i < actualOp->inputs().size(); ++i) { - const auto& actualShape = actualOp->input(i).get_partial_shape(); - const auto& expectedShape = expectedOp->input(i).get_partial_shape(); - CompareShapes(actualShape, expectedShape); - } - - for (std::size_t i = 0; i < actualOp->outputs().size(); ++i) { - const auto& actualShape = actualOp->output(i).get_partial_shape(); - const auto& expectedShape = expectedOp->output(i).get_partial_shape(); - CompareShapes(actualShape, expectedShape); - } - } -} - -std::shared_ptr getNodeSharedPtr(const ov::NodeTypeInfo& type_info, const ov::OutputVector& outputVector) { - for (const auto& it : get_available_opsets()) { - const auto& opset = it.second(); - if (opset.contains_type(type_info)) { - const auto node = std::shared_ptr(opset.create(type_info.name)); - node->set_arguments(outputVector); - node->validate_and_infer_types(); - return node; - } - } - OPENVINO_THROW("supported opsets does not contain op with name: ", - type_info.name, - " version: ", - type_info.version_id); -} - bool is_tensor_iterator_exist(const std::shared_ptr& func) { const auto& ops = func->get_ops(); for (const auto& node : ops) { @@ -650,26 +538,6 @@ std::vector convertOutputPrecision(const std::vector } } -std::ostream& operator<<(std::ostream& os, MemoryTransformation type) { - 
switch (type) { - case MemoryTransformation::NONE: - os << "NONE"; - break; - case MemoryTransformation::LOW_LATENCY_V2: - os << "LOW_LATENCY_V2"; - break; - case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API: - os << "LOW_LATENCY_V2_REGULAR_API"; - break; - case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT: - os << "LOW_LATENCY_V2_ORIGINAL_INIT"; - break; - default: - throw std::runtime_error("NOT_SUPPORTED_TYPE"); - } - return os; -} - void resize_function(std::shared_ptr function, const std::vector& targetInputStaticShapes) { auto inputs = function->inputs(); std::map, ov::PartialShape> shapes; diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp index f0f6fe51521889..0f192fcfa2f54f 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp @@ -156,6 +156,11 @@ enum class DFTOpType { INVERSE }; +enum class QuantizationGranularity { + Pertensor, + Perchannel +}; + enum class TensorIteratorBody { RNN, GRU, @@ -169,6 +174,7 @@ enum class MemoryTransformation { LOW_LATENCY_V2_REGULAR_API, LOW_LATENCY_V2_ORIGINAL_INIT }; +// clang-format on std::ostream& operator<<(std::ostream& os, const ReductionType& m); diff --git a/src/tests/test_utils/common_test_utils/src/test_enums.cpp b/src/tests/test_utils/common_test_utils/src/test_enums.cpp index feb2d29a26facf..f1050f47a28629 100644 --- a/src/tests/test_utils/common_test_utils/src/test_enums.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_enums.cpp @@ -351,6 +351,20 @@ std::ostream& operator<<(std::ostream& os, TensorIteratorBody type) { return os; } +std::ostream& operator<<(std::ostream& os, QuantizationGranularity type) { + switch (type) { + case QuantizationGranularity::Pertensor: + os << "Pertensor"; + break; + case QuantizationGranularity::Perchannel: + os << 
"Perchannel"; + break; + default: + throw std::runtime_error("NOT_SUPPORTED_OP_TYPE"); + } + return os; +} + std::ostream& operator<<(std::ostream& os, MemoryTransformation type) { switch (type) { case MemoryTransformation::NONE: From 4bde741de43223ba044619ee8a5678d2a62a2b1d Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Thu, 9 Nov 2023 08:08:18 +0100 Subject: [PATCH 247/275] Refactor StaticShapeLoopLayerTest (#20963) --- .../functional/single_layer_tests/loop.cpp | 249 ++++++++---------- .../shared/include/single_op_tests/loop.hpp | 4 + .../shared_test_classes/single_op/loop.hpp | 27 ++ .../src/single_op/loop.cpp | 133 +++++++++- 4 files changed, 280 insertions(+), 133 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/loop.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/loop.cpp index cf891a9d7e4022..6aa4188a189258 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/loop.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/loop.cpp @@ -2,139 +2,124 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include "single_layer_tests/loop.hpp" +#include "single_op_tests/loop.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace InferenceEngine; - namespace { - std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::I32 - }; - - std::map netConfigurations = { - {GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO} - }; - - static const std::vector> static_loop_types_axis_0 { - // GCC4.8 limitation: have to specify type of each element in list - // static_trip_count | max | dynamic_exit | axis - std::tuple{ true , 10, -1, 0 }, // n_iter 10, no dynamic exit - }; - - std::vector inputs_0 = { - {1, 4, 2} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_0, StaticShapeLoopTest, - testing::Combine( - /* unrolling */ 
testing::ValuesIn(std::vector{false}), - /* static_continue_cond */ testing::Values(true), - /* args_papck */ testing::ValuesIn(static_loop_types_axis_0), - /* start_value */ testing::Values(0), - /* data_shape */ testing::ValuesIn(inputs_0), - /* data_prc */ testing::ValuesIn(netPrecisions), - /* device */ testing::Values(ov::test::utils::DEVICE_GPU), - /* configuration */ testing::Values>(netConfigurations)), - StaticShapeLoopTest::getTestCaseName); - - static const std::vector> static_loop_types_1 { - // GCC4.8 limitation: have to specify type of each element in list - // static_trip_count | max | dynamic_exit | axis - std::tuple{ true , 5, -1, 1 }, // n_iter 5, no dynamic exit - }; - - std::vector inputs_1 = { - {2, 1, 4, 6} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_1, StaticShapeLoopTest, - testing::Combine( - /* unrolling */ testing::ValuesIn(std::vector{false}), - /* static_continue_cond */ testing::Values(true), - /* args_papck */ testing::ValuesIn(static_loop_types_1), - /* start_value */ testing::Values(0), - /* data_shape */ testing::ValuesIn(inputs_1), - /* data_prc */ testing::ValuesIn(netPrecisions), - /* device */ testing::Values(ov::test::utils::DEVICE_GPU), - /* configuration */ testing::Values>(netConfigurations)), - StaticShapeLoopTest::getTestCaseName); - - static const std::vector> static_loop_types_2 { - // GCC4.8 limitation: have to specify type of each element in list - // static_trip_count | max | dynamic_exit | axis - std::tuple{ true , 10, -1, 2 }, // n_iter 10, no dynamic exit - }; - - std::vector inputs_2 = { - {2, 4, 1, 6} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_2, StaticShapeLoopTest, - testing::Combine( - /* unrolling */ testing::ValuesIn(std::vector{false}), - /* static_continue_cond */ testing::Values(true), - /* args_papck */ testing::ValuesIn(static_loop_types_2), - /* start_value */ testing::Values(0), - /* data_shape */ testing::ValuesIn(inputs_2), - /* data_prc */ 
testing::ValuesIn(netPrecisions), - /* device */ testing::Values(ov::test::utils::DEVICE_GPU), - /* configuration */ testing::Values>(netConfigurations)), - StaticShapeLoopTest::getTestCaseName); - - static const std::vector> static_loop_types_no_auto_concat { - // GCC4.8 limitation: have to specify type of each element in list - // static_trip_count | max | dynamic_exit | axis - std::tuple{ true , 10, -1, -1 }, // n_iter 5, no dynamic exit - }; - - std::vector inputs_no_auto_concat = { - {4, 20, 12} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_no_auto_concat, StaticShapeLoopTest, - testing::Combine( - /* unrolling */ testing::ValuesIn(std::vector{false}), - /* static_continue_cond */ testing::Values(true), - /* args_papck */ testing::ValuesIn(static_loop_types_no_auto_concat), - /* start_value */ testing::Values(0), - /* data_shape */ testing::ValuesIn(inputs_no_auto_concat), - /* data_prc */ testing::ValuesIn(netPrecisions), - /* device */ testing::Values(ov::test::utils::DEVICE_GPU), - /* configuration */ testing::Values>(netConfigurations)), - StaticShapeLoopTest::getTestCaseName); - - static const std::vector> static_loop_types_dynamic_exit { - // GCC4.8 limitation: have to specify type of each element in list - // static_trip_count | max | dynamic_exit | axis - std::tuple{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3 - std::tuple{ true , 5, 7, 1 }, // n_iter 5, dynamic exit not reached - std::tuple{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5 - std::tuple{ false , 5, 3, -1 }, // | same with dynamic trip count - std::tuple{ false , 5, 7, 1 }, // | - std::tuple{ false , -1, 5, -1 } // | - }; - - std::vector inputs_dynamic_exit = { - {4, 1, 2} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_dynamic_exit, StaticShapeLoopTest, - testing::Combine( - /* unrolling */ testing::ValuesIn(std::vector{false}), - /* static_continue_cond */ testing::Values(true), - /* args_papck */ 
testing::ValuesIn(static_loop_types_dynamic_exit), - /* start_value */ testing::Values(0), - /* data_shape */ testing::ValuesIn(inputs_dynamic_exit), - /* data_prc */ testing::ValuesIn(netPrecisions), - /* device */ testing::Values(ov::test::utils::DEVICE_GPU), - /* configuration */ testing::Values>(netConfigurations)), - StaticShapeLoopTest::getTestCaseName); - +using ov::test::StaticShapeLoopLayerTest; + +std::vector model_types = { + ov::element::f32, + ov::element::i32 +}; + +static const std::vector> static_loop_types_axis_0 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, 0 }, // n_iter 10, no dynamic exit +}; + +std::vector inputs_0 = { + {1, 4, 2} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_0, StaticShapeLoopLayerTest, + testing::Combine( + /* unrolling */ testing::ValuesIn(std::vector{false}), + /* static_continue_cond */ testing::Values(true), + /* args_papck */ testing::ValuesIn(static_loop_types_axis_0), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_0), + /* data_prc */ testing::ValuesIn(model_types), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU)), + StaticShapeLoopLayerTest::getTestCaseName); + +static const std::vector> static_loop_types_1 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 5, -1, 1 }, // n_iter 5, no dynamic exit +}; + +std::vector inputs_1 = { + {2, 1, 4, 6} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_1, StaticShapeLoopLayerTest, + testing::Combine( + /* unrolling */ testing::ValuesIn(std::vector{false}), + /* static_continue_cond */ testing::Values(true), + /* args_papck */ testing::ValuesIn(static_loop_types_1), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_1), + /* data_prc */ testing::ValuesIn(model_types), + /* device */ 
testing::Values(ov::test::utils::DEVICE_GPU)), + StaticShapeLoopLayerTest::getTestCaseName); + +static const std::vector> static_loop_types_2 { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, 2 }, // n_iter 10, no dynamic exit +}; + +std::vector inputs_2 = { + {2, 4, 1, 6} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_axis_2, StaticShapeLoopLayerTest, + testing::Combine( + /* unrolling */ testing::ValuesIn(std::vector{false}), + /* static_continue_cond */ testing::Values(true), + /* args_papck */ testing::ValuesIn(static_loop_types_2), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_2), + /* data_prc */ testing::ValuesIn(model_types), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU)), + StaticShapeLoopLayerTest::getTestCaseName); + +static const std::vector> static_loop_types_no_auto_concat { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 10, -1, -1 }, // n_iter 5, no dynamic exit +}; + +std::vector inputs_no_auto_concat = { + {4, 20, 12} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_no_auto_concat, StaticShapeLoopLayerTest, + testing::Combine( + /* unrolling */ testing::ValuesIn(std::vector{false}), + /* static_continue_cond */ testing::Values(true), + /* args_papck */ testing::ValuesIn(static_loop_types_no_auto_concat), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_no_auto_concat), + /* data_prc */ testing::ValuesIn(model_types), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU)), + StaticShapeLoopLayerTest::getTestCaseName); + +static const std::vector> static_loop_types_dynamic_exit { + // GCC4.8 limitation: have to specify type of each element in list + // static_trip_count | max | dynamic_exit | axis + std::tuple{ true , 5, 3, -1 }, // n_iter 3, dynamic exit 
on 3 + std::tuple{ true , 5, 7, 1 }, // n_iter 5, dynamic exit not reached + std::tuple{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5 + std::tuple{ false , 5, 3, -1 }, // | same with dynamic trip count + std::tuple{ false , 5, 7, 1 }, // | + std::tuple{ false , -1, 5, -1 } // | +}; + +std::vector inputs_dynamic_exit = { + {4, 1, 2} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StaticShapeLoop_dynamic_exit, StaticShapeLoopLayerTest, + testing::Combine( + /* unrolling */ testing::ValuesIn(std::vector{false}), + /* static_continue_cond */ testing::Values(true), + /* args_papck */ testing::ValuesIn(static_loop_types_dynamic_exit), + /* start_value */ testing::Values(0), + /* data_shape */ testing::ValuesIn(inputs_dynamic_exit), + /* data_prc */ testing::ValuesIn(model_types), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU)), + StaticShapeLoopLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/loop.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/loop.hpp index ada7e8b32b8e79..93d6f15e99d1e2 100644 --- a/src/tests/functional/plugin/shared/include/single_op_tests/loop.hpp +++ b/src/tests/functional/plugin/shared/include/single_op_tests/loop.hpp @@ -11,5 +11,9 @@ namespace test { TEST_P(LoopLayerTest, Inference) { run(); } + +TEST_P(StaticShapeLoopLayerTest, Inference) { + run(); +} } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/loop.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/loop.hpp index c9645d6a5b1c74..dc064f23a16ee9 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/loop.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/loop.hpp @@ -32,6 +32,33 @@ class LoopLayerTest : public testing::WithParamInterface, public: static std::string getTestCaseName(const 
testing::TestParamInfo &obj); +protected: + void SetUp() override; +}; + +using StaticShapeLoopParams = typename std::tuple< + bool, + bool, + std::tuple< + bool, + int64_t, + int64_t, + int64_t + >, + int64_t, + ov::Shape, + ov::element::Type, + std::string>; + +/** + * Test case with static SHAPE version of loop operation. + * Total iteration count is dynamic. + */ +class StaticShapeLoopLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + protected: void SetUp() override; }; diff --git a/src/tests/functional/shared_test_classes/src/single_op/loop.cpp b/src/tests/functional/shared_test_classes/src/single_op/loop.cpp index d699fa51d8a550..c4d2b79398c6de 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/loop.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/loop.cpp @@ -4,11 +4,15 @@ #include "shared_test_classes/single_op/loop.hpp" -#include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/result.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/loop.hpp" +#include "openvino/op/less.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/control_flow/unroll_tensor_iterator.hpp" namespace ov { namespace test { @@ -121,5 +125,132 @@ void LoopLayerTest::SetUp() { auto result2 = std::make_shared(out2); function = std::make_shared(ov::ResultVector{result0, result1, result2}, params, "loop"); } + +std::string StaticShapeLoopLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + bool unrolling; + bool static_iter_num; + bool static_continue_cond; + int64_t max_iter_num; + int64_t dynamic_exit; + int64_t axis; + int64_t start_value; + ov::Shape data_shape; + ov::element::Type model_type; + std::string target_device; + auto args_papck = 
std::tie(static_iter_num, max_iter_num, dynamic_exit, axis); + std::tie( + unrolling, + static_continue_cond, + args_papck, + start_value, + data_shape, + model_type, + target_device) = obj.param; + + std::ostringstream result; + result << "unrolling=" << std::to_string(unrolling) << "_"; + result << "static_iter_num=" << std::to_string(static_iter_num) << "_"; + result << "static_continue_cond=" << std::to_string(static_continue_cond) << "_"; + result << "max_iter_num=" << std::to_string(max_iter_num) << "_"; + result << "dynamic_exit=" << std::to_string(dynamic_exit) << "_"; + result << "axis=" << std::to_string(axis) << "_"; + result << "start_value=" << std::to_string(start_value) << "_"; + result << "max_iter_num=" << std::to_string(max_iter_num) << "_"; + result << "IS=" << ov::test::utils::vec2str(data_shape) << "_"; + result << "modelType=" << model_type.get_type_name() << "_"; + result << "targetDevice=" << target_device << "_"; + + auto res_str = result.str(); + std::replace(res_str.begin(), res_str.end(), '-', '_'); + return res_str; +} + +void StaticShapeLoopLayerTest::SetUp() { + bool unrolling; + bool static_iter_num; + bool static_continue_cond; + int64_t max_iter_num; + int64_t dynamic_exit; + int64_t axis; + int64_t start_value; + ov::Shape data_shape; + ov::element::Type model_type; + auto args_papck = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis); + std::tie( + unrolling, + static_continue_cond, + args_papck, + start_value, + data_shape, + model_type, + targetDevice) = GetParam(); + + const auto ngShape = ov::Shape{data_shape}; + const auto scalarShape = ov::Shape{}; + + ngraph::ParameterVector params{}; + auto cond_input_create = [&params] (ov::element::Type model_type, const ov::Shape &shape, int value = 0, bool is_static = false) + -> std::shared_ptr { + if (is_static) + return std::make_shared(model_type, shape, value); + + auto input = std::make_shared(model_type, shape); + params.push_back(input); + return input; + }; + + auto
start = cond_input_create(model_type, ngShape); + auto count = cond_input_create(ov::element::i64, scalarShape, max_iter_num, static_iter_num); + auto skip = cond_input_create(ov::element::boolean, scalarShape, true, static_continue_cond); + + // + // count skip start count skip start + // / / + // ___*___*____ __________*___*____ | idx | data | out | + // | idx in | | ex_val idx in | | 0 | 7 | 7 | + // | | / | | | / | / | | 1 | 7 | 8 | + // | add | | less add | | 2 | 8 | 10 | + // | | true | | | | | | 3 | 10 | 13 | + // | | | | | | | | ~~~~~ * * * ~~~~~ + // | out cnd | | cnd out | + // |___*____*___| |____*_____*________| + // Full loop Dynamic exit loop + // n_iter = count n_iter = ex_val + // + auto b_indx = std::make_shared(ov::element::i64, ov::Shape{}); + auto b_data = std::make_shared(model_type, ngShape); + auto b_indx_cast = std::make_shared(b_indx, model_type); + auto b_add = std::make_shared(b_data, b_indx_cast); + + std::shared_ptr b_cond; + if (dynamic_exit == -1) { + b_cond = std::make_shared(ov::element::boolean, ov::Shape{}, true); + } else { + auto b_exit_value = std::make_shared(ov::element::i64, scalarShape, dynamic_exit); + b_cond = std::make_shared(b_indx, b_exit_value); + } + + auto body = std::make_shared( + ov::OutputVector {b_cond, b_add}, // TODO: check with reverse + ov::ParameterVector {b_indx, b_data}); // TODO: check with reverse + + auto loop = std::make_shared(count, skip); + loop->set_function(body); + loop->set_special_body_ports({0, 0}); + loop->set_merged_input(b_data, start, b_add); + if (axis == -1) + loop->get_iter_value(b_add, -1); + else + loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis); + + function = std::make_shared( + ov::OutputVector {loop}, + params); + if (unrolling) { + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + } +} } // namespace test } // namespace ov From 6e073b11656db75e8494bdd4031a88a1c1a02e1d Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Thu, 9 Nov 
2023 08:10:37 +0100 Subject: [PATCH 248/275] [core] Migrate SoftSign operator to new API (#20958) * Align code style * Use Evaluate in place of switch case * Use std::transform in place of for loop --- src/core/include/openvino/op/softsign.hpp | 8 +- .../include/openvino/reference/softsign.hpp | 8 +- src/core/src/op/softsign.cpp | 86 ++++++++----------- 3 files changed, 44 insertions(+), 58 deletions(-) diff --git a/src/core/include/openvino/op/softsign.hpp b/src/core/include/openvino/op/softsign.hpp index 5fd112a4043234..05a2c337b1cb83 100644 --- a/src/core/include/openvino/op/softsign.hpp +++ b/src/core/include/openvino/op/softsign.hpp @@ -22,14 +22,12 @@ class OPENVINO_API SoftSign : public util::UnaryElementwiseArithmetic { /// SoftSign(const Output& arg); - bool visit_attributes(AttributeVisitor& visitor) override; - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - bool evaluate(ov::TensorVector& outputs, - const ov::TensorVector& inputs, - const ov::EvaluationContext& evaluation_context) const override; + bool evaluate(TensorVector& outputs, + const TensorVector& inputs, + const EvaluationContext& evaluation_context) const override; bool has_evaluate() const override; }; } // namespace v9 diff --git a/src/core/reference/include/openvino/reference/softsign.hpp b/src/core/reference/include/openvino/reference/softsign.hpp index 59cd44f7d47514..4105386ffec388 100644 --- a/src/core/reference/include/openvino/reference/softsign.hpp +++ b/src/core/reference/include/openvino/reference/softsign.hpp @@ -4,16 +4,16 @@ #pragma once +#include #include -#include namespace ov { namespace reference { template void softsign(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = arg[i] / (1 + std::abs(arg[i])); - } + std::transform(arg, arg + count, out, [](const T v) { + return v / (T{1} + static_cast(std::abs(v))); + }); } } // namespace reference } // namespace ov 
diff --git a/src/core/src/op/softsign.cpp b/src/core/src/op/softsign.cpp index fdb90e97f88fe0..733b193b7248a4 100644 --- a/src/core/src/op/softsign.cpp +++ b/src/core/src/op/softsign.cpp @@ -3,44 +3,33 @@ // #include "openvino/op/softsign.hpp" -#include - +#include "element_visitor.hpp" #include "itt.hpp" -#include "openvino/core/attribute_visitor.hpp" #include "openvino/core/shape_util.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/softsign.hpp" #include "openvino/runtime/tensor.hpp" -namespace { -template -inline bool evaluate(const ov::Tensor& arg, const ov::Tensor& out, const size_t count) { - using T = typename ov::element_type_traits::value_type; - ov::reference::softsign(arg.data(), out.data(), count); - return true; -} - -bool evaluate_softsign(const ov::Tensor& arg, const ov::Tensor& out) { - bool rc = true; - size_t count = arg.get_size(); +namespace ov { +namespace op { +namespace softsign { +struct Evaluate : element::NoAction { + using element::NoAction::visit; - switch (arg.get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_softsign, bf16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_softsign, f16, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_softsign, f32, arg, out, count); - OPENVINO_TYPE_CASE(evaluate_softsign, f64, arg, out, count); - default: - rc = false; - break; + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + reference::softsign(in.data(), out.data(), count); + return true; } - return rc; -} -} // namespace +}; -ov::op::v9::SoftSign::SoftSign(const Output& arg) : UnaryElementwiseArithmetic(arg) { +} // namespace softsign +namespace v9 { +SoftSign::SoftSign(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -void ov::op::v9::SoftSign::validate_and_infer_types() { +void SoftSign::validate_and_infer_types() { OV_OP_SCOPE(v9_SoftSign_validate_and_infer_types); const element::Type& input_et = 
get_input_element_type(0); @@ -52,34 +41,28 @@ void ov::op::v9::SoftSign::validate_and_infer_types() { UnaryElementwiseArithmetic::validate_and_infer_types(); } -bool ov::op::v9::SoftSign::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v9_SoftSign_visit_attributes); - return true; -} - -std::shared_ptr ov::op::v9::SoftSign::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr SoftSign::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v9_SoftSign_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0)); + return std::make_shared(new_args.at(0)); } -bool ov::op::v9::SoftSign::has_evaluate() const { +bool SoftSign::has_evaluate() const { OV_OP_SCOPE(v9_SoftSign_has_evaluate); switch (get_input_element_type(0)) { - case ov::element::bf16: - case ov::element::f16: - case ov::element::f32: - case ov::element::f64: + case element::bf16: + case element::f16: + case element::f32: + case element::f64: return true; default: - break; + return false; } - return false; } -bool ov::op::v9::SoftSign::evaluate(ov::TensorVector& outputs, - const ov::TensorVector& inputs, - const ov::EvaluationContext& evaluation_context) const { +bool SoftSign::evaluate(TensorVector& outputs, + const TensorVector& inputs, + const EvaluationContext& evaluation_context) const { OV_OP_SCOPE(v9_SoftSign_evaluate); OPENVINO_ASSERT(outputs.size() == 1 && inputs.size() == 1, @@ -89,9 +72,14 @@ bool ov::op::v9::SoftSign::evaluate(ov::TensorVector& outputs, outputs.size(), " output(s)."); - const auto& in = inputs[0]; - auto& out = outputs[0]; - - out.set_shape(in.get_shape()); - return evaluate_softsign(in, out); + const auto& input_shape = inputs[0].get_shape(); + outputs[0].set_shape(input_shape); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(input_shape)); } +} // namespace v9 +} // namespace op +} // namespace ov From 
000966660ca80a2bd5fe233f9d727edfe6dd5875 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Thu, 9 Nov 2023 08:38:58 +0100 Subject: [PATCH 249/275] Smart CI POC (#19825) * Try using a custom action directly from repo * Run smart CI under ubuntu-latest * Set output + add a sample step * Update linux.yml * Add components.yml * Add some conditions * Just to check if reference to "needs" work in job context * Update linux.yml * More example cases * Dummy change to CPU * Fix typo * Fix SAMPLES_AFFECTED variable * Use more correct dependents key * Fighting with messy GHA conditions * No brackets and no double quotes in conditions * Revert "Dummy change to CPU" This reverts commit 4eae09e5b51816a63f77c868276e8edc4869f816. * Use refactored action * Move action implementation to openvino repo * Extend components.yml config * Update labeler.yml * Dummy change to TF FE * Fix indentation * Add missing needs * Add missing records * Allow missing records for components in validation * install_openvino_dependencies as a separate step for Python_Unit_Tests * Improve config validation * Revert "Dummy change to TF FE" This reverts commit 01190864d1001c21ccc6d228ba445f4d94d8fc62. * Dummy change to model hub tests * Update CPU component config * Dummy change to Python API * Dummy change to Python API * Revert "Dummy change to Python API" This reverts commit 3fce0bb3fbdbdd0fcb70d65bf6cae0d06be1a881. * Dummy change to Python API * Simplify conditions. Cover "no components changed" case * Update components.yml * Update .gitignore * Revert "Dummy change to Python API" This reverts commit e57ea9852c6bcfd6091ff1db3ae7c8e285a9b9f4. * Fix dependencies scopes * Add simple unit tests for smart ci functionality * Revert "Dummy change to model hub tests" This reverts commit c3d6837e224117eb70083297aabd6a978aa1632d. * Use ghapi module with permissive license * Cover install_build_dependencies.sh script by labeler * More labels * Use ghapi. 
Apply review comments * Enable dot files to be matched by labeler * Warning instead of error in artifacts upload where smart ci is enabled * Fix master merge * Fix condition for TF FE common tests * Fix condition for Pytorch FE tests * Remove condition for pytorch model tests * Allow any label as a component * Refactor tests log handling * Allow any defined label as a component * Rearrange config structure. Fill the config with actual data * Run full scope on changes to non-matching files * Add missing conditions --------- Co-authored-by: Ilya Lavrenov --- .github/actions/smart-ci/action.yml | 86 ++++++++ .../actions/smart-ci/components_schema.yml | 35 ++++ .github/actions/smart-ci/requirements.txt | 3 + .github/actions/smart-ci/smart_ci.py | 197 +++++++++++++++++ .github/actions/smart-ci/smart_ci_test.py | 198 ++++++++++++++++++ .github/components.yml | 198 ++++++++++++++++++ .github/workflows/linux.yml | 90 +++++++- .gitignore | 1 + 8 files changed, 798 insertions(+), 10 deletions(-) create mode 100644 .github/actions/smart-ci/action.yml create mode 100644 .github/actions/smart-ci/components_schema.yml create mode 100644 .github/actions/smart-ci/requirements.txt create mode 100644 .github/actions/smart-ci/smart_ci.py create mode 100644 .github/actions/smart-ci/smart_ci_test.py create mode 100644 .github/components.yml diff --git a/.github/actions/smart-ci/action.yml b/.github/actions/smart-ci/action.yml new file mode 100644 index 00000000000000..ff04d53f943e02 --- /dev/null +++ b/.github/actions/smart-ci/action.yml @@ -0,0 +1,86 @@ +name: "Smart CI action" +description: "Returns product components affected by PR or commit" +inputs: + repository: + description: "GitHub repository" + required: true + repo_token: + description: "Token for access to GitHub repository" + required: true + pr: + description: "GitHub PR number. If not set - commit is used" + required: false + commit_sha: + description: "GitHub commit hash. 
Used if no PR number is set" + required: false + component_pattern: + description: "Pattern to extract component name from PR label. If not set, any label is considered a component name" + required: false + labeler_check_name: + description: "Name of the labeler check" + required: false + default: "triage" + components_config: + description: "Path to components configuration file" + required: false + default: ".github/components.yml" + components_config_schema: + description: "Path to the schema file for components configuration" + required: false + default: ".github/actions/smart-ci/components_schema.yml" + labeler_config: + description: "Path to labeler configuration file" + required: false + default: ".github/labeler.yml" + +outputs: + all_components: + description: "All components listed in configuration" + value: ${{ steps.smart_ci.outputs.all_components }} + affected_components: + description: "Affected components to run validation for and their validation scope" + value: ${{ steps.smart_ci.outputs.affected_components }} + +runs: + using: "composite" + steps: + - name: Wait for labeler to finish + uses: lewagon/wait-on-check-action@v1.3.1 + if: ${{ github.event_name == 'pull_request' }} + with: + ref: ${{ github.event.pull_request.head.sha }} + check-name: ${{ inputs.labeler_check_name }} + repo-token: ${{ inputs.repo_token }} + wait-interval: 10 + + - name: checkout components file + uses: actions/checkout@v4 + with: + sparse-checkout: .github/components.yml + sparse-checkout-cone-mode: false + + - name: Install Python dependencies + uses: py-actions/py-dependency-install@v4 + with: + path: "${{ github.action_path }}/requirements.txt" + update-setuptools: "false" + update-wheel: "false" + + - name: Test functionality + run: | + python ${{ github.action_path }}/smart_ci_test.py + shell: bash + + - name: Smart CI + id: smart_ci + run: | + python ${{ github.action_path }}/smart_ci.py \ + $([[ -n "${{ inputs.pr }}" ]] && echo '--pr ${{ inputs.pr }}' || echo '-s 
${{ inputs.commit_sha }}') \ + -r ${{ inputs.repository }} \ + -p "${{ inputs.component_pattern }}" \ + -c "${{ inputs.components_config }}" \ + -m "${{ inputs.components_config_schema }}" \ + -l "${{ inputs.labeler_config }}" + shell: bash + env: + GITHUB_TOKEN: ${{ inputs.repo_token }} diff --git a/.github/actions/smart-ci/components_schema.yml b/.github/actions/smart-ci/components_schema.yml new file mode 100644 index 00000000000000..7e9d0e4483d346 --- /dev/null +++ b/.github/actions/smart-ci/components_schema.yml @@ -0,0 +1,35 @@ +# YAML schema for Smart CI configuration file components.yml (see https://json-schema.org) + +definitions: + component_name: + type: string + pattern: "^[a-zA-Z_][a-zA-Z0-9_]*$" + + component_data: + type: object # dict + additionalProperties: false + properties: + cmake: + type: array + uniqueItems: true + items: + '$ref': '#/definitions/component_name' + + revalidate: + type: array + uniqueItems: true + items: + '$ref': '#/definitions/component_name' + + build: + type: array + uniqueItems: true + items: + '$ref': '#/definitions/component_name' + +propertyNames: # Validates component names + '$ref': '#/definitions/component_name' +patternProperties: + ".*": # Component (name validated via propertyNames) + '$ref': '#/definitions/component_data' +additionalProperties: false diff --git a/.github/actions/smart-ci/requirements.txt b/.github/actions/smart-ci/requirements.txt new file mode 100644 index 00000000000000..0b63ed633222cb --- /dev/null +++ b/.github/actions/smart-ci/requirements.txt @@ -0,0 +1,3 @@ +ghapi~=1.0.4 +pyyaml~=6.0.1 +jsonschema~=4.19.1 \ No newline at end of file diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py new file mode 100644 index 00000000000000..3430519f7176d3 --- /dev/null +++ b/.github/actions/smart-ci/smart_ci.py @@ -0,0 +1,197 @@ +import os +import re +import argparse +import yaml +import json +import jsonschema +import logging +from pathlib import Path +from 
ghapi.all import GhApi + + +class ComponentConfig: + FullScope = {'build', 'test'} + ScopeKeys = {'build', 'revalidate'} + + def __init__(self, config: dict, schema: dict, all_possible_components: set): + self.config = config + self.log = logging.getLogger(self.__class__.__name__) + self.all_defined_components = set(self.config.keys()) # already defined in components.yml + self.all_possible_components = all_possible_components # can be added to components.yml (based on labeler.yml) + + self.validate(schema, all_possible_components) + + def validate(self, schema: dict, all_possible_components: set) -> None: + """Validates syntax of configuration file""" + jsonschema.validate(self.config, schema) + + invalid_components = self.all_defined_components.difference(all_possible_components) + if invalid_components: + error_msg = f"components are invalid: " \ + f"{invalid_components} are not listed in labeler config: {all_possible_components}" + raise jsonschema.exceptions.ValidationError(error_msg) + + for component_name, data in self.config.items(): + dependent_components = set(data.get('dependent_components', dict()).keys()) if data else set() + + invalid_dependents = dependent_components.difference(all_possible_components) + if invalid_dependents: + error_msg = f"dependent_components of {component_name} are invalid: " \ + f"{invalid_dependents} are not listed in components config: {all_possible_components}" + raise jsonschema.exceptions.ValidationError(error_msg) + + def get_affected_components(self, changed_components_names: set) -> dict: + """Returns changed components, their dependencies and validation scope for them""" + affected_components = dict() + + # If some changed components were not defined in config or no changed components detected at all, + # run full scope for everything (just in case) + changed_not_defined_components = changed_components_names.difference(self.all_defined_components) + if not changed_components_names or changed_not_defined_components: + 
self.log.info(f"Changed components {changed_not_defined_components} are not defined in smart ci config, " + "run full scope") + affected_components.update({name: self.FullScope for name in self.all_possible_components}) + return affected_components + + # Else check changed components' dependencies and add them to affected + for name in changed_components_names: + component_scopes = {k: v for k, v in self.config.get(name, dict()).items() if k in self.ScopeKeys} + for key, dependents in component_scopes.items(): + for dep_name in dependents: + affected_components[dep_name] = affected_components.get(dep_name, set()) + scope = self.FullScope if key == 'revalidate' else {key} + affected_components[dep_name] = affected_components[dep_name].union(scope) + + if not component_scopes: + self.log.info(f"Changed component '{name}' doesn't have {self.ScopeKeys} keys in components config. " + f"Assuming that it affects everything, the whole scope will be started") + for dep_name in self.all_possible_components: + affected_components[dep_name] = self.FullScope + + # If the component was explicitly changed, run full scope for it + affected_components.update({name: self.FullScope for name in changed_components_names}) + self.log.info(f"Changed components with dependencies: {affected_components}") + + # For non-affected components that are not defined in config - run full scope + affected_components.update({name: self.FullScope for name in self.all_possible_components + if name not in self.all_defined_components}) + + return affected_components + + def get_static_data(self, components_names: set, data_key: str, default: str = None) -> dict: + """Returns requested generic static data defined for each component""" + data = {name: self.config[name].get(data_key, default) for name in components_names} + return data + + +def component_name_from_label(label: str, component_pattern: str = None) -> str: + """Extracts component name from label""" + component = label + if component_pattern: + 
matches = re.findall(component_pattern, label) + component = matches[0] if matches else None + component = component.replace(' ', '_') if component else None + return component + + +def get_changed_component_names(pr, all_possible_components: set, component_pattern: str = None) -> set: + """Returns component names changed in a given PR""" + components = set() + for label in pr.labels: + component = component_name_from_label(label.name, component_pattern) + if component: + components.add(component) + elif label.name in all_possible_components: + # Allow any labels defined explicitly in labeler config as components + # (predefined labels, such as "do not merge", are still ignored) + components.add(label.name) + + return components + + +def parse_args(): + parser = argparse.ArgumentParser(description='Returns product components changed in a given PR or commit') + parser.add_argument('--pr', type=int, required=False, help='PR number. If not set, --commit is used') + parser.add_argument('-s', '--commit-sha', required=False, help='Commit SHA. If not set, --pr is used') + parser.add_argument('-r', '--repo', help='GitHub repository') + parser.add_argument('-p', '--pattern', default=None, help='Pattern to extract component name from PR label. 
' + 'If not set, any label is considered a component name') + parser.add_argument('-c', '--components-config', default='.github/components.yml', + help='Path to config file with info about dependencies between components') + parser.add_argument('-m', '--components-config-schema', default='.github/actions/smart-ci/components_schema.yml', + help='Path to the schema file for components config') + parser.add_argument('-l', '--labeler-config', default='.github/labeler.yml', + help='Path to PR labeler config file') + args = parser.parse_args() + return args + + +def init_logger(): + logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(name)-15s %(levelname)-8s %(message)s', + datefmt='%m-%d-%Y %H:%M:%S') + + +def set_github_output(name: str, value: str, github_output_var_name: str = 'GITHUB_OUTPUT'): + """Sets output variable for a GitHub Action""" + logger = logging.getLogger(__name__) + # In an environment variable "GITHUB_OUTPUT" GHA stores path to a file to write outputs to + with open(os.environ.get(github_output_var_name), 'a+') as file: + logger.info(f"Add {name}={value} to {github_output_var_name}") + print(f'{name}={value}', file=file) + + +def main(): + init_logger() + logger = logging.getLogger(__name__) + args = parse_args() + for arg, value in sorted(vars(args).items()): + logger.info(f"Argument {arg}: {value}") + + with open(Path(args.components_config), 'r') as config: + components_config = yaml.safe_load(config) + + owner, repository = args.repo.split('/') + gh_api = GhApi(owner=owner, repo=repository, token=os.getenv("GITHUB_TOKEN")) + pr = gh_api.pulls.get(args.pr) if args.pr else None + + with open(Path(args.components_config_schema), 'r') as schema_file: + schema = yaml.safe_load(schema_file) + + with open(Path(args.labeler_config), 'r') as labeler_file: + labeler_config = yaml.safe_load(labeler_file) + + all_possible_components = set() + for label in labeler_config.keys(): + component_name = component_name_from_label(label, args.pattern) + 
all_possible_components.add(component_name if component_name else label) + + # For now, we don't want to apply smart ci rules for post-commits + is_postcommit = not pr + if is_postcommit: + logger.info(f"The run is a post-commit run, executing full validation scope for all components") + + no_match_files_changed = 'no-match-files' in [label.name for label in pr.labels] + if no_match_files_changed: + logger.info(f"There are changed files that don't match any pattern in labeler config, " + f"executing full validation scope for all components") + + run_full_scope = is_postcommit or no_match_files_changed + + # In post-commits - validate all components regardless of changeset + # In pre-commits - validate only changed components with their dependencies + all_defined_components = components_config.keys() + changed_component_names = set(all_defined_components) if run_full_scope else \ + get_changed_component_names(pr, all_possible_components, args.pattern) + logger.info(f"changed_component_names: {changed_component_names}") + + cfg = ComponentConfig(components_config, schema, all_possible_components) + affected_components = cfg.get_affected_components(changed_component_names) + + # Syntactic sugar for easier use in GHA pipeline + affected_components_output = {name: {s: True for s in scope} for name, scope in affected_components.items()} + set_github_output("affected_components", json.dumps(affected_components_output)) + + +if __name__ == '__main__': + main() diff --git a/.github/actions/smart-ci/smart_ci_test.py b/.github/actions/smart-ci/smart_ci_test.py new file mode 100644 index 00000000000000..2c9aa1e4644a6a --- /dev/null +++ b/.github/actions/smart-ci/smart_ci_test.py @@ -0,0 +1,198 @@ +import logging +import sys +import unittest +from smart_ci import ComponentConfig + +log = logging.getLogger() +log.level = logging.DEBUG + + +def log_handler(func): + def wrapper(*args, **kwargs): + stream_handler = logging.StreamHandler(sys.stdout) + log.addHandler(stream_handler) 
+ result = func(*args, **kwargs) + log.removeHandler(stream_handler) + return result + return wrapper + + +class TestComponentConfig(unittest.TestCase): + def setUp(self): + self.all_possible_components = {'comp1', 'comp2', 'comp3', 'comp4'} + ComponentConfig.ScopeKeys = {'build', 'revalidate', '_scope_1', '_scope_2', '_scope_3'} + + @log_handler + def validate(self, config_data: dict, changed_components: set, expected_result: dict): + log.info(f"{self._testMethodName}:") + config = ComponentConfig(config_data, {}, self.all_possible_components) + result = config.get_affected_components(changed_components) + self.assertEqual(expected_result, result) + + def test_no_changed_components(self): + config_data = { + 'comp1': {'build': {}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = set() + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope, + 'comp3': ComponentConfig.FullScope, + 'comp4': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_all_components_changed(self): + config_data = { + 'comp1': {'build': {}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1', 'comp2', 'comp3', 'comp4'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope, + 'comp3': ComponentConfig.FullScope, + 'comp4': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_changed_component_not_defined(self): + config_data = { + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': 
ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope, + 'comp3': ComponentConfig.FullScope, + 'comp4': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_component_changed_no_scope_keys(self): + config_data = { + 'comp1': {}, + 'comp2': {}, + 'comp3': {}, + 'comp4': {}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope, + 'comp3': ComponentConfig.FullScope, + 'comp4': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_one_component_changed_dependents_empty(self): + config_data = { + 'comp1': {'build': {}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_not_changed_dependent_component(self): + config_data = { + 'comp1': {'build': {'comp2'}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': {'build'} + } + self.validate(config_data, changed_components, expected_result) + + def test_changed_dependent_component(self): + config_data = { + 'comp1': {'build': {'comp2'}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1', 'comp2'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope + } + self.validate(config_data, changed_components, expected_result) + + def 
test_dependent_component_multiple_parents(self): + config_data = { + 'comp1': {'_scope_1': {'comp2'}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, '_scope_2': {'comp2'}, '_scope_3': {'comp2'}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1', 'comp3'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': {'_scope_1', '_scope_2', '_scope_3'}, + 'comp3': ComponentConfig.FullScope + } + self.validate(config_data, changed_components, expected_result) + + def test_dependent_component_empty_scopes(self): + config_data = { + 'comp1': {'build': {}, 'revalidate': {'comp2'}}, + 'comp2': {}, + 'comp3': {}, + 'comp4': {}, + } + changed_components = {'comp1', 'comp3'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': ComponentConfig.FullScope, + 'comp3': ComponentConfig.FullScope, + 'comp4': ComponentConfig.FullScope + } + self.validate(config_data, changed_components, expected_result) + + def test_changed_component_empty_dependencies(self): + config_data = { + 'comp1': {'build': {}, 'revalidate': {}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + } + self.validate(config_data, changed_components, expected_result) + + def test_multiple_dependents(self): + config_data = { + 'comp1': {'build': {'comp2'}, 'revalidate': {'comp3'}}, + 'comp2': {'build': {}, 'revalidate': {}}, + 'comp3': {'build': {'comp4'}, 'revalidate': {}}, + 'comp4': {'build': {}, 'revalidate': {}}, + } + changed_components = {'comp1'} + expected_result = { + 'comp1': ComponentConfig.FullScope, + 'comp2': {'build'}, + 'comp3': ComponentConfig.FullScope, + # We don't consider dependencies of dependencies affected, so comp4 is not expected here + } + self.validate(config_data, changed_components, expected_result) + + +if 
__name__ == '__main__': + unittest.main(verbosity=2) diff --git a/.github/components.yml b/.github/components.yml new file mode 100644 index 00000000000000..e414e61443d989 --- /dev/null +++ b/.github/components.yml @@ -0,0 +1,198 @@ +CPU: + revalidate: + - C_API + - Python_API + - samples + build: + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - IR_FE + +GPU: + build: + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - IR_FE + +GNA: + build: + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - IR_FE + +HETERO: + revalidate: + - CPU + - GPU + - GNA + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - C_API + - Python_API + build: + - IR_FE + +AUTO_BATCH: + revalidate: + - CPU + - GPU + - GNA + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - C_API + - Python_API + build: + - IR_FE + +TEMPLATE: + revalidate: + - CPU + - GPU + - GNA + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - C_API + - Python_API + build: + - IR_FE + +AUTO: + revalidate: + - CPU + - GPU + - GNA + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - C_API + - Python_API + build: + - IR_FE + +IR_FE: + revalidate: + - C_API + - Python_API + - samples + build: + - CPU + +ONNX_FE: + revalidate: + - MO + build: + - CPU + - Python_API + +PDPD_FE: + revalidate: + - MO + build: + - CPU + - Python_API + +TF_FE: + revalidate: + - MO + build: + - CPU + - Python_API + +TFL_FE: + revalidate: + - MO + build: + - CPU + - Python_API + +PyTorch_FE: + revalidate: + - MO + build: + - CPU + - Python_API + +C_API: + build: + - CPU + - HETERO + - AUTO_BATCH + - AUTO + - IR_FE + +Python_API: + revalidate: + - samples + - MO + - POT + - tools + build: + - CPU + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + - IR_FE + - ONNX_FE + - PDPD_FE + - TF_FE + - TFL_FE + - PyTorch_FE + +samples: + build: + - CPU + - AUTO_BATCH + - AUTO + - IR_FE + - C_API + - Python_API + +IE_Tests: + revalidate: + - CPU + - GPU + - GNA + - HETERO + - AUTO_BATCH + - TEMPLATE + - AUTO + build: + - IR_FE + +MO: + revalidate: + - POT + build: + - 
Python_API + +POT: + build: + - CPU + - Python_API + +tools: + build: + - CPU + - Python_API + +docs: + revalidate: [] + build: [] + +licensing: + revalidate: [] + build: [] diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ec23f7aba5aa33..51c8146ae3bf07 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -34,6 +34,31 @@ env: PYTHON_VERSION: '3.11' jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + steps: + - name: checkout action + uses: actions/checkout@v4 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.pull_request.number }} + commit_sha: ${{ github.sha }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Show affected components + run: | + echo "${{ toJSON(steps.smart_ci.outputs.affected_components) }}" + shell: bash + Build: timeout-minutes: 150 defaults: @@ -303,7 +328,7 @@ jobs: ovc --help Samples: - needs: Build + needs: [Build, Smart_CI] timeout-minutes: 20 defaults: run: @@ -316,6 +341,7 @@ jobs: INSTALL_DIR: /__w/openvino/openvino/install INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests BUILD_DIR: /__w/openvino/openvino/build + if: fromJSON(needs.smart_ci.outputs.affected_components).samples steps: - name: Download OpenVINO package @@ -381,6 +407,7 @@ jobs: # - name: Samples tests + if: fromJSON(needs.smart_ci.outputs.affected_components).samples.test run: | export WORKSPACE=${INSTALL_DIR} export IE_APP_PATH=${INSTALL_DIR}/samples_bin @@ -402,7 +429,7 @@ jobs: with: name: test-results-samples path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'error' + if-no-files-found: 'warn' Conformance: needs: Build @@ -646,7 +673,7 @@ jobs: CXX_Unit_Tests: name: C++ unit tests - needs: Build + needs: [Build, Smart_CI] 
timeout-minutes: 20 defaults: run: @@ -706,6 +733,7 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceUnit.xml - name: Low Precision Transformations Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).LP_transformations.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_lp_transformations_tests --gtest_print_time=1 \ @@ -718,6 +746,7 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ConditionalCompilation.xml - name: IR frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).IR_FE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_ir_frontend_tests --gtest_print_time=1 \ @@ -731,6 +760,7 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-PaddleTests.xml - name: ONNX frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).ONNX_FE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_onnx_frontend_tests --gtest_print_time=1 \ @@ -738,30 +768,36 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ONNXFrontend.xml - name: TensorFlow Common frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test || + fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_tensorflow_common_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TensorFlowCommonFrontend.xml - name: TensorFlow frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_tensorflow_frontend_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TensorFlowFrontend.xml - name: TensorFlow Lite frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_tensorflow_lite_frontend_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TensorFlowLiteFrontend.xml - 
name: Transformations func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).transformations.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_transformations_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-Transformations.xml - name: Legacy Transformations func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).transformations.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_legacy_transformations_tests --gtest_print_time=1 \ @@ -780,12 +816,14 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CommonUtilTests.xml - name: Snippets func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_snippets_func_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SnippetsFuncTests.xml - name: CPU plugin unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \ @@ -804,18 +842,21 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml - name: AUTO unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).AUTO.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_unit_tests.xml - name: AUTO func Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).AUTO.test run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml - name: Template plugin func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).TEMPLATE.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_template_func_tests --gtest_print_time=1 \ @@ -823,23 +864,27 @@ jobs: 
--gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateFuncTests.xml - name: Inference Engine C API tests + if: fromJSON(needs.smart_ci.outputs.affected_components).C_API.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/InferenceEngineCAPITests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceEngineCAPITests.xml - name: OpenVINO C API tests + if: fromJSON(needs.smart_ci.outputs.affected_components).C_API.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_capi_test --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpenVINOCAPITests.xml - name: AutoBatch unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).AUTO_BATCH.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_unit_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_unit_tests.xml - name: AutoBatch func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).AUTO_BATCH.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_func_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_func_tests.xml @@ -850,11 +895,13 @@ jobs: ${INSTALL_TEST_DIR}/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVProxyTests.xml - name: Hetero unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).HETERO.test run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml - name: Hetero func tests + if: fromJSON(needs.smart_ci.outputs.affected_components).HETERO.test run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml @@ -865,11 +912,11 @@ jobs: with: name: test-results-cpp path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'error' + 
if-no-files-found: 'warn' Python_Unit_Tests: name: Python unit tests - needs: Build + needs: [Build, Smart_CI] timeout-minutes: 40 defaults: run: @@ -926,6 +973,9 @@ jobs: pip-cache-path: ${{ env.PIP_CACHE_PATH }} should-setup-pip-paths: 'true' + - name: Install OpenVINO dependencies + run: ${INSTALL_DIR}/install_dependencies/install_openvino_dependencies.sh -c=core -y + - name: Install OpenVINO Python wheels run: | # Install the core OV wheel @@ -952,6 +1002,7 @@ jobs: # - name: Python API 1.0 Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test run: | python3 -m pytest -s ${INSTALL_TEST_DIR}/pyngraph \ --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ @@ -959,6 +1010,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyngraph/tests_compatibility/test_onnx/test_backend.py - name: Python API 2.0 Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test run: | # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH @@ -967,6 +1019,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Model Optimizer unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test run: | # required for MxNet apt-get install -y libgomp1 libquadmath0 @@ -982,6 +1035,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/onnx/test_python/test_zoo_models.py - name: OVC unit tests + if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml - name: Install Python Layer tests dependencies @@ -990,6 +1044,7 @@ jobs: python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - name: MO Python API Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test run: | # Import 'test_utils' installed in '/tests/python/openvino' export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH @@ -1000,6 +1055,7 @@ jobs: TEST_PRECISION: 
FP16 - name: OVC Python API Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test run: | # Import 'test_utils' installed in '/tests/python/openvino' export PYTHONPATH=${INSTALL_TEST_DIR}/python @@ -1010,6 +1066,8 @@ jobs: TEST_PRECISION: FP16 - name: Python Frontend tests + if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test || + fromJSON(needs.smart_ci.outputs.affected_components).PDPD_FE.test run: | # to allow 'libtest_builtin_extensions.so' to find 'libopenvino_onnx_frontend.so' export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH @@ -1022,6 +1080,7 @@ jobs: TEST_PRECISION: FP32 - name: PyTorch torch.compile TORCHFX Layer Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test run: | python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit_fx_backend --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: @@ -1030,6 +1089,7 @@ jobs: PYTORCH_TRACING_MODE: TORCHFX - name: PyTorch torch.compile TORCHSCRIPT Layer Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test run: | python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit_ts_backend --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: @@ -1038,6 +1098,7 @@ jobs: PYTORCH_TRACING_MODE: TORCHSCRIPT - name: ONNX Layer Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).ONNX_FE.test run: | # requires 'unit_tests' from 'tools/mo' export PYTHONPATH=${INSTALL_TEST_DIR}/mo:$PYTHONPATH @@ -1047,6 +1108,7 @@ jobs: TEST_PRECISION: FP16 - name: TensorFlow 1 Layer Tests - TF FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: | # requires 'unit_tests' from 'mo' export PYTHONPATH=${INSTALL_TEST_DIR}/mo @@ -1056,6 +1118,7 @@ jobs: TEST_PRECISION: FP16 - name: TensorFlow 2 Layer Tests - TF FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: | # requires 'unit_tests' from 'mo' export PYTHONPATH=${INSTALL_TEST_DIR}/mo @@ 
-1065,20 +1128,24 @@ jobs: TEST_PRECISION: FP16 - name: JAX Layer Tests - TF FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-jax.xml env: TEST_DEVICE: CPU - name: TensorFlow 1 Layer Tests - Legacy FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_tests/test_tf_Roll.py --ir_version=10 --junitxml=${INSTALL_TEST_DIR}/TEST-tf_Roll.xml - name: TensorFlow 2 Layer Tests - Legacy FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow2_keras_tests/test_tf2_keras_activation.py --ir_version=11 -k "sigmoid" --junitxml=${INSTALL_TEST_DIR}/TEST-tf2_Activation.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP16 - name: TensorFlow Lite Layer Tests - TFL FE + if: fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml env: TEST_DEVICE: CPU @@ -1107,11 +1174,11 @@ jobs: path: | ${{ env.INSTALL_TEST_DIR }}/TEST*.html ${{ env.INSTALL_TEST_DIR }}/TEST*.xml - if-no-files-found: 'error' + if-no-files-found: 'warn' CPU_Functional_Tests: name: CPU functional tests - needs: Build + needs: [Build, Smart_CI] timeout-minutes: 25 defaults: run: @@ -1125,7 +1192,7 @@ jobs: INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests PARALLEL_TEST_SCRIPT: /__w/openvino/openvino/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py PARALLEL_TEST_CACHE: /__w/openvino/openvino/install/tests/test_cache.lst - + if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test steps: - name: Download OpenVINO package uses: actions/download-artifact@v3 @@ -1207,7 +1274,7 @@ jobs: TensorFlow_Hub_Models_Tests: name: TensorFlow Hub Models tests - needs: Build + needs: [Build, 
Smart_CI] defaults: run: shell: bash @@ -1223,6 +1290,8 @@ jobs: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests + if: fromJSON(needs.smart_ci.outputs.affected_components).TF_FE.test || + fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test steps: - name: Check sudo @@ -1291,7 +1360,7 @@ jobs: PyTorch_Models_Tests: name: PyTorch Models tests - needs: Build + needs: [Build, Smart_CI] timeout-minutes: ${{ github.event_name == 'schedule' && 400 || 30 }} defaults: run: @@ -1307,6 +1376,7 @@ jobs: INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests + if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test steps: - name: Check sudo diff --git a/.gitignore b/.gitignore index 0b0d07a220a027..769c4a4e581fae 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ __pycache__ /tools/mo/*.svg /src/plugins/intel_cpu/tools/commit_slider/*.json /src/plugins/intel_cpu/tools/commit_slider/slider_cache/* +.github/GITHUB_OUTPUT From d6852598cead3e68ca29a9df24328bd2a0cd39dd Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Thu, 9 Nov 2023 08:42:06 +0100 Subject: [PATCH 250/275] Fix Ubuntu20 build error on relu operator (#20965) --- .../include/openvino/reference/relu.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/reference/include/openvino/reference/relu.hpp b/src/core/reference/include/openvino/reference/relu.hpp index d19202d19c1073..eba942fff89118 100644 --- a/src/core/reference/include/openvino/reference/relu.hpp +++ b/src/core/reference/include/openvino/reference/relu.hpp @@ -12,6 +12,13 @@ namespace ov { namespace reference { +namespace func { + +template +bool is_negative(const T v) { + return v < T{0}; +} +} // namespace func /** * 
@brief Reference implementation of ReLU operator (signed values). @@ -22,14 +29,7 @@ namespace reference { */ template () || std::is_signed::value>::type* = nullptr> void relu(const T* arg, T* out, const size_t count) { - std::replace_copy_if( - arg, - arg + count, - out, - [](const T v) { - return v < T{0}; - }, - T{0}); + std::replace_copy_if(arg, arg + count, out, func::is_negative, T{0}); } /** From c851d643b39592a5efa1f690cdc44235d80bfc4d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 9 Nov 2023 14:28:32 +0400 Subject: [PATCH 251/275] Fixed smart CI (#20980) --- .github/actions/smart-ci/smart_ci.py | 10 +++++----- .github/workflows/linux.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py index 3430519f7176d3..6c281cc01cdf23 100644 --- a/.github/actions/smart-ci/smart_ci.py +++ b/.github/actions/smart-ci/smart_ci.py @@ -170,11 +170,11 @@ def main(): is_postcommit = not pr if is_postcommit: logger.info(f"The run is a post-commit run, executing full validation scope for all components") - - no_match_files_changed = 'no-match-files' in [label.name for label in pr.labels] - if no_match_files_changed: - logger.info(f"There are changed files that don't match any pattern in labeler config, " - f"executing full validation scope for all components") + else: + no_match_files_changed = 'no-match-files' in [label.name for label in pr.labels] + if no_match_files_changed: + logger.info(f"There are changed files that don't match any pattern in labeler config, " + f"executing full validation scope for all components") run_full_scope = is_postcommit or no_match_files_changed diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 51c8146ae3bf07..a4b6cae73658c1 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -49,7 +49,7 @@ jobs: uses: ./.github/actions/smart-ci with: repository: ${{ github.repository }} - pr: ${{ 
github.event.pull_request.number }} + pr: ${{ github.event.number }} commit_sha: ${{ github.sha }} component_pattern: "category: (.*)" repo_token: ${{ secrets.GITHUB_TOKEN }} From fa22836cfb737cc079b5ccbc73476518cf213a01 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Thu, 9 Nov 2023 12:07:47 +0100 Subject: [PATCH 252/275] Fix no match files change case (#20981) --- .github/actions/smart-ci/smart_ci.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/smart-ci/smart_ci.py b/.github/actions/smart-ci/smart_ci.py index 6c281cc01cdf23..f9eae5bd562041 100644 --- a/.github/actions/smart-ci/smart_ci.py +++ b/.github/actions/smart-ci/smart_ci.py @@ -166,6 +166,7 @@ def main(): component_name = component_name_from_label(label, args.pattern) all_possible_components.add(component_name if component_name else label) + no_match_files_changed = False # For now, we don't want to apply smart ci rules for post-commits is_postcommit = not pr if is_postcommit: From 3c88a9cf58f5035c037c57e445f79fba332ee53d Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Thu, 9 Nov 2023 12:12:22 +0100 Subject: [PATCH 253/275] [CPU] [ARM] Enable MatMul SLT tests on ARM (#20923) --- src/plugins/intel_cpu/src/node.cpp | 1 + .../intel_cpu/tests/functional/CMakeLists.txt | 9 +- .../single_layer_tests/classes/matmul.cpp | 310 +++ .../single_layer_tests/classes/matmul.hpp | 69 + .../single_layer_tests/convolution.cpp | 1 + .../convolution_backprop_data.cpp | 1 + .../single_layer_tests/depth_to_space.cpp | 1 + .../single_layer_tests/group_convolution.cpp | 1 + .../group_convolution_backprop_data.cpp | 1 + .../instances/common/matmul.cpp | 337 ++++ .../{ => instances/x64}/matmul.cpp | 1654 +++++------------ .../instances/x64/mlas/matmul.cpp | 80 + .../instances/x64/pooling.cpp | 1 + .../single_layer_tests/space_to_depth.cpp | 1 + .../tests/functional/specific_tests.cmake | 8 + .../subgraph_tests/src/conv_concat.cpp | 1 + .../tests/functional/target_per_test.cmake | 8 + 
.../test_utils/arm/filter_cpu_info.cpp | 42 + .../functional/test_utils/cpu_test_utils.cpp | 25 - .../functional/test_utils/cpu_test_utils.hpp | 1 - .../functional/test_utils/filter_cpu_info.hpp | 14 + .../test_utils/x64/filter_cpu_info.cpp | 63 + 22 files changed, 1443 insertions(+), 1186 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/matmul.cpp rename src/plugins/intel_cpu/tests/functional/single_layer_tests/{ => instances/x64}/matmul.cpp (57%) create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mlas/matmul.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/test_utils/arm/filter_cpu_info.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/test_utils/filter_cpu_info.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/test_utils/x64/filter_cpu_info.cpp diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index a80908451c0281..dec33aa2e272ef 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1039,6 +1039,7 @@ const std::vector& Node::getDefaultImplPriority() { impl_desc_type::gemm_avx2, impl_desc_type::gemm_avx, impl_desc_type::gemm_sse42, + impl_desc_type::gemm_acl, impl_desc_type::acl, impl_desc_type::jit_gemm, impl_desc_type::ref_any, diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 06d6b1b6b3583b..2d837abb25eef3 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -21,9 +21,14 @@ endif() if(NOT (ARM OR AARCH64)) list(APPEND EXCLUDED_SOURCE_PATHS - ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/arm - 
${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/arm) + ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/arm + ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/arm + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/arm) else() + list(APPEND EXCLUDED_SOURCE_PATHS + ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/x64 + ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/x64 + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/x64) # temporary disable all custom tests for ARM list(APPEND EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.cpp new file mode 100644 index 00000000000000..17db8e3b291719 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.cpp @@ -0,0 +1,310 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "matmul.hpp" +#include "gtest/gtest.h" +#include "openvino/core/type/element_type.hpp" +#include "openvino/runtime/properties.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string MatMulLayerCPUTest::getTestCaseName(const testing::TestParamInfo& obj) { + MatMulLayerTestParamsSet basicParamsSet; + MatMulNodeType nodeType; + fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; + + std::tie(basicParamsSet, nodeType, fusingParams, cpuParams) = obj.param; + + ElementType netType; + ElementType inType, outType; + ShapeRelatedParams shapeRelatedParams; + ngraph::helpers::InputLayerType secondaryInputType; + TargetDevice targetDevice; + std::map additionalConfig; + std::tie(shapeRelatedParams, netType, inType, outType, secondaryInputType, targetDevice, additionalConfig) = + basicParamsSet; 
+ + std::ostringstream result; + result << (nodeType == MatMulNodeType::MatMul ? "MatMul_" : "FullyConnected_"); + result << "IS="; + for (const auto& shape : shapeRelatedParams.inputShapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : shapeRelatedParams.inputShapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "transpose_a=" << shapeRelatedParams.transpose.first << "_"; + result << "transpose_b=" << shapeRelatedParams.transpose.second << "_"; + result << "secondaryInputType=" << secondaryInputType << "_"; + result << "netPRC=" << netType << "_"; + result << "inPRC=" << inType << "_"; + result << "outPRC=" << outType << "_"; + result << "trgDev=" << targetDevice; + result << "config=("; + for (const auto& configEntry : additionalConfig) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + result << CpuTestWithFusing::getTestCaseName(fusingParams); + result << CPUTestsBase::getTestCaseName(cpuParams); + + return result.str(); +} + +template +void MatMulLayerCPUTest::transpose(T& shape) { + IE_ASSERT(shape.size() > 1); + std::swap(*(shape.end() - 1), *(shape.end() - 2)); +} + +void MatMulLayerCPUTest::SetUp() { + MatMulLayerTestParamsSet basicParamsSet; + MatMulNodeType nodeType; + fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; + + std::tie(basicParamsSet, nodeType, fusingParams, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + ShapeRelatedParams shapeRelatedParams; + ElementType netType; + helpers::InputLayerType secondaryInputType; + std::map additionalConfig; + + std::tie(shapeRelatedParams, netType, inType, outType, secondaryInputType, targetDevice, additionalConfig) = basicParamsSet; + + 
init_input_shapes(shapeRelatedParams.inputShapes); + + bool transpA = shapeRelatedParams.transpose.first; + bool transpB = shapeRelatedParams.transpose.second; + + if (transpA) { + transpose(inputDynamicShapes[0]); + for (auto& shapes : targetStaticShapes) { + transpose(shapes[0]); + } + } + if (transpB) { + transpose(inputDynamicShapes[1]); + for (auto& shapes : targetStaticShapes) { + transpose(shapes[1]); + } + } + + const auto& inShapeA = inputDynamicShapes[0]; + const auto& inShapeB = inputDynamicShapes[1]; + + // see comment in MatMul::canFuse + if (!(nodeType == MatMulNodeType::MatMul && + std::get<0>(fusingParams) && std::get<0>(fusingParams)->getFusedOpsNames().find("(PerChannel)") != std::string::npos && + std::max(inShapeA.size(), inShapeB.size()) > 2)) + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) + inType = outType = netType = ElementType::bf16; + else + inType = outType = netType; + + cpuNodeType = nodeType == MatMulNodeType::MatMul ? 
"MatMul" : "FullyConnected"; + selectedType = makeSelectedTypeStr(selectedType, outType); + + ov::ParameterVector params{std::make_shared(netType, inShapeA)}; + + auto matrixB = builder::makeDynamicInputLayer(netType, secondaryInputType, inShapeB); + if (secondaryInputType == helpers::InputLayerType::PARAMETER) { + params.push_back(std::dynamic_pointer_cast(matrixB)); + } + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + auto matMul = builder::makeMatMul(paramOuts[0], matrixB, transpA, transpB); + function = makeNgraphFunction(netType, params, matMul, cpuNodeType); + checkFusingPosition = false; +} + +TEST_P(MatMulLayerCPUTest, CompareWithRefs) { + // due to disabled BF16 fakequant fusing: src/plugins/intel_cpu/src/graph_optimizer.cpp#L755, skip this case + if (inType == ElementType::bf16) { + if (cpuNodeType == "FullyConnected") { + if (priority[0].find("amx") != std::string::npos || priority[0] == "brgemm_avx512") { + if (fusedOps.size() == 2 && fusedOps[0] == std::string("FakeQuantize") && fusedOps[1] == std::string("Relu")) { + GTEST_SKIP() << "Skip MatMul BF16 FakeQuantization Fusing test" << std::endl; + } + } + } + } + run(); + CheckPluginRelatedResults(compiledModel, cpuNodeType); +} + +namespace MatMul { +const std::map& emptyAdditionalConfig() { + static const std::map emptyAdditionalConfig; + return emptyAdditionalConfig; +} + +const std::vector& filterSpecificParams() { + static const std::vector specificParams = { + CPUSpecificParams{{}, {}, {"gemm_acl"}, "gemm_acl"}, + CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"}}; + return specificParams; +} + +const std::vector& netPRCs() { + static const std::vector netPRCs { + ElementType::f32, + ElementType::bf16 + }; + return netPRCs; +} + +const std::vector>& additionalConfig() { + static std::vector> additionalConfig { + #ifndef OV_CPU_WITH_MLAS + // FP32 precision is covered by MLAS + std::map{/* empty config */}, + #endif + {{PluginConfigParams::KEY_ENFORCE_BF16, 
PluginConfigParams::YES}} + }; + return additionalConfig; +} + +const std::vector& matmulFusingParams() { + static std::vector matmulFusingParams { + emptyFusingSpec, + fusingElu, + fusingSqrt, + fusingPReluPerTensor, + fusingMultiplyPerChannel, + }; + return matmulFusingParams; +} + +const std::vector& IS2D_nightly() { + static const std::vector IS2D_nightly = { + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, false}}, + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, false}}, + + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, false}}, + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, true}}, + + {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {true, true}}, + {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {false, true}}, + + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, true}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, + + { + { + {{-1, -1}, {{71, 128}, {50, 128}}}, + {{128, 20}, {{128, 20}, {128, 20}}} + }, + {false, false} + }, + { + { + {{-1, 59}, {{10, 59}, {15, 59}, {15, 59}}}, + {{59, 1}, {{59, 1}, {59, 1}, {59, 1}}} + }, + {true, false} + }, + { + { + {{{0, 120}, 59}, {{5, 59}, {11, 59}, {5, 59}, {10, 59}}}, + {{59, 120}, {{59, 120}, {59, 120}, {59, 120}, {59, 120}}} + }, + {false, true} + }, + }; + return IS2D_nightly; +} + +const std::vector& IS2D_smoke() { + static const std::vector IS2D_smoke = { + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, true}}, + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, true}}, + + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, false}}, + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, true}}, + + {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {false, false}}, + {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {true, false}}, + + 
{static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, + + { + { + {{-1, -1}, {{20, 60}, {20, 60}}}, + {{60, 120}, {{60, 120}, {60, 120}}} + }, + {false, false} + }, + { + { + {{{0, 100}, {0, 12}}, {{20, 1}, {14, 1}, {20, 1}, {14, 1}}}, + {{1, 120}, {{1, 120}, {1, 120}, {1, 120}, {1, 120}}} + }, + {true, true} + }, + }; + return IS2D_smoke; +} + +const std::vector& IS3D_smoke() { + static const std::vector IS3D_smoke = { + {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, false}}, + {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, true}}, + // needed by 'IS3D_Brgconv1x1_smoke' + {static_shapes_to_test_representation({{1, 1, 120}, {120, 120}}), {false, false}}, + {static_shapes_to_test_representation({{3, 1, 120}, {120, 120}}), {false, false}}, + + {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {true, false}}, + {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {false, true}}, + + { + { + {{1, 5, 32}, {{1, 5, 32}, {1, 5, 32}}}, + {{32, 3}, {{32, 3}, {32, 3}}} + }, + {false, true} + }, + + {static_shapes_to_test_representation({{1, 429}, {1, 429, 1}}), {true, true}}, + { + { + {{-1, -1}, {{1, 129}, {2, 129}, {1, 129}, {2, 129}}}, + {{1, 129, 1}, {{1, 129, 1}, {1, 129, 1}, {1, 129, 1}, {1, 129, 1}}} + }, + {true, true} + }, + + { + { + {{{0, 60}, {0, 60}, {0, 60}}, {{1, 3, 14}, {1, 7, 14}}}, + {{14, 10}, {{14, 10}, {14, 10}}} + }, + {true, true} + }, + }; + return IS3D_smoke; +} + +} // namespace MatMul +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.hpp new file mode 100644 index 00000000000000..b965fab5ee5b46 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/matmul.hpp @@ -0,0 +1,69 @@ +// 
Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/mat_mul.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ie_precision.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include "ov_models/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +enum class MatMulNodeType { + MatMul, + FullyConnected +}; + +struct ShapeRelatedParams { + std::vector inputShapes; + std::pair transpose; +}; + +typedef std::tuple< + ShapeRelatedParams, + ElementType, // Network precision + ElementType, // Input precision + ElementType, // Output precision + ngraph::helpers::InputLayerType, // Secondary input type + TargetDevice, // Device name + std::map // Additional network configuration +> MatMulLayerTestParamsSet; + +using MatMulLayerCPUTestParamSet = std::tuple; + +class MatMulLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + std::string cpuNodeType; + + template + void transpose(T& shape); + + void SetUp() override; +}; + +namespace MatMul { + const std::vector& netPRCs(); + const std::vector& matmulFusingParams(); + const std::vector>& additionalConfig(); + const std::map& emptyAdditionalConfig(); + const std::vector& filterSpecificParams(); + const std::vector& IS2D_nightly(); + const std::vector& IS2D_smoke(); + const std::vector& IS3D_smoke(); +} // namespace MatMul +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp index f713112cfc72e4..81776f8b5052ee 100755 --- 
a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution.cpp @@ -3,6 +3,7 @@ // #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp index a602d3cbac45a8..81752bdea0e954 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/convolution_backprop_data.cpp @@ -11,6 +11,7 @@ #include "shared_test_classes/base/ov_subgraph.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "test_utils/fusing_test_utils.hpp" using namespace CPUTestUtils; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp index 0ad2af8b85ad4d..60a56a42b38b59 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/depth_to_space.cpp @@ -4,6 +4,7 @@ #include "shared_test_classes/single_layer/depth_to_space.hpp" #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp index 7ea599b7c32847..578381767590da 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution.cpp @@ -7,6 +7,7 @@ #include "test_utils/cpu_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" using namespace InferenceEngine; using namespace CPUTestUtils; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp index 96a295830079ed..ed25946b8470e9 100755 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/group_convolution_backprop_data.cpp @@ -3,6 +3,7 @@ // #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/matmul.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/matmul.cpp new file mode 100644 index 00000000000000..a1eaf7ac28db09 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/matmul.cpp @@ -0,0 +1,337 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/matmul.hpp" +#include "shared_test_classes/single_layer/mat_mul.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace MatMul { +/* ============= MatMul ============= */ +namespace matmul { + +const std::vector IS = { + 
{static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, false}}, + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, false}}, + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, true}}, + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, true}}, + + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, true}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, true}}, + + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, false}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, +}; + +const std::vector IS_Dynamic = { + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 + {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 + {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 + }, + {true, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 + {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 + }, + {false, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 + {{-1, -1}, {{12, 55}, 
{7, 33}}} // input 1 + }, + {true, true} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + }, + {true, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + }, + {false, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + }, + {true, true} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 + }, + {true, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 + }, + {false, true} + }, + { + { //dynamic case 
description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 + }, + {true, true} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {true, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {false, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {true, true} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {true, false} + }, + { + { //dynamic case description each pair per each input has 
{{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {false, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, 16 }, {{ 4, 16 }, { 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {false, false} + }, +}; + +const std::vector IS_Dynamic_nightly = { + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{{5, 15}, {1, 12}, {4, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{{1, 13}, {3, 15}, {1, 10}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 + }, + {true, true} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ {2, 10}, {3, 15}, -1, 16 }, {{ 2, 12, 4, 16 }, { 3, 12, 2, 16 }}}, // input 0 + {{ 1, 1, -1, 4 }, {{ 1, 1, 16, 4 }, { 1, 1, 16, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ 1, 1, -1, 16 }, {{ 1, 1, 4, 16 }, { 1, 1, 2, 16 }}}, // input 0 + {{ {2, 5}, {3, 15}, -1, 4 }, {{ 2, 12, 16, 4 }, { 2, 12, 16, 4 }}} // input 1 + }, + {false, false} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, 16 }, {{ 4, 16 }, { 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, 
{{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, {2, 15}, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ -1, 4 }, {{ 16, 4 }, { 16, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, {1, 15}, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ -1, 4 }, {{ 16, 4 }, { 16, 4 }}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ {1, 3}, {1, 9}, {1, 5}, {1, 10} }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 + {{ {1, 5}, {1, 7}, {1, 8}, {1, 5} }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ {1, 3}, {1, 9}, {1, 5}, {1, 10} }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 + {{ {1, 5}, {1, 7}, {1, 8}, {1, 5} }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 + }, + {false, false} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ 1, 7, 4, -1 }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 + {{ 1, 7, -1, 4 }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ 1, 7, 4, -1 }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 + {{ 1, 7, -1, 4 }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static 
shape case2}, ...} + {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {true, true} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {true, false} + }, + + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 + {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 + }, + {false, true} + }, +}; + +const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); + +const auto testParams = ::testing::Combine(matMulParams, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Static, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); + +const auto matMulParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Dynamic), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); + +const auto testParamsDynamic = ::testing::Combine(matMulParamsDynamic, + ::testing::Values(MatMulNodeType::MatMul), + 
::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic, MatMulLayerCPUTest::getTestCaseName); + +const auto matMulParamsDynamic_nightly = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_nightly), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); + +const auto testParamsDynamic_nightly = ::testing::Combine(matMulParamsDynamic_nightly, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); + +INSTANTIATE_TEST_SUITE_P(nightly_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic_nightly, MatMulLayerCPUTest::getTestCaseName); + +} // namespace matmul +} // namespace MatMul +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/matmul.cpp similarity index 57% rename from src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp rename to src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/matmul.cpp index ab38ccb19510c0..bc357d158f2b57 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/matmul.cpp @@ -1,202 +1,106 @@ -// Copyright (C) 2018-2023 Intel Corporation +// Copyright (C) 2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "single_layer_tests/classes/matmul.hpp" #include "shared_test_classes/single_layer/mat_mul.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" -#include "ie_precision.hpp" 
+#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "test_utils/fusing_test_utils.hpp" #include "ov_models/builders.hpp" #include -using namespace ngraph; using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ngraph::helpers; using namespace ov::test; namespace CPULayerTestsDefinitions { +namespace MatMul { +namespace { +const std::vector IS_x64 = { + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, false}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, false}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, true}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, true}}, +}; -enum class MatMulNodeType { - MatMul, - FullyConnected +std::vector fusingParamsSet2D_nightly { + fusingRelu, +#ifndef OV_CPU_WITH_MLAS + fusingScaleShift, //covered by MLAS +#endif + fusingPReluPerTensor, + fusingFakeQuantizePerChannelRelu, }; -struct ShapeRelatedParams { - std::vector inputShapes; - std::pair transpose; +std::vector fusingParamsSet2D_smoke { +// The following three patterns are covered by MLAS test +#ifndef OV_CPU_WITH_MLAS + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel, +#endif + fusingFakeQuantizePerTensorRelu, }; -typedef std::tuple< - ShapeRelatedParams, - ElementType, // Network precision - ElementType, // Input precision - ElementType, // Output precision - ngraph::helpers::InputLayerType, // Secondary input type - TargetDevice, // Device name - std::map // Additional network configuration -> MatMulLayerTestParamsSet; - -using MatMulLayerCPUTestParamSet = std::tuple; - -class MatMulLayerCPUTest : public testing::WithParamInterface, - virtual public SubgraphBaseTest, public CpuTestWithFusing { -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - MatMulLayerTestParamsSet basicParamsSet; - MatMulNodeType nodeType; - 
fusingSpecificParams fusingParams; - CPUSpecificParams cpuParams; - - std::tie(basicParamsSet, nodeType, fusingParams, cpuParams) = obj.param; - - ElementType netType; - ElementType inType, outType; - ShapeRelatedParams shapeRelatedParams; - ngraph::helpers::InputLayerType secondaryInputType; - TargetDevice targetDevice; - std::map additionalConfig; - std::tie(shapeRelatedParams, netType, inType, outType, secondaryInputType, targetDevice, additionalConfig) = - basicParamsSet; - - std::ostringstream result; - result << (nodeType == MatMulNodeType::MatMul ? "MatMul_" : "FullyConnected_"); - result << "IS="; - for (const auto& shape : shapeRelatedParams.inputShapes) { - result << ov::test::utils::partialShape2str({shape.first}) << "_"; - } - result << "TS="; - for (const auto& shape : shapeRelatedParams.inputShapes) { - result << "("; - if (!shape.second.empty()) { - auto itr = shape.second.begin(); - do { - result << ov::test::utils::vec2str(*itr); - } while (++itr != shape.second.end() && result << "_"); - } - result << ")_"; - } - result << "transpose_a=" << shapeRelatedParams.transpose.first << "_"; - result << "transpose_b=" << shapeRelatedParams.transpose.second << "_"; - result << "secondaryInputType=" << secondaryInputType << "_"; - result << "netPRC=" << netType << "_"; - result << "inPRC=" << inType << "_"; - result << "outPRC=" << outType << "_"; - result << "trgDev=" << targetDevice; - result << "config=("; - for (const auto& configEntry : additionalConfig) { - result << configEntry.first << ", " << configEntry.second << ":"; - } - result << ")"; - result << CpuTestWithFusing::getTestCaseName(fusingParams); - result << CPUTestsBase::getTestCaseName(cpuParams); - - return result.str(); - } +std::vector fusingParamsSet2DBF16 { + emptyFusingSpec, + fusingBias, + fusingRelu, + fusingPReluPerTensor, +}; -protected: - std::string cpuNodeType; +const auto matMulParams_x64 = ::testing::Combine(::testing::ValuesIn(IS_x64), + ::testing::ValuesIn(netPRCs()), + 
::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); - template - void transpose(T& shape) { - IE_ASSERT(shape.size() > 1); - std::swap(*(shape.end() - 1), *(shape.end() - 2)); - } +const auto testParams_Static_IS_x64 = ::testing::Combine(matMulParams_x64, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); - void SetUp() override { - MatMulLayerTestParamsSet basicParamsSet; - MatMulNodeType nodeType; - fusingSpecificParams fusingParams; - CPUSpecificParams cpuParams; - - std::tie(basicParamsSet, nodeType, fusingParams, cpuParams) = this->GetParam(); - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - - ShapeRelatedParams shapeRelatedParams; - ElementType netType; - helpers::InputLayerType secondaryInputType; - std::map additionalConfig; - - std::tie(shapeRelatedParams, netType, inType, outType, secondaryInputType, targetDevice, additionalConfig) = basicParamsSet; - - init_input_shapes(shapeRelatedParams.inputShapes); - - bool transpA = shapeRelatedParams.transpose.first; - bool transpB = shapeRelatedParams.transpose.second; - - if (transpA) { - transpose(inputDynamicShapes[0]); - for (auto& shapes : targetStaticShapes) { - transpose(shapes[0]); - } - } - if (transpB) { - transpose(inputDynamicShapes[1]); - for (auto& shapes : targetStaticShapes) { - transpose(shapes[1]); - } - } - - const auto& inShapeA = inputDynamicShapes[0]; - const auto& inShapeB = inputDynamicShapes[1]; - - // see comment in MatMul::canFuse - if (!(nodeType == MatMulNodeType::MatMul && - std::get<0>(fusingParams) && std::get<0>(fusingParams)->getFusedOpsNames().find("(PerChannel)") != std::string::npos && - std::max(inShapeA.size(), inShapeB.size()) > 2)) - std::tie(postOpMgrPtr, fusedOps) = 
fusingParams; - - configuration.insert(additionalConfig.begin(), additionalConfig.end()); - - if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) - inType = outType = netType = ElementType::bf16; - else - inType = outType = netType; - - cpuNodeType = nodeType == MatMulNodeType::MatMul ? "MatMul" : "FullyConnected"; - selectedType = makeSelectedTypeStr(selectedType, outType); - - ov::ParameterVector params{std::make_shared(netType, inShapeA)}; - - auto matrixB = builder::makeDynamicInputLayer(netType, secondaryInputType, inShapeB); - if (secondaryInputType == helpers::InputLayerType::PARAMETER) { - params.push_back(std::dynamic_pointer_cast(matrixB)); - } - auto matMul = builder::makeMatMul(params[0], matrixB, transpA, transpB); - function = makeNgraphFunction(netType, params, matMul, cpuNodeType); - checkFusingPosition = false; - } -}; +INSTANTIATE_TEST_SUITE_P(smoke_MM_Static_IS_x64, MatMulLayerCPUTest, testParams_Static_IS_x64, MatMulLayerCPUTest::getTestCaseName); -TEST_P(MatMulLayerCPUTest, CompareWithRefs) { - // due to disabled BF16 fakequant fusing: src/plugins/intel_cpu/src/graph_optimizer.cpp#L755, skip this case - if (inType == ElementType::bf16) { - if (cpuNodeType == "FullyConnected") { - if (priority[0].find("amx") != std::string::npos || priority[0] == "brgemm_avx512") { - if (fusedOps.size() == 2 && fusedOps[0] == std::string("FakeQuantize") && fusedOps[1] == std::string("Relu")) { - GTEST_SKIP() << "Skip MatMul BF16 FakeQuantization Fusing test" << std::endl; - } - } - } - } - run(); - CheckPluginRelatedResults(compiledModel, cpuNodeType); -} +const auto testParams2D_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + 
::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2D_smoke), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -namespace { +const auto testParams2DBF16_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke()), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2DBF16), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -/* ============= Common params ============= */ -std::map emptyAdditionalConfig; +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulLayerCPUTest, testParams2D_smoke, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16_smoke, MatMulLayerCPUTest::getTestCaseName); -std::vector> additionalConfig { -#ifndef OV_CPU_WITH_MLAS - // FP32 precision is covered by MLAS - std::map{/* empty config */}, -#endif - {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}} -}; +const auto testParams2D_nightly = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values((emptyAdditionalConfig()))), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2D_nightly), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); std::vector> filterAdditionalConfig_Brgemm() { #ifndef OV_CPU_WITH_MLAS @@ -214,27 +118,6 @@ std::vector> 
filterAdditionalConfig_Brgemm() return additionalConfig; } -std::vector> filterAdditionalConfig_BrgemmAmx() { - std::vector> additionalConfig; - if (with_cpu_x86_bfloat16()) { - additionalConfig.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}); - } - - return additionalConfig; -} - -const std::vector netPRCs { - ElementType::f32, - ElementType::bf16 -}; - -std::vector filterSpecificParams() { - std::vector specificParams; - specificParams.push_back(CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"}); - - return specificParams; -} - //For FP32 precision, FC has brgemm avx2 support but Matmul doen't have brgemm avx2. //Need to specify tryBrgAVX2 based on test case. std::vector filterSpecificParams_Brgemm(bool tryBrgAVX2 = false) { @@ -248,112 +131,65 @@ std::vector filterSpecificParams_Brgemm(bool tryBrgAVX2 = fal return specificParams; } -std::vector filterSpecificParams_BrgemmAmx() { - std::vector specificParams; - if (with_cpu_x86_avx512_core_amx()) { - specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512_amx"}, "brgemm_avx512_amx"}); - } - - return specificParams; -} - - -std::vector filterSpecificParams_Brgconv1x1() { - std::vector specificParams; - if (with_cpu_x86_avx512_core()) { - specificParams.push_back(CPUSpecificParams{{}, {}, {/* brgconv_avx512_1x1 is not a part of fc impl list */}, "brgconv_avx512_1x1"}); - } - - return specificParams; -} - -std::vector filterSpecificParams_MLAS() { - // replace with mlas primitive type - std::vector specificParams; - specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"}); - return specificParams; -} - -/* ============= FullyConnected ============= */ -namespace fullyConnected { - -const std::vector IS2D_smoke = { - {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, true}}, - {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, true}}, - - {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, false}}, - 
{static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, true}}, +const std::vector IS2D_Brgemm_smoke = { + // needed by 'IS2D_Brgconv1x1_smoke' + {static_shapes_to_test_representation({{1, 120}, {120, 120}}), {true, false}}, + {static_shapes_to_test_representation({{1, 128}, {128, 166}}), {true, false}}, - {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {false, false}}, - {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {true, false}}, + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, false}}, + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, true}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, { { - {{-1, -1}, {{20, 60}, {20, 60}}}, - {{60, 120}, {{60, 120}, {60, 120}}} + {{-1, -1}, {{12, 16}, {25, 16}, {12, 16}, {25, 16}}}, + {{16, 35}, {{16, 35}, {16, 35}, {16, 35}, {16, 35}}} }, {false, false} }, { { - {{{0, 100}, {0, 12}}, {{20, 1}, {14, 1}, {20, 1}, {14, 1}}}, - {{1, 120}, {{1, 120}, {1, 120}, {1, 120}, {1, 120}}} + {{{0, 50}, {0, 50}}, {{17, 48}, {15, 48}}}, + {{48, 15}, {{48, 15}, {48, 15}}} }, {true, true} }, }; -const std::vector IS2D_nightly = { - {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, false}}, - {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, false}}, - - {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, false}}, - {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, true}}, +const std::vector IS2D_Brgconv1x1_smoke = { + {static_shapes_to_test_representation({{49, 120}, {120, 120}}), {true, false}}, + {static_shapes_to_test_representation({{79, 120}, {120, 120}}), {true, false}}, - {static_shapes_to_test_representation({{1, 120}, {120, 59}}), {true, true}}, - 
{static_shapes_to_test_representation({{1, 120}, {120, 59}}), {false, true}}, + {static_shapes_to_test_representation({{256, 188}, {188, 120}}), {true, false}}, + {static_shapes_to_test_representation({{256, 188}, {188, 120}}), {true, true}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, true}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 200}}), {false, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 200}}), {false, true}}, { { - {{-1, -1}, {{71, 128}, {50, 128}}}, - {{128, 20}, {{128, 20}, {128, 20}}} + // ip->brg->ip->brg + // {1, 120} are covered in 'IS2D_Brgemm_smoke' which is ip + // {49, 120}, {79, 120} are covered above which is brg1x1 + {{-1, -1}, {{1, 120}, {49, 120}, {1, 120}, {79, 120}}}, + {{120, 120}, {{120, 120}, {120, 120}, {120, 120}, {120, 120}}} }, {false, false} }, { { - {{-1, 59}, {{10, 59}, {15, 59}, {15, 59}}}, - {{59, 1}, {{59, 1}, {59, 1}, {59, 1}}} - }, - {true, false} - }, - { - { - {{{0, 120}, 59}, {{5, 59}, {11, 59}, {5, 59}, {10, 59}}}, - {{59, 120}, {{59, 120}, {59, 120}, {59, 120}, {59, 120}}} + // ip->brg->ip(cached)->brg(cached) + {{{0, 200}, {0, 200}}, {{1, 128}, {199, 128}, {1, 128}, {199, 128}}}, + {{128, 166}, {{128, 166}, {128, 166}}} }, - {false, true} + {true, true} }, }; -std::vector fusingParamsSet2D_smoke { -// The following three patterns are convered by MLAS test -#ifndef OV_CPU_WITH_MLAS - emptyFusingSpec, - fusingBias, - fusingMultiplyPerChannel, -#endif - fusingFakeQuantizePerTensorRelu, -}; - std::vector fusingParamsSet2D_Brgemm_smoke { -// The following three patterns are convered by MLAS test +// The following three patterns are covered by MLAS test #ifndef OV_CPU_WITH_MLAS emptyFusingSpec, fusingBias, @@ -362,371 +198,355 @@ std::vector fusingParamsSet2D_Brgemm_smoke { fusingFakeQuantizePerTensorRelu, }; -std::vector fusingParamsSet2D_nightly { - fusingRelu, -#ifndef 
OV_CPU_WITH_MLAS - fusingScaleShift, //covered by MLAS -#endif - fusingPReluPerTensor, - fusingFakeQuantizePerChannelRelu, -}; - -std::vector fusingParamsSet2DBF16 { - emptyFusingSpec, - fusingBias, - fusingRelu, - fusingPReluPerTensor, -}; +const auto fullyConnectedParams2D_Brgemm_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_Brgemm_smoke), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); -const auto testParams2D_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)), +const auto testParams2D_Brgemm_smoke = ::testing::Combine(fullyConnectedParams2D_Brgemm_smoke, ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2D_smoke), - ::testing::ValuesIn(filterSpecificParams())); + ::testing::ValuesIn(fusingParamsSet2D_Brgemm_smoke), + ::testing::ValuesIn(filterSpecificParams_Brgemm(true))); -const auto testParams2DBF16_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)), - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2DBF16), - ::testing::ValuesIn(filterSpecificParams())); +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgemm, MatMulLayerCPUTest, 
testParams2D_Brgemm_smoke, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulLayerCPUTest, testParams2D_smoke, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16_smoke, MatMulLayerCPUTest::getTestCaseName); +const std::vector IS_brgemm_smoke = { + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, false}}, + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, false}}, -#ifdef OV_CPU_WITH_MLAS -std::vector fusingParamsSet2D_MLAS_smoke { - emptyFusingSpec, - fusingBias, - fusingMultiplyPerChannel -}; + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, true}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, true}}, -const auto testParams2D_MLAS_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)), - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2D_MLAS_smoke), - ::testing::ValuesIn(filterSpecificParams_MLAS())); -INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_MLAS, MatMulLayerCPUTest, testParams2D_MLAS_smoke, MatMulLayerCPUTest::getTestCaseName); -#endif + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, -const auto testParams2D_nightly = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - 
::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)), - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2D_nightly), - ::testing::ValuesIn(filterSpecificParams())); + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, +}; -const auto testParams2DBF16_nightly = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)), - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2DBF16), - ::testing::ValuesIn(filterSpecificParams())); +const auto matMulBrgemmParams_smoke = ::testing::Combine(::testing::ValuesIn(IS_brgemm_smoke), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); -INSTANTIATE_TEST_SUITE_P(nightly_FC_2D, MatMulLayerCPUTest, testParams2D_nightly, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(nightly_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16_nightly, MatMulLayerCPUTest::getTestCaseName); +const auto testBrgemmParams_smoke = ::testing::Combine(matMulBrgemmParams_smoke, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterSpecificParams_Brgemm())); -#ifdef OV_CPU_WITH_MLAS -std::vector fusingParamsSet2D_MLAS_nightly { - fusingScaleShift -}; -const auto testParams2D_MLAS_nightly = 
::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)), - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2D_MLAS_nightly), - ::testing::ValuesIn(filterSpecificParams_MLAS())); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Static, MatMulLayerCPUTest, testBrgemmParams_smoke, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(nightly_FC_2D_MLAS, MatMulLayerCPUTest, testParams2D_MLAS_nightly, MatMulLayerCPUTest::getTestCaseName); -#endif +const std::vector IS_brgemm_nightly = { + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, true}}, + {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, true}}, -const std::vector IS3D_smoke = { - {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, false}}, - {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, true}}, - // needed by 'IS3D_Brgconv1x1_smoke' - {static_shapes_to_test_representation({{1, 1, 120}, {120, 120}}), {false, false}}, - {static_shapes_to_test_representation({{3, 1, 120}, {120, 120}}), {false, false}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, false}}, + {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, false}}, - {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {true, false}}, - {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {false, true}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, true}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, true}}, - { - { - {{1, 5, 32}, {{1, 5, 32}, {1, 5, 32}}}, - {{32, 3}, {{32, 
3}, {32, 3}}} - }, - {false, true} - }, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, false}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}}, +}; - {static_shapes_to_test_representation({{1, 429}, {1, 429, 1}}), {true, true}}, - { +const auto matMulBrgemmParams_nightly = ::testing::Combine(::testing::ValuesIn(IS_brgemm_nightly), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); + +const auto testBrgemmParams_nightly = ::testing::Combine(matMulBrgemmParams_nightly, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterSpecificParams_Brgemm())); + +INSTANTIATE_TEST_SUITE_P(nightly_MM_Brgemm_Static, MatMulLayerCPUTest, testBrgemmParams_nightly, MatMulLayerCPUTest::getTestCaseName); + +const std::vector IS_Brgemm_Dynamic = { { - {{-1, -1}, {{1, 129}, {2, 129}, {1, 129}, {2, 129}}}, - {{1, 129, 1}, {{1, 129, 1}, {1, 129, 1}, {1, 129, 1}, {1, 129, 1}}} + { + {{-1, 256}, {{1, 256}}}, + {{256, 384}, {{256, 384}}} + }, + {false, false} }, - {true, true} - }, - - { { - {{{0, 60}, {0, 60}, {0, 60}}, {{1, 3, 14}, {1, 7, 14}}}, - {{14, 10}, {{14, 10}, {14, 10}}} + { + {{-1, -1}, {{55, 12}, {33, 7}}}, + {{-1, -1}, {{12, 55}, {7, 33}}} + }, + {false, false} + }, + { + { + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, + {{-1, -1}, {{60, 5}, {30, 5}}} + }, + {true, false} + }, + { + { + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, + {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} + }, + {false, true} + }, + { + { + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} + }, + {false, false} + }, + { + { + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, + {{-1, -1, -1}, {{10, 10, 10}, 
{5, 5, 5}}} + }, + {true, true} + }, + { + { + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} + }, + {true, false} + }, + { + { + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, + {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} + }, + {false, true} }, - {true, true} - }, }; -const std::vector IS3D_nightly = { - {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {true, false}}, - {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {true, true}}, +const auto matMulBrgemmParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Brgemm_Dynamic), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); - {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {false, false}}, - {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {true, true}}, +const auto testBrgemmParamsDynamic = ::testing::Combine(matMulBrgemmParamsDynamic, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterSpecificParams_Brgemm())); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Dynamic, MatMulLayerCPUTest, testBrgemmParamsDynamic, MatMulLayerCPUTest::getTestCaseName); +const std::vector IS_Dynamic_Fusing = { { - { - {{-1, -1, -1}, {{1, 32, 120}, {1, 12, 120}}}, - {{120, 3}, {{120, 3}, {120, 3}}} + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{16, 12}, {33, 7}, {16, 12}}}, // input 0 + {{-1, 33}, {{12, 33}, {7, 33}, {12, 33}}} // input 1 }, {false, false} }, { - { - {{-1, -1, 50}, {{1, 2, 50}, {1, 10, 50}, {1, 2, 50}, {2, 2, 50}}}, - {{50, 7}, {{50, 7}, {50, 7}, {50, 7}, {50, 7}}} + 
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, 5}, {{60, 5}, {30, 5}}} // input 1 }, - {true, false} + {false, false} }, { - { - {{-1, -1, 32}, {{1, 5, 32}, {1, 5, 32}}}, - {{32, 3}, {{32, 3}, {32, 3}}} + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, 25}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 }, - {false, true} + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}, {10, 10, 10}}}, // input 0 + {{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}, {10, 10, 5}}} // input 1 + }, + {false, false} }, }; -std::vector fusingParamsSet3D_smoke { -// The following three patterns are convered by MLAS test -#ifndef OV_CPU_WITH_MLAS - emptyFusingSpec, - fusingBias, - fusingMultiplyPerChannel, -#endif - fusingFakeQuantizePerChannel, - fusingScaleShiftAndFakeQuantizePerChannel, -}; - -std::vector fusingParamsSet3D_nightly { - fusingFakeQuantizePerTensorRelu, -}; - -std::vector fusingParamsSet3DBF16 { - emptyFusingSpec, - fusingBias, - fusingMultiplyPerChannel, +const std::vector matmulFusingParams_x64{ + fusingAddPerTensor, + fusingBias, + fusingFakeQuantizePerChannel, + /* @todo FQ unfolds into FQ + Convert + Substract + Multiply after LPT, + * so Relu cannot be fused in this case. 
Should be analysed */ + // fusingFakeQuantizePerChannelRelu, + fusingFakeQuantizePerTensorRelu, + fusingScaleShiftAndFakeQuantizePerChannel, }; -const auto fullyConnectedParams3D_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)); +const auto testParams_x64 = ::testing::Combine(matMulParams_x64, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams_x64), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -const auto fullyConnectedParams3DBF16_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Static_Fusing_x64, MatMulLayerCPUTest, testParams_x64, MatMulLayerCPUTest::getTestCaseName); -const auto testParams3D_smoke = ::testing::Combine(fullyConnectedParams3D_smoke, - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet3D_smoke), - ::testing::ValuesIn(filterSpecificParams())); +const auto matMulParamsDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); -const auto testParams3DBF16_smoke = ::testing::Combine(fullyConnectedParams3DBF16_smoke, - ::testing::Values(MatMulNodeType::FullyConnected), - 
::testing::ValuesIn(fusingParamsSet3DBF16), - ::testing::ValuesIn(filterSpecificParams())); +const auto testParamsDynamicFusing = ::testing::Combine(matMulParamsDynamicFusing, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D_smoke, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16_smoke, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic_Fusing, MatMulLayerCPUTest, testParamsDynamicFusing, MatMulLayerCPUTest::getTestCaseName); -#ifdef OV_CPU_WITH_MLAS -std::vector fusingParamsSet3D_MLAS_smoke { - emptyFusingSpec, - fusingBias, - fusingMultiplyPerChannel -}; +const auto matMulParamsBrgemmDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); -const auto testParams3D_MLAS_smoke = ::testing::Combine(fullyConnectedParams3D_smoke, - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet3D_MLAS_smoke), - ::testing::ValuesIn(filterSpecificParams_MLAS())); -INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_MLAS, MatMulLayerCPUTest, testParams3D_MLAS_smoke, MatMulLayerCPUTest::getTestCaseName); -#endif +const auto testParamsBrgemmDynamicFusing = ::testing::Combine(matMulParamsBrgemmDynamicFusing, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams()), + ::testing::ValuesIn(filterSpecificParams_Brgemm())); -const auto fullyConnectedParams3D_nightly = ::testing::Combine(::testing::ValuesIn(IS3D_nightly), - ::testing::Values(ElementType::f32), - 
::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Dynamic_Fusing, MatMulLayerCPUTest, testParamsBrgemmDynamicFusing, MatMulLayerCPUTest::getTestCaseName); -const auto fullyConnectedParams3DBF16_nightly = ::testing::Combine(::testing::ValuesIn(IS3D_nightly), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); +std::vector> filterAdditionalConfig_BrgemmAmx() { + std::vector> additionalConfig; + if (with_cpu_x86_bfloat16()) { + additionalConfig.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}); + } -const auto testParams3D_nightly = ::testing::Combine(fullyConnectedParams3D_nightly, - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet3D_nightly), - ::testing::ValuesIn(filterSpecificParams())); + return additionalConfig; +} -const auto testParams3DBF16_nightly = ::testing::Combine(fullyConnectedParams3DBF16_nightly, - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet3DBF16), - ::testing::ValuesIn(filterSpecificParams())); +std::vector filterSpecificParams_BrgemmAmx() { + std::vector specificParams; + if (with_cpu_x86_avx512_core_amx()) { + specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512_amx"}, "brgemm_avx512_amx"}); + } -INSTANTIATE_TEST_SUITE_P(nightly_FC_3D, MatMulLayerCPUTest, testParams3D_nightly, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(nightly_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16_nightly, MatMulLayerCPUTest::getTestCaseName); + return specificParams; +} -const 
std::vector IS2D_Brgemm_smoke = { - // needed by 'IS2D_Brgconv1x1_smoke' - {static_shapes_to_test_representation({{1, 120}, {120, 120}}), {true, false}}, - {static_shapes_to_test_representation({{1, 128}, {128, 166}}), {true, false}}, +const std::vector IS_brgemm_Amx_smoke = { + {static_shapes_to_test_representation({{1, 2, 32, 64}, {64, 5}}), {false, false}}, + {static_shapes_to_test_representation({{1, 2, 32, 64}, {64, 5}}), {true, false}}, - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, false}}, - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, true}}, + {static_shapes_to_test_representation({{7, 32, 128}, {3, 7, 128, 5}}), {false, true}}, + {static_shapes_to_test_representation({{7, 32, 128}, {3, 7, 128, 5}}), {true, true}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, + {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, - { - { - {{-1, -1}, {{12, 16}, {25, 16}, {12, 16}, {25, 16}}}, - {{16, 35}, {{16, 35}, {16, 35}, {16, 35}, {16, 35}}} - }, - {false, false} - }, - { - { - {{{0, 50}, {0, 50}}, {{17, 48}, {15, 48}}}, - {{48, 15}, {{48, 15}, {48, 15}}} - }, - {true, true} - }, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, + {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, }; -const std::vector IS2D_Brgemm_nightly = { - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {false, false}}, - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {false, true}}, - - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, true}}, +const auto matMulBrgemmAmxParams_smoke = 
::testing::Combine(::testing::ValuesIn(IS_brgemm_Amx_smoke), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); - { - { - {{-1, 128}, {{11, 128}, {20, 128}, {11, 128}, {15, 128}}}, - {{128, 11}, {{128, 11}, {128, 11}, {128, 11}, {128, 11}}} - }, - {true, false} - }, - { - { - {{{0, 50}, 32}, {{50, 32}, {23, 32}}}, - {{32, 21}, {{32, 21}, {32, 21}}} - }, - {false, true} - }, +std::vector matmulBrgemmAmxFusingParams { + emptyFusingSpec, + fusingPReluPerTensor, + fusingAddPerTensor, + fusingBias, }; -const auto fullyConnectedParams2D_Brgemm_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_Brgemm_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); +const auto testBrgemmAmxParams_smoke = ::testing::Combine(matMulBrgemmAmxParams_smoke, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulBrgemmAmxFusingParams), + ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); -const auto testParams2D_Brgemm_smoke = ::testing::Combine(fullyConnectedParams2D_Brgemm_smoke, - ::testing::Values(MatMulNodeType::FullyConnected), - ::testing::ValuesIn(fusingParamsSet2D_Brgemm_smoke), - ::testing::ValuesIn(filterSpecificParams_Brgemm(true))); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Amx_Static, MatMulLayerCPUTest, testBrgemmAmxParams_smoke, MatMulLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgemm, MatMulLayerCPUTest, testParams2D_Brgemm_smoke, MatMulLayerCPUTest::getTestCaseName); +const auto matMulBrgemmAmxParams_nightly = 
::testing::Combine(::testing::ValuesIn(IS_brgemm_Amx_smoke), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); -const std::vector IS2D_Brgconv1x1_smoke = { - {static_shapes_to_test_representation({{49, 120}, {120, 120}}), {true, false}}, - {static_shapes_to_test_representation({{79, 120}, {120, 120}}), {true, false}}, +const auto testBrgemmAmxParams_nightly = ::testing::Combine(matMulBrgemmAmxParams_nightly, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulBrgemmAmxFusingParams), + ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); - {static_shapes_to_test_representation({{256, 188}, {188, 120}}), {true, false}}, - {static_shapes_to_test_representation({{256, 188}, {188, 120}}), {true, true}}, +INSTANTIATE_TEST_SUITE_P(nightly_MM_Brgemm_Amx_Static, MatMulLayerCPUTest, testBrgemmAmxParams_nightly, MatMulLayerCPUTest::getTestCaseName); - {static_shapes_to_test_representation({{71, 128}, {128, 200}}), {false, false}}, - {static_shapes_to_test_representation({{71, 128}, {128, 200}}), {false, true}}, +const auto matMulBrgemmAmxParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Brgemm_Dynamic), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); + +const auto testBrgemmAmxParamsDynamic = ::testing::Combine(matMulBrgemmAmxParamsDynamic, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Amx_Dynamic, MatMulLayerCPUTest, 
testBrgemmAmxParamsDynamic, MatMulLayerCPUTest::getTestCaseName); + +const std::vector IS2D_Brgemm_Amx_smoke = { + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, false}}, + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, true}}, + + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, { { - // ip->brg->ip->brg - // {1, 120} are covered in 'IS2D_Brgemm_smoke' which is ip - // {49, 120}, {79, 120} are covered above which is brg1x1 - {{-1, -1}, {{1, 120}, {49, 120}, {1, 120}, {79, 120}}}, - {{120, 120}, {{120, 120}, {120, 120}, {120, 120}, {120, 120}}} + {{-1, -1}, {{12, 16}, {25, 16}, {12, 16}, {25, 16}}}, + {{16, 35}, {{16, 35}, {16, 35}, {16, 35}, {16, 35}}} }, {false, false} }, { { - // ip->brg->ip(cached)->brg(cached) - {{{0, 200}, {0, 200}}, {{1, 128}, {199, 128}, {1, 128}, {199, 128}}}, - {{128, 166}, {{128, 166}, {128, 166}}} + {{{0, 50}, {0, 50}}, {{17, 48}, {15, 48}}}, + {{48, 15}, {{48, 15}, {48, 15}}} }, {true, true} }, }; +std::vector filterSpecificParams_Brgconv1x1() { + std::vector specificParams; + if (with_cpu_x86_avx512_core()) { + specificParams.push_back(CPUSpecificParams{{}, {}, {/* brgconv_avx512_1x1 is not a part of fc impl list */}, "brgconv_avx512_1x1"}); + } + + return specificParams; +} + const auto fullyConnectedParams2D_Brgconv1x1_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_Brgconv1x1_smoke), ::testing::Values(ElementType::f32), ::testing::Values(ElementType::undefined), ::testing::Values(ElementType::undefined), ::testing::Values(helpers::InputLayerType::CONSTANT), ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)); + ::testing::Values(emptyAdditionalConfig())); const auto testParams2D_Brgconv1x1_smoke = ::testing::Combine(fullyConnectedParams2D_Brgconv1x1_smoke, ::testing::Values(MatMulNodeType::FullyConnected), @@ -735,7 +555,6 @@ const 
auto testParams2D_Brgconv1x1_smoke = ::testing::Combine(fullyConnectedPara INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgconv1x1, MatMulLayerCPUTest, testParams2D_Brgconv1x1_smoke, MatMulLayerCPUTest::getTestCaseName); - const std::vector IS3D_Brgconv1x1_smoke = { {static_shapes_to_test_representation({{2, 49, 120}, {120, 120}}), {true, false}}, {static_shapes_to_test_representation({{4, 79, 120}, {120, 120}}), {true, false}}, @@ -780,7 +599,7 @@ const auto fullyConnectedParams3D_Brgconv1x1_smoke = ::testing::Combine(::testin ::testing::Values(ElementType::undefined), ::testing::Values(helpers::InputLayerType::CONSTANT), ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(emptyAdditionalConfig)); + ::testing::Values(emptyAdditionalConfig())); const auto testParams3D_Brgconv1x1_smoke = ::testing::Combine(fullyConnectedParams3D_Brgconv1x1_smoke, ::testing::Values(MatMulNodeType::FullyConnected), @@ -789,29 +608,6 @@ const auto testParams3D_Brgconv1x1_smoke = ::testing::Combine(fullyConnectedPara INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_Brgconv1x1, MatMulLayerCPUTest, testParams3D_Brgconv1x1_smoke, MatMulLayerCPUTest::getTestCaseName); -const std::vector IS2D_Brgemm_Amx_smoke = { - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, false}}, - {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {true, true}}, - - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, false}}, - {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, - - { - { - {{-1, -1}, {{12, 16}, {25, 16}, {12, 16}, {25, 16}}}, - {{16, 35}, {{16, 35}, {16, 35}, {16, 35}, {16, 35}}} - }, - {false, false} - }, - { - { - {{{0, 50}, {0, 50}}, {{17, 48}, {15, 48}}}, - {{48, 15}, {{48, 15}, {48, 15}}} - }, - {true, true} - }, -}; - const auto fullyConnectedParams2D_Brgemm_Amx_smoke = ::testing::Combine(::testing::ValuesIn(IS2D_Brgemm_Amx_smoke), ::testing::Values(ElementType::f32), ::testing::Values(ElementType::undefined), @@ -827,6 
+623,28 @@ const auto testParams2D_Brgemm_Amx_smoke = ::testing::Combine(fullyConnectedPara INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgemm_Amx, MatMulLayerCPUTest, testParams2D_Brgemm_Amx_smoke, MatMulLayerCPUTest::getTestCaseName); +const std::vector IS2D_Brgemm_nightly = { + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {false, false}}, + {static_shapes_to_test_representation({{59, 16}, {16, 120}}), {false, true}}, + + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, true}}, + + { + { + {{-1, 128}, {{11, 128}, {20, 128}, {11, 128}, {15, 128}}}, + {{128, 11}, {{128, 11}, {128, 11}, {128, 11}, {128, 11}}} + }, + {true, false} + }, + { + { + {{{0, 50}, 32}, {{50, 32}, {23, 32}}}, + {{32, 21}, {{32, 21}, {32, 21}}} + }, + {false, true} + }, +}; const auto fullyConnectedParams2D_Brgemm_nightly = ::testing::Combine(::testing::ValuesIn(IS2D_Brgemm_nightly), ::testing::Values(ElementType::f32), @@ -858,610 +676,130 @@ const auto testParams2D_Brgemm_Amx_nightly = ::testing::Combine(fullyConnectedPa INSTANTIATE_TEST_SUITE_P(nightly_FC_2D_Brgemm_Amx, MatMulLayerCPUTest, testParams2D_Brgemm_Amx_nightly, MatMulLayerCPUTest::getTestCaseName); -} // namespace fullyConnected +const auto testParams2DBF16_nightly = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly()), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2DBF16), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); + +INSTANTIATE_TEST_SUITE_P(nightly_FC_2D, MatMulLayerCPUTest, testParams2D_nightly, MatMulLayerCPUTest::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(nightly_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16_nightly, MatMulLayerCPUTest::getTestCaseName); + +std::vector fusingParamsSet3D_smoke { +// The following three patterns are convered by MLAS test +#ifndef OV_CPU_WITH_MLAS + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel, +#endif + fusingFakeQuantizePerChannel, + fusingScaleShiftAndFakeQuantizePerChannel, +}; + +std::vector fusingParamsSet3DBF16 { + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel, +}; +const auto fullyConnectedParams3DBF16_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke()), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); -/* ============= MatMul ============= */ -namespace matmul { +const auto testParams3DBF16_smoke = ::testing::Combine(fullyConnectedParams3DBF16_smoke, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3DBF16), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -const std::vector IS = { - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, false}}, - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, false}}, - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, true}}, - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, true}}, +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16_smoke, MatMulLayerCPUTest::getTestCaseName); - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, false}}, - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, false}}, - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, true}}, - 
{static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, true}}, +const auto fullyConnectedParams3D_smoke = ::testing::Combine(::testing::ValuesIn(IS3D_smoke()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())); + +const auto testParams3D_smoke = ::testing::Combine(fullyConnectedParams3D_smoke, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3D_smoke), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, true}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, true}}, +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D_smoke, MatMulLayerCPUTest::getTestCaseName); - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, false}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, +std::vector fusingParamsSet3D_nightly { + fusingFakeQuantizePerTensorRelu, }; -const std::vector IS_Dynamic = { - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 - {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, 
-1}, {{55, 12}, {33, 7}}}, // input 0 - {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 - }, - {true, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 - {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 - }, - {false, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 - {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 - }, - {true, true} - }, +const std::vector IS3D_nightly = { + {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {true, false}}, + {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {true, true}}, + + {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {false, false}}, + {static_shapes_to_test_representation({{1, 32, 120}, {120, 50}}), {true, true}}, { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 - {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + { + {{-1, -1, -1}, {{1, 32, 120}, {1, 12, 120}}}, + {{120, 3}, {{120, 3}, {120, 3}}} }, {false, false} }, { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 - {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + { + {{-1, -1, 50}, {{1, 2, 50}, {1, 10, 50}, {1, 2, 50}, {2, 2, 50}}}, + {{50, 7}, {{50, 7}, {50, 7}, {50, 7}, {50, 7}}} }, {true, false} }, { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 - {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 + { + {{-1, -1, 32}, {{1, 5, 32}, {1, 5, 32}}}, 
+ {{32, 3}, {{32, 3}, {32, 3}}} }, {false, true} }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 - {{-1, -1}, {{60, 5}, {30, 5}}} // input 1 - }, - {true, true} - }, +}; - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 - {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 - {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 - }, - {true, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 - {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 - }, - {false, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 - {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 - }, - {true, true} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {true, false} - }, - { - { 
//dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {false, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {true, true} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {true, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {false, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 16 }, {{ 4, 16 }, { 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - 
{false, false} - }, -}; - -const std::vector IS_Dynamic_nightly = { - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{{5, 15}, {1, 12}, {4, 15}}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 - {{{1, 13}, {3, 15}, {1, 10}}, {{10, 10, 10}, {5, 5, 5}}} // input 1 - }, - {true, true} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ {2, 10}, {3, 15}, -1, 16 }, {{ 2, 12, 4, 16 }, { 3, 12, 2, 16 }}}, // input 0 - {{ 1, 1, -1, 4 }, {{ 1, 1, 16, 4 }, { 1, 1, 16, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ 1, 1, -1, 16 }, {{ 1, 1, 4, 16 }, { 1, 1, 2, 16 }}}, // input 0 - {{ {2, 5}, {3, 15}, -1, 4 }, {{ 2, 12, 16, 4 }, { 2, 12, 16, 4 }}} // input 1 - }, - {false, false} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 16 }, {{ 4, 16 }, { 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, {2, 15}, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ -1, 4 }, {{ 16, 4 }, { 16, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, {1, 15}, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ -1, 4 }, {{ 16, 4 }, { 16, 4 }}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ {1, 3}, {1, 9}, {1, 
5}, {1, 10} }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 - {{ {1, 5}, {1, 7}, {1, 8}, {1, 5} }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ {1, 3}, {1, 9}, {1, 5}, {1, 10} }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 - {{ {1, 5}, {1, 7}, {1, 8}, {1, 5} }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 - }, - {false, false} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ 1, 7, 4, -1 }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 - {{ 1, 7, -1, 4 }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ 1, 7, 4, -1 }, {{ 1, 7, 4, 5 }, { 1, 7, 4, 4 }}}, // input 0 - {{ 1, 7, -1, 4 }, {{ 1, 7, 5, 4 }, { 1, 7, 4, 4 }}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - {true, true} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - {true, false} - }, - - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{ -1, 12, -1, 16 }, {{ 1, 12, 4, 16 }, { 2, 12, 2, 16 }}}, // input 0 - {{ {1, 5}, 12, -1, 4 }, {{ 1, 12, 16, 4 }, { 1, 12, 16, 4 }}} // input 1 - }, - {false, true} - }, 
-}; - -std::vector matmulFusingParams { - emptyFusingSpec, - fusingElu, - fusingSqrt, - fusingPReluPerTensor, - fusingMultiplyPerChannel, - fusingAddPerTensor, - fusingBias, - fusingFakeQuantizePerChannel, - /* @todo FQ unfolds into FQ + Convert + Substract + Multiply after LPT, - * so Relu cannot be fused in this case. Should be analysed */ - // fusingFakeQuantizePerChannelRelu, - fusingFakeQuantizePerTensorRelu, - fusingScaleShiftAndFakeQuantizePerChannel, -}; - -const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); - -const auto testParams = ::testing::Combine(matMulParams, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulFusingParams), - ::testing::ValuesIn(filterSpecificParams())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Static, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); - - -const auto matMulParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Dynamic), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); - -const auto testParamsDynamic = ::testing::Combine(matMulParamsDynamic, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::Values(emptyFusingSpec), - ::testing::ValuesIn(filterSpecificParams())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic, MatMulLayerCPUTest::getTestCaseName); - -const auto matMulParamsDynamic_nightly = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_nightly), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - 
::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); - -const auto testParamsDynamic_nightly = ::testing::Combine(matMulParamsDynamic_nightly, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::Values(emptyFusingSpec), - ::testing::ValuesIn(filterSpecificParams())); - -INSTANTIATE_TEST_SUITE_P(nightly_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic_nightly, MatMulLayerCPUTest::getTestCaseName); - -const std::vector IS_Dynamic_Fusing = { - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1}, {{16, 12}, {33, 7}, {16, 12}}}, // input 0 - {{-1, 33}, {{12, 33}, {7, 33}, {12, 33}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 - {{-1, 5}, {{60, 5}, {30, 5}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 - {{-1, -1, -1, 25}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 - }, - {false, false} - }, - { - { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}, {10, 10, 10}}}, // input 0 - {{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}, {10, 10, 5}}} // input 1 - }, - {false, false} - }, -}; - -const auto matMulParamsDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - 
::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); - -const auto testParamsDynamicFusing = ::testing::Combine(matMulParamsDynamicFusing, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulFusingParams), - ::testing::ValuesIn(filterSpecificParams())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic_Fusing, MatMulLayerCPUTest, testParamsDynamicFusing, MatMulLayerCPUTest::getTestCaseName); - -const std::vector IS_brgemm_smoke = { - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, false}}, - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, false}}, - - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, true}}, - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, true}}, - - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, - - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, -}; - -const std::vector IS_brgemm_nightly = { - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, true}}, - {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, true}}, - - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {false, false}}, - {static_shapes_to_test_representation({{7, 32, 120}, {3, 7, 120, 50}}), {true, false}}, - - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, true}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, true}}, - - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, false}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}}, -}; - -const auto matMulBrgemmParams_smoke = 
::testing::Combine(::testing::ValuesIn(IS_brgemm_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); - -const auto testBrgemmParams_smoke = ::testing::Combine(matMulBrgemmParams_smoke, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulFusingParams), - ::testing::ValuesIn(filterSpecificParams_Brgemm())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Static, MatMulLayerCPUTest, testBrgemmParams_smoke, MatMulLayerCPUTest::getTestCaseName); - -std::vector matmulBrgemmAmxFusingParams { - emptyFusingSpec, - fusingPReluPerTensor, - fusingAddPerTensor, - fusingBias, -}; - -const std::vector IS_brgemm_Amx_smoke = { - {static_shapes_to_test_representation({{1, 2, 32, 64}, {64, 5}}), {false, false}}, - {static_shapes_to_test_representation({{1, 2, 32, 64}, {64, 5}}), {true, false}}, - - {static_shapes_to_test_representation({{7, 32, 128}, {3, 7, 128, 5}}), {false, true}}, - {static_shapes_to_test_representation({{7, 32, 128}, {3, 7, 128, 5}}), {true, true}}, - - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}}, - {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}}, - - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, - {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, -}; - -const auto matMulBrgemmAmxParams_smoke = ::testing::Combine(::testing::ValuesIn(IS_brgemm_Amx_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); - -const auto 
testBrgemmAmxParams_smoke = ::testing::Combine(matMulBrgemmAmxParams_smoke, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulBrgemmAmxFusingParams), - ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Amx_Static, MatMulLayerCPUTest, testBrgemmAmxParams_smoke, MatMulLayerCPUTest::getTestCaseName); - -const auto matMulBrgemmParams_nightly = ::testing::Combine(::testing::ValuesIn(IS_brgemm_nightly), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); - -const auto testBrgemmParams_nightly = ::testing::Combine(matMulBrgemmParams_nightly, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulFusingParams), - ::testing::ValuesIn(filterSpecificParams_Brgemm())); - -INSTANTIATE_TEST_SUITE_P(nightly_MM_Brgemm_Static, MatMulLayerCPUTest, testBrgemmParams_nightly, MatMulLayerCPUTest::getTestCaseName); - -const auto matMulBrgemmAmxParams_nightly = ::testing::Combine(::testing::ValuesIn(IS_brgemm_Amx_smoke), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); - -const auto testBrgemmAmxParams_nightly = ::testing::Combine(matMulBrgemmAmxParams_nightly, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulBrgemmAmxFusingParams), - ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); - -INSTANTIATE_TEST_SUITE_P(nightly_MM_Brgemm_Amx_Static, MatMulLayerCPUTest, testBrgemmAmxParams_nightly, MatMulLayerCPUTest::getTestCaseName); - - -const std::vector IS_Brgemm_Dynamic = { - { - { - {{-1, 256}, 
{{1, 256}}}, - {{256, 384}, {{256, 384}}} - }, - {false, false} - }, - { - { - {{-1, -1}, {{55, 12}, {33, 7}}}, - {{-1, -1}, {{12, 55}, {7, 33}}} - }, - {false, false} - }, - { - { - {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, - {{-1, -1}, {{60, 5}, {30, 5}}} - }, - {true, false} - }, - { - { - {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, - {{-1, -1, -1, -1}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} - }, - {false, true} - }, - { - { - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} - }, - {false, false} - }, - { - { - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, - {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}} - }, - {true, true} - }, - { - { - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} - }, - {true, false} - }, - { - { - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}}, - {{{1, 15}, {1, 15}, {1, 15}}, {{10, 10, 10}, {5, 5, 5}}} - }, - {false, true} - }, -}; - -const auto matMulBrgemmParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Brgemm_Dynamic), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); - -const auto testBrgemmParamsDynamic = ::testing::Combine(matMulBrgemmParamsDynamic, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::Values(emptyFusingSpec), - ::testing::ValuesIn(filterSpecificParams_Brgemm())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Dynamic, MatMulLayerCPUTest, testBrgemmParamsDynamic, MatMulLayerCPUTest::getTestCaseName); - -const auto matMulBrgemmAmxParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Brgemm_Dynamic), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - 
::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_BrgemmAmx())); - -const auto testBrgemmAmxParamsDynamic = ::testing::Combine(matMulBrgemmAmxParamsDynamic, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::Values(emptyFusingSpec), - ::testing::ValuesIn(filterSpecificParams_BrgemmAmx())); - -INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Amx_Dynamic, MatMulLayerCPUTest, testBrgemmAmxParamsDynamic, MatMulLayerCPUTest::getTestCaseName); +const auto fullyConnectedParams3D_nightly = ::testing::Combine(::testing::ValuesIn(IS3D_nightly), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())); -const auto matMulParamsBrgemmDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(filterAdditionalConfig_Brgemm())); +const auto fullyConnectedParams3DBF16_nightly = ::testing::Combine(::testing::ValuesIn(IS3D_nightly), + ::testing::ValuesIn(netPRCs()), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig())); -const auto testParamsBrgemmDynamicFusing = ::testing::Combine(matMulParamsBrgemmDynamicFusing, - ::testing::Values(MatMulNodeType::MatMul), - ::testing::ValuesIn(matmulFusingParams), - ::testing::ValuesIn(filterSpecificParams_Brgemm())); +const auto testParams3DBF16_nightly = 
::testing::Combine(fullyConnectedParams3DBF16_nightly, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3DBF16), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); -INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Dynamic_Fusing, MatMulLayerCPUTest, testParamsBrgemmDynamicFusing, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(nightly_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16_nightly, MatMulLayerCPUTest::getTestCaseName); -} // namespace matmul +const auto testParams3D_nightly = ::testing::Combine(fullyConnectedParams3D_nightly, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3D_nightly), + ::testing::ValuesIn(filterCPUInfo(filterSpecificParams()))); +INSTANTIATE_TEST_SUITE_P(nightly_FC_3D, MatMulLayerCPUTest, testParams3D_nightly, MatMulLayerCPUTest::getTestCaseName); } // namespace - -} // namespace CPULayerTestsDefinitions +} // namespace MatMul +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mlas/matmul.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mlas/matmul.cpp new file mode 100644 index 00000000000000..3be916593f0b7b --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/mlas/matmul.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/matmul.hpp" +#include "shared_test_classes/single_layer/mat_mul.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace ngraph::helpers; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace MatMul { +namespace { +#ifdef OV_CPU_WITH_MLAS +std::vector filterSpecificParams_MLAS() { + // replace with mlas primitive 
type + std::vector specificParams; + specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"}); + return specificParams; +} + +std::vector fusingParamsSet3D_MLAS_smoke { + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel +}; + +const auto testParams3D_MLAS_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS3D_smoke()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3D_MLAS_smoke), + ::testing::ValuesIn(filterSpecificParams_MLAS())); +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_MLAS, MatMulLayerCPUTest, testParams3D_MLAS_smoke, MatMulLayerCPUTest::getTestCaseName); + +std::vector fusingParamsSet2D_MLAS_nightly { + fusingScaleShift +}; +const auto testParams2D_MLAS_nightly = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_nightly()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2D_MLAS_nightly), + ::testing::ValuesIn(filterSpecificParams_MLAS())); + +INSTANTIATE_TEST_SUITE_P(nightly_FC_2D_MLAS, MatMulLayerCPUTest, testParams2D_MLAS_nightly, MatMulLayerCPUTest::getTestCaseName); + +std::vector fusingParamsSet2D_MLAS_smoke { + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel +}; + +const auto testParams2D_MLAS_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke()), + ::testing::Values(ElementType::f32), + 
::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2D_MLAS_smoke), + ::testing::ValuesIn(filterSpecificParams_MLAS())); +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_MLAS, MatMulLayerCPUTest, testParams2D_MLAS_smoke, MatMulLayerCPUTest::getTestCaseName); +#endif +} // namespace +} // namespace MatMul +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp index 89331ea284d49a..89406d9c6a43b1 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/pooling.cpp @@ -5,6 +5,7 @@ #include "single_layer_tests/classes/pooling.hpp" #include "shared_test_classes/single_layer/pooling.hpp" #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "test_utils/fusing_test_utils.hpp" #include #include diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_depth.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_depth.cpp index 8e840ed01ceaec..d0297397d2c2a5 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_depth.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_depth.cpp @@ -4,6 +4,7 @@ #include "shared_test_classes/single_layer/space_to_depth.hpp" #include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" diff --git a/src/plugins/intel_cpu/tests/functional/specific_tests.cmake 
b/src/plugins/intel_cpu/tests/functional/specific_tests.cmake index 6ec6d019bea143..7d1641654f5654 100644 --- a/src/plugins/intel_cpu/tests/functional/specific_tests.cmake +++ b/src/plugins/intel_cpu/tests/functional/specific_tests.cmake @@ -40,6 +40,14 @@ if(DEFINED ENABLE_CPU_SUBSET_TESTS_PATH) ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/fusing_test_utils.cpp ${CPU_SUBSET_TEST_ABS_PATH}) +if(X86_64) + list(APPEND REQUIRED_OBJECT_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/x64/filter_cpu_info.cpp) +elseif(ARM OR AARCH64) + list(APPEND REQUIRED_OBJECT_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/arm/filter_cpu_info.cpp) +endif() + ov_add_test_target( NAME ${SUBSET_TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_concat.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_concat.cpp index e40894ced80918..e908285e950d09 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_concat.cpp @@ -3,6 +3,7 @@ // #include "test_utils/convolution_params.hpp" +#include "test_utils/filter_cpu_info.hpp" #include "subgraph_tests/include/conv_concat.hpp" using namespace InferenceEngine; diff --git a/src/plugins/intel_cpu/tests/functional/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/target_per_test.cmake index c876474426b486..2251b0ab5a9587 100644 --- a/src/plugins/intel_cpu/tests/functional/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/target_per_test.cmake @@ -17,6 +17,14 @@ function(create_target_per_test_for_directory TEST_DIR TARGET_PREFIX) ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/fusing_test_utils.cpp ) +if(X86_64) + list(APPEND REQUIRED_OBJECT_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/x64/filter_cpu_info.cpp) +elseif(ARM OR AARCH64) + list(APPEND REQUIRED_OBJECT_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/test_utils/arm/filter_cpu_info.cpp) +endif() + 
file(GLOB LIST_OF_TEST_FILES ${TEST_DIR}/*.cpp) # create targed for each test file in directory foreach(TEST_FILE ${LIST_OF_TEST_FILES}) diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/arm/filter_cpu_info.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/arm/filter_cpu_info.cpp new file mode 100644 index 00000000000000..bb16db068b01ec --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/test_utils/arm/filter_cpu_info.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" +#include "ie_ngraph_utils.hpp" +#include "openvino/core/type/element_type.hpp" +#include "utils/rt_info/memory_formats_attribute.hpp" +#include "utils/general_utils.h" +#include + +namespace CPUTestUtils { + +std::vector filterCPUInfo(const std::vector& CPUParams) { + std::vector archCPUParams = filterCPUInfoForArch(CPUParams); + std::vector deviceCPUParams = filterCPUInfoForDevice(archCPUParams); + return deviceCPUParams; +} + +std::vector filterCPUInfoForArch(const std::vector& CPUParams) { + std::vector resCPUParams; + const int selectedTypeIndex = 3; + + for (auto param : CPUParams) { + auto selectedTypeStr = std::get(param); + + if (selectedTypeStr.find("acl") == std::string::npos && + selectedTypeStr.find("ref") == std::string::npos) + continue; + + resCPUParams.push_back(param); + } + + return resCPUParams; +} + +std::vector filterCPUInfoForDevice(const std::vector& CPUParams) { + return CPUParams; +} + +} // namespace CPUTestUtils diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp index a1da0abdfb4381..e2b3a7788a4a64 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp @@ -465,29 +465,4 @@ void 
CheckNumberOfNodesWithType(InferenceEngine::ExecutableNetwork &execNet, con CheckNumberOfNodesWithTypes(execNet, {nodeType}, expectedCount); } -std::vector filterCPUInfoForDevice(const std::vector& CPUParams) { - std::vector resCPUParams; - const int selectedTypeIndex = 3; - - for (auto param : CPUParams) { - auto selectedTypeStr = std::get(param); - - if (selectedTypeStr.find("jit") != std::string::npos && !InferenceEngine::with_cpu_x86_sse42()) - continue; - if (selectedTypeStr.find("sse42") != std::string::npos && !InferenceEngine::with_cpu_x86_sse42()) - continue; - if (selectedTypeStr.find("avx") != std::string::npos && !InferenceEngine::with_cpu_x86_avx()) - continue; - if (selectedTypeStr.find("avx2") != std::string::npos && !InferenceEngine::with_cpu_x86_avx2()) - continue; - if (selectedTypeStr.find("avx512") != std::string::npos && !InferenceEngine::with_cpu_x86_avx512f()) - continue; - if (selectedTypeStr.find("amx") != std::string::npos && !InferenceEngine::with_cpu_x86_avx512_core_amx()) - continue; - - resCPUParams.push_back(param); - } - - return resCPUParams; -} } // namespace CPUTestUtils diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp index fff65f9e1c442f..8777314f4980bf 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp @@ -179,7 +179,6 @@ const std::map cpuBF16PluginConfig = // utility functions std::vector filterCPUSpecificParams(const std::vector& paramsVector); -std::vector filterCPUInfoForDevice(const std::vector& CPUParams); void CheckNumberOfNodesWithType(const ov::CompiledModel &compiledModel, const std::string& nodeType, size_t expectedCount); void CheckNumberOfNodesWithType(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType, size_t expectedCount); void CheckNumberOfNodesWithTypes(const ov::CompiledModel 
&compiledModel, const std::unordered_set& nodeTypes, size_t expectedCount); diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/filter_cpu_info.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/filter_cpu_info.hpp new file mode 100644 index 00000000000000..8a92dea80575ab --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/test_utils/filter_cpu_info.hpp @@ -0,0 +1,14 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_test_utils.hpp" +#include + +namespace CPUTestUtils { +std::vector filterCPUInfo(const std::vector& CPUParams); +std::vector filterCPUInfoForArch(const std::vector& CPUParams); +std::vector filterCPUInfoForDevice(const std::vector& CPUParams); +} // namespace CPUTestUtils diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/x64/filter_cpu_info.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/x64/filter_cpu_info.cpp new file mode 100644 index 00000000000000..1e22e6e1cd08c6 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/test_utils/x64/filter_cpu_info.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/filter_cpu_info.hpp" +#include "ie_ngraph_utils.hpp" +#include "openvino/core/type/element_type.hpp" +#include "utils/rt_info/memory_formats_attribute.hpp" +#include "utils/general_utils.h" +#include + +namespace CPUTestUtils { + +std::vector filterCPUInfo(const std::vector& CPUParams) { + std::vector archCPUParams = filterCPUInfoForArch(CPUParams); + std::vector deviceCPUParams = filterCPUInfoForDevice(archCPUParams); + return deviceCPUParams; +} + +std::vector filterCPUInfoForArch(const std::vector& CPUParams) { + std::vector resCPUParams; + const int selectedTypeIndex = 3; + + for (auto param : CPUParams) { + auto selectedTypeStr = std::get(param); + + if (selectedTypeStr.find("acl") != std::string::npos) + 
continue; + + resCPUParams.push_back(param); + } + + return resCPUParams; +} + +std::vector filterCPUInfoForDevice(const std::vector& CPUParams) { + std::vector resCPUParams; + const int selectedTypeIndex = 3; + + for (auto param : CPUParams) { + auto selectedTypeStr = std::get(param); + + if (selectedTypeStr.find("jit") != std::string::npos && !InferenceEngine::with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("sse42") != std::string::npos && !InferenceEngine::with_cpu_x86_sse42()) + continue; + if (selectedTypeStr.find("avx") != std::string::npos && !InferenceEngine::with_cpu_x86_avx()) + continue; + if (selectedTypeStr.find("avx2") != std::string::npos && !InferenceEngine::with_cpu_x86_avx2()) + continue; + if (selectedTypeStr.find("avx512") != std::string::npos && !InferenceEngine::with_cpu_x86_avx512f()) + continue; + if (selectedTypeStr.find("amx") != std::string::npos && !InferenceEngine::with_cpu_x86_avx512_core_amx()) + continue; + + resCPUParams.push_back(param); + } + + return resCPUParams; +} + +} // namespace CPUTestUtils From 09a45bceaec0d6529ff07cf80e66585e53b17b65 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 9 Nov 2023 19:14:27 +0800 Subject: [PATCH 254/275] [CPU][MLAS]Apply lower bound protection for K stride (#20873) --- src/plugins/intel_cpu/thirdparty/mlas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/thirdparty/mlas b/src/plugins/intel_cpu/thirdparty/mlas index f6425b13943348..7a35e48a723944 160000 --- a/src/plugins/intel_cpu/thirdparty/mlas +++ b/src/plugins/intel_cpu/thirdparty/mlas @@ -1 +1 @@ -Subproject commit f6425b1394334822390fcd9da12788c9cd0d11da +Subproject commit 7a35e48a723944972088627be1a8b60841e8f6a5 From 9cc4c25e481e96760eda7ee553220f2492122303 Mon Sep 17 00:00:00 2001 From: Sungeun Kim Date: Thu, 9 Nov 2023 20:46:50 +0900 Subject: [PATCH 255/275] [GPU] print datashape of input for benchmark_app (#20943) * print datashape of input for benchmark_app --- 
.../intel_gpu/runtime/debug_configuration.hpp | 1 + src/plugins/intel_gpu/src/graph/network.cpp | 22 +++++++++++++++++++ .../src/runtime/debug_configuration.cpp | 3 +++ 3 files changed, 26 insertions(+) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 0e6269641e0a44..0a5fb7513ee379 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -100,6 +100,7 @@ class debug_configuration { int verbose_color; // Print verbose color int list_layers; // Print list layers int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive + int print_input_data_shapes; // Print the input data_shape for benchmark_app. int disable_usm; // Disable usm usage int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) int disable_onednn_opt_post_ops; // Disable onednn optimize post operators diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 2aaca0c73b2df5..cd5d0124cd173e 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -1378,6 +1378,28 @@ void network::execute_impl(const std::vector& events) { } } + // print '-data_shape' option for benchmark_app + GPU_DEBUG_IF(debug_config->print_input_data_shapes == 1) { + std::stringstream data_shape_str; + auto add_string = [&data_shape_str](std::string str) { + data_shape_str << ((data_shape_str.rdbuf()->in_avail() == 0) ? 
" -data_shape " : ",") << str; + }; + + for (auto& inst : _exec_order) { + auto name = inst->id(); + auto pos = name.find(':'); + auto type = name.substr(0, pos); + name.erase(0, pos + 1); + if (inst->is_input() && type == "parameter") { + add_string(name + inst->get_output_layout().get_partial_shape().to_string()); + } + } + + GPU_DEBUG_COUT << "[program:" << std::setw(2) << ((get_program() != nullptr) ? get_program()->get_id() : 0) + << "|network:" << std::setw(2) << get_id() << "|iter:" << std::setw(4) << curr_iter << "] benchmark_app cmd: " + << data_shape_str.str() << std::endl; + } + // Store events only in case of OOO queue or enabled Profiling auto store_events = is_out_of_order_queue || _enable_profiling; if (store_events) { diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index eb7324cb7b5747..55f166c4880015 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -108,6 +108,7 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_VerboseColor", "Print verbose color"); message_list.emplace_back("OV_GPU_ListLayers", "Print layers names"); message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); + message_list.emplace_back("OV_GPU_PrintInputDataShapes", "Print data_shapes of input layers for benchmark_app."); message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); @@ -173,6 +174,7 @@ debug_configuration::debug_configuration() , verbose_color(0) , list_layers(0) , print_multi_kernel_perf(0) + , print_input_data_shapes(0) , disable_usm(0) , disable_onednn(0) , 
disable_onednn_opt_post_ops(0) @@ -206,6 +208,7 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("VerboseColor", verbose_color); get_gpu_debug_env_var("ListLayers", list_layers); get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf); + get_gpu_debug_env_var("PrintInputDataShapes", print_input_data_shapes); get_gpu_debug_env_var("DisableUsm", disable_usm); get_gpu_debug_env_var("DumpGraphs", dump_graphs); get_gpu_debug_env_var("DumpSources", dump_sources); From b1705e8bd3081dd5f8cf843a8fa7d0f7a545de60 Mon Sep 17 00:00:00 2001 From: Sungeun Kim Date: Thu, 9 Nov 2023 20:48:34 +0900 Subject: [PATCH 256/275] [GPU] clean up for extend pad/stride/dilation (#20828) * clean up for extend pad/stride/dilation --- .../include/intel_gpu/plugin/common_utils.hpp | 11 ++++++++ .../src/graph/impls/ocl/convolution.cpp | 27 +++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index cd6a1a0e44d3b2..fd529b32b3d6b7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "intel_gpu/runtime/layout.hpp" #include "openvino/core/layout.hpp" #include "openvino/core/type/element_type.hpp" @@ -39,6 +40,16 @@ inline cldnn::tensor tensor_from_dims(const ov::Shape& dims, int def = 1) { } } +template +std::tuple get_xyz(const T data, V def) { + switch (data.size()) { + case 1: return std::make_tuple(def, static_cast(data[0]), def); + case 2: return std::make_tuple(static_cast(data[1]), static_cast(data[0]), def); + case 3: return std::make_tuple(static_cast(data[2]), static_cast(data[1]), static_cast(data[0])); + default: return std::make_tuple(def, def, def); + } +} + inline cldnn::layout make_layout(const ov::element::Type type, const ov::Shape& shape) { 
return cldnn::layout{ov::PartialShape{shape}, cldnn::element_type_to_data_type(type), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp index 2c8904847e21c9..5ca29b7a4f9ac2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp @@ -8,6 +8,7 @@ #include "convolution/convolution_kernel_selector.h" #include "convolution/convolution_params.h" #include "ngraph/validation_util.hpp" +#include "intel_gpu/plugin/common_utils.hpp" namespace cldnn { namespace ocl { @@ -113,30 +114,20 @@ struct convolution_impl : typed_primitive_impl_ocl { uint32_t kz = weights_layout.spatial(2); conv_params.filterSize = { kx, ky, kz }; - // WA: If 1d conv and dynamic shape, 1d pad should be applied to y axis. - if (pads_begin.size() == 1) pads_begin.push_back(0); - if (pads_end.size() == 1) pads_end.push_back(0); - if (stride.size() == 1) stride.push_back(1); - if (dilation.size() == 1) dilation.push_back(1); - - uint32_t pad_begin_z = std::max(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0); - uint32_t pad_begin_y = std::max(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0); - uint32_t pad_begin_x = std::max(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0); + uint32_t pad_begin_x, pad_begin_y, pad_begin_z; + std::tie(pad_begin_x, pad_begin_y, pad_begin_z) = ov::intel_gpu::get_xyz(pads_begin, 0); conv_params.padding_begin = {pad_begin_x, pad_begin_y, pad_begin_z}; - uint32_t pad_end_z = std::max(pads_end.size() >= 3 ? pads_end[pads_end.size() - 3] : 0, 0); - uint32_t pad_end_y = std::max(pads_end.size() >= 2 ? pads_end[pads_end.size() - 2] : 0, 0); - uint32_t pad_end_x = std::max(pads_end.size() >= 1 ? 
pads_end[pads_end.size() - 1] : 0, 0); + uint32_t pad_end_x, pad_end_y, pad_end_z; + std::tie(pad_end_x, pad_end_y, pad_end_z) = ov::intel_gpu::get_xyz(pads_end, 0); conv_params.padding_end = {pad_end_x, pad_end_y, pad_end_z}; - uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; - uint32_t stride_y = stride.size() >= 2 ? static_cast(stride[stride.size() - 2]) : 1; - uint32_t stride_x = stride.size() >= 1 ? static_cast(stride[stride.size() - 1]) : 1; + uint32_t stride_x, stride_y, stride_z; + std::tie(stride_x, stride_y, stride_z) = ov::intel_gpu::get_xyz(stride, 1); conv_params.stride = {stride_x, stride_y, stride_z}; - uint32_t dilation_z = dilation.size() >= 3 ? static_cast(dilation[dilation.size() - 3]) : 1; - uint32_t dilation_y = dilation.size() >= 2 ? static_cast(dilation[dilation.size() - 2]) : 1; - uint32_t dilation_x = dilation.size() >= 1 ? static_cast(dilation[dilation.size() - 1]) : 1; + uint32_t dilation_x, dilation_y, dilation_z; + std::tie(dilation_x, dilation_y, dilation_z) = ov::intel_gpu::get_xyz(dilation, 1); conv_params.dilation = {dilation_x, dilation_y, dilation_z}; if ((impl_param.input_layouts[0].data_type == data_types::u8 || From 8034d1795f8ce4d149c6d480a4056f80e014dde6 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Thu, 9 Nov 2023 15:03:01 +0100 Subject: [PATCH 257/275] change structure (#20988) --- docs/articles_en/documentation/openvino_ir.md | 3 +++ .../documentation/openvino_ir/operation_sets.md | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/articles_en/documentation/openvino_ir.md b/docs/articles_en/documentation/openvino_ir.md index a9595eac3c8329..7bb50ce8cf9850 100644 --- a/docs/articles_en/documentation/openvino_ir.md +++ b/docs/articles_en/documentation/openvino_ir.md @@ -12,6 +12,9 @@ :hidden: openvino_docs_MO_DG_IR_and_opsets + openvino_docs_ops_opset + openvino_docs_operations_specifications + openvino_docs_ops_broadcast_rules 
openvino_docs_MO_DG_prepare_model_convert_model_IR_suitable_for_INT8_inference The models, built and trained using various frameworks, can be large and architecture-dependent. To successfully run inference from any device and maximize the benefits of OpenVINO tools, you can convert the model to the OpenVINO Intermediate Representation (IR) format. diff --git a/docs/articles_en/documentation/openvino_ir/operation_sets.md b/docs/articles_en/documentation/openvino_ir/operation_sets.md index f0692bd2b92b5b..59eaf1bd0ff30d 100644 --- a/docs/articles_en/documentation/openvino_ir/operation_sets.md +++ b/docs/articles_en/documentation/openvino_ir/operation_sets.md @@ -6,13 +6,6 @@ :description: Learn the essentials of representing deep learning models in OpenVINO IR format and the use of supported operation sets. -.. toctree:: - :maxdepth: 1 - :hidden: - - openvino_docs_ops_opset - openvino_docs_operations_specifications - openvino_docs_ops_broadcast_rules This article provides essential information on the format used for representation of deep learning models in OpenVINO toolkit and supported operation sets. 
From 51a17ba6426048bf78d0738befc4e53fae410c02 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Thu, 9 Nov 2023 18:07:57 +0100 Subject: [PATCH 258/275] Workaround failing MO unit when Python API unit are skipped (#20997) --- .github/workflows/linux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index a4b6cae73658c1..ead5a4fb77d601 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -1002,7 +1002,7 @@ jobs: # - name: Python API 1.0 Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + #if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test run: | python3 -m pytest -s ${INSTALL_TEST_DIR}/pyngraph \ --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ @@ -1010,7 +1010,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyngraph/tests_compatibility/test_onnx/test_backend.py - name: Python API 2.0 Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + #if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test run: | # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH From d4dd169ca30010905a78b0ef67eb39bde71134ca Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 9 Nov 2023 23:40:26 +0400 Subject: [PATCH 259/275] Added more smart CI conditions (#20999) --- .github/workflows/linux.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ead5a4fb77d601..d136f91f003b8b 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -452,6 +452,7 @@ jobs: CONFORMANCE_TOOLS_DIR: ${{ github.workspace }}/install/tests/functional_test_utils/layer_tests_summary CONFORMANCE_ARTIFACTS_DIR: ${{ github.workspace }}/install/conformance_artifacts TEST_DEVICE: 'CPU' + if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test steps: - name: Create Directories @@ -1028,6 +1029,8 
@@ jobs: --junitxml=${INSTALL_TEST_DIR}/TEST-ModelOptimizer.xml - name: Python ONNX operators tests + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test || + fromJSON(needs.smart_ci.outputs.affected_components).ONNX_FE.test run: | # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time - ONNX Model Zoo tests are run separately python3 -m pytest -sv ${INSTALL_TEST_DIR}/onnx -k 'not cuda' \ @@ -1074,6 +1077,7 @@ jobs: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - name: PyTorch Layer Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -n logical -m precommit --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: TEST_DEVICE: CPU From ebf1874eee05c3f4e7b0950d1e941b8c727394cd Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Fri, 10 Nov 2023 12:39:58 +0400 Subject: [PATCH 260/275] [GHA] Check OpImplCheck conformance for template plugin (to make sure that all functions are checked by conformance) (#20712) * [CONFROMANCE] Fix Template OpImplCheck on Win * Update windows.yml * check win * Update windows.yml * remove extra changes --- .github/workflows/windows.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 35274b5cccdf75..5f766740024081 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -4,14 +4,14 @@ on: schedule: # at 00:00 on workdays - cron: '0 0 * * 1,2,3,4,5' -# pull_request: -# paths-ignore: -# - '**/docs/**' -# - 'docs/**' -# - '**/**.md' -# - '**.md' -# - '**/layer_tests_summary/**' -# - '**/conformance/**' + # pull_request: + # paths-ignore: + # - '**/docs/**' + # - 'docs/**' + # - '**/**.md' + # - '**.md' + # - '**/layer_tests_summary/**' + # - '**/conformance/**' # push: # paths-ignore: # - '**/docs/**' 
@@ -602,13 +602,12 @@ jobs: - name: SubgraphsDumper tests shell: cmd run: | - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml + call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --device=TEMPLATE --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml - name: Template OpImpl tests - if: ${{ 'false' }} # Ticket: 123572 shell: cmd run: | - call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml + call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml - name: GNA plugin unit tests shell: cmd From 641d3ab3519f5f35b7387d553775892296ee45aa Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 10 Nov 2023 09:49:08 +0100 Subject: [PATCH 261/275] Migrate Constant operator to new API (#20760) * Migrate Constant operator to new API - refactor to reduce binary size * Fix code style * Fix build issues * Apply corrections after review: - Restore mem_size calculation for bit widths >= 8 - Remove element type helpers functions * Use float cast for floating types except f64 --- src/core/include/openvino/op/constant.hpp | 101 ++- .../openvino/reference/utils/type_util.hpp | 1 + src/core/src/op/constant.cpp | 629 +++++------------- src/core/tests/constant.cpp | 11 + 4 files changed, 243 insertions(+), 499 deletions(-) diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 6299dde459061c..ff5dacd75daf79 100644 --- a/src/core/include/openvino/op/constant.hpp +++ 
b/src/core/include/openvino/op/constant.hpp @@ -12,7 +12,6 @@ # define WAS_OV_LIBRARY_DEFINED_CONSTANT #endif -#include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/shared_buffer.hpp" #ifdef WAS_OV_LIBRARY_DEFINED_CONSTANT @@ -22,7 +21,6 @@ #include "openvino/core/coordinate_diff.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/type/element_type_traits.hpp" -#include "openvino/op/op.hpp" namespace ov { @@ -38,10 +36,25 @@ class OPENVINO_API Constant : public Op { Constant() = default; + OPENVINO_SUPPRESS_DEPRECATED_START /// \brief Initialize a constant from tensor /// \param tensor The tensor with data - OPENVINO_SUPPRESS_DEPRECATED_START + OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") Constant(const std::shared_ptr& tensor); + + /// \brief Constructs a tensor constant with the supplied data + /// + /// \param type The element type of the tensor constant. + /// \param shape The shape of the tensor constant. + /// \param data A pointer to pre-allocated shared data. + template + OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") + Constant(const element::Type& type, const Shape& shape, std::shared_ptr> data) + : m_element_type(type), + m_shape(shape) { + m_data = legacy_to_ov_aligned_buffer(data); + constructor_validate_and_infer_types(); + } OPENVINO_SUPPRESS_DEPRECATED_END /// \brief Initialize a constant from ov::Tensor @@ -172,21 +185,6 @@ class OPENVINO_API Constant : public Op { /// \param data A void* to constant data. Constant(const element::Type& type, const Shape& shape, const void* data); - OPENVINO_SUPPRESS_DEPRECATED_START - /// \brief Constructs a tensor constant with the supplied data - /// - /// \param type The element type of the tensor constant. - /// \param shape The shape of the tensor constant. - /// \param data A pointer to pre-allocated shared data. 
- template - OPENVINO_DEPRECATED("This constructor is deprecated and will be removed in 2024.0 release") - Constant(const element::Type& type, const Shape& shape, std::shared_ptr> data) - : m_element_type(type), - m_shape(shape) { - m_data = legacy_to_ov_aligned_buffer(data); - constructor_validate_and_infer_types(); - } - OPENVINO_SUPPRESS_DEPRECATED_END Constant(const element::Type& type, const Shape& shape, const std::shared_ptr& data) : m_element_type(type), m_shape(shape) { @@ -463,14 +461,11 @@ class OPENVINO_API Constant : public Op { // build complains for vector creation based on iterators // which point on different type than destination vector::value_type using IN_T = fundamental_type_for; - auto source_vector = get_vector(); - auto output_size = std::min(num_elements, source_vector.size()); + auto first = get_data_ptr(); + auto output_size = std::min(num_elements, shape_size(m_shape)); output_vector.reserve(output_size); - std::transform(source_vector.begin(), - source_vector.begin() + output_size, - std::back_inserter(output_vector), - [](IN_T c) { + std::transform(first, first + output_size, std::back_inserter(output_vector), [](IN_T c) { #ifdef __clang__ # pragma clang diagnostic push # ifdef __has_warning @@ -489,23 +484,22 @@ class OPENVINO_API Constant : public Op { # pragma warning(disable : 4018) # pragma warning(disable : 4804) #endif - if (!std::is_same::value) { - OPENVINO_ASSERT( - !std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c, - "Cannot cast vector from ", - Type, - " constant to ", - element::from(), - ". Some values are outside the range. Example: ", - c); - OPENVINO_ASSERT(std::numeric_limits::max() >= c, - "Cannot cast vector from ", - Type, - " constant to ", - element::from(), - ". Some values are outside the range. 
Example: ", - c); - } + if (!std::is_same::value) { + OPENVINO_ASSERT(!std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c, + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); + OPENVINO_ASSERT(std::numeric_limits::max() >= c, + "Cannot cast vector from ", + Type, + " constant to ", + element::from(), + ". Some values are outside the range. Example: ", + c); + } #if defined(__clang__) # pragma clang diagnostic pop #elif defined(__GNUC__) @@ -513,8 +507,8 @@ class OPENVINO_API Constant : public Op { #elif defined(_MSC_VER) # pragma warning(pop) #endif - return static_cast(c); - }); + return static_cast(c); + }); } template ::type = true> static ov::fundamental_type_for value_in_range(const ValueT& value) { - const auto result = ov::fundamental_type_for(value); + const auto result = static_cast>(value); OPENVINO_ASSERT(0 <= result && result <= 15, "assigned value out of range u4 values"); return result; } @@ -843,18 +837,21 @@ class OPENVINO_API Constant : public Op { } size_t mem_size() const { - const bool bitwidth_less_than_byte = m_element_type.bitwidth() < 8; - if (bitwidth_less_than_byte) { - const auto size = shape_size(m_shape); - const auto bitwidth = size * m_element_type.bitwidth(); - // for rounding by `(bitwidth + 7) / 8` will work for - // `bitwidth < numeric_limits::max() - 7` - return bitwidth / 8 + (bitwidth % 8 ? 1 : 0); + constexpr size_t bits_in_byte = 8; + const auto bit_width = m_element_type.bitwidth(); + auto size = shape_size(m_shape); + if (bit_width < bits_in_byte) { + size *= bit_width; + return (size % bits_in_byte) ? 
(size / bits_in_byte) + 1 : (size / bits_in_byte); + } else { + return size * m_element_type.size(); } - return shape_size(m_shape) * m_element_type.size(); } + static uint8_t quantize_nf4(float x); + friend struct ValueToString; + element::Type m_element_type; Shape m_shape{}; std::shared_ptr m_data; diff --git a/src/core/reference/include/openvino/reference/utils/type_util.hpp b/src/core/reference/include/openvino/reference/utils/type_util.hpp index f1299b6cd80c38..12291761612340 100644 --- a/src/core/reference/include/openvino/reference/utils/type_util.hpp +++ b/src/core/reference/include/openvino/reference/utils/type_util.hpp @@ -7,6 +7,7 @@ #include #include "openvino/core/type/bfloat16.hpp" +#include "openvino/core/type/element_type.hpp" #include "openvino/core/type/float16.hpp" namespace ov { diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 34e97d73eeee30..1ecee02aa65067 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -9,46 +9,69 @@ #include #include +#include "compare.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "ngraph/runtime/aligned_buffer.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" -#include "openvino/core/type/element_type.hpp" #include "openvino/core/type/float16.hpp" #include "openvino/core/type/nf4.hpp" +#include "openvino/reference/utils/type_util.hpp" #include "openvino/runtime/shared_buffer.hpp" -template -static inline std::string to_cpp_string(T value) { - std::string rc; +namespace ov { +namespace op { + +template +TContainer convert_values_to(std::vector&& values, const Shape& shape) { + auto out = TContainer(shape_size(shape)); + std::replace_copy_if(values.begin(), values.end(), out.begin(), cmp::Less(0), 0); + return out; +} + +namespace { +template ::value>::type* = nullptr> +std::string to_cpp_string(T value) { if (std::isnan(value)) { - rc = "NAN"; + return "NAN"; } else if (std::isinf(value)) { - rc = (value > 0 ? 
"INFINITY" : "-INFINITY"); + return std::signbit(value) ? "-INFINITY" : "INFINITY"; } else { std::stringstream ss; ss << value; - rc = ss.str(); + return ss.str(); } - return rc; } + +std::vector from_string_vector(const std::vector& str_values) { + std::vector values; + values.reserve(str_values.size()); + std::transform(str_values.cbegin(), str_values.cend(), std::back_inserter(values), [](const std::string& s) { + size_t pos; + auto v = std::stold(s, &pos); + OPENVINO_ASSERT(s.size() == pos, "Could not parse literal '", s, "'"); + return v; + }); + return values; +} +} // namespace + +namespace v0 { OPENVINO_SUPPRESS_DEPRECATED_START -std::shared_ptr ov::op::v0::Constant::legacy_to_ov_aligned_buffer( +std::shared_ptr Constant::legacy_to_ov_aligned_buffer( const std::shared_ptr& buffer) { - return std::make_shared>>(buffer->get_ptr(), - buffer->size(), - buffer); + return std::make_shared>>(buffer->get_ptr(), + buffer->size(), + buffer); } -OPENVINO_SUPPRESS_DEPRECATED_END -OPENVINO_SUPPRESS_DEPRECATED_START -ov::op::v0::Constant::Constant(const std::shared_ptr& tensor) { +Constant::Constant(const std::shared_ptr& tensor) { m_element_type = tensor->get_element_type(); m_shape = tensor->get_shape(); // Share data from HostTensor if we work with it // And copy data in other cas if (auto hostTensor = std::dynamic_pointer_cast(tensor)) { - m_data = std::make_shared>>( + m_data = std::make_shared>>( static_cast(hostTensor->get_data_ptr()), tensor->get_size_in_bytes(), tensor); @@ -61,524 +84,233 @@ ov::op::v0::Constant::Constant(const std::shared_ptr& t } OPENVINO_SUPPRESS_DEPRECATED_END -ov::op::v0::Constant::Constant(const ov::Tensor& tensor) { - m_element_type = tensor.get_element_type(); - m_shape = tensor.get_shape(); - // Share data from ov::Tensor - m_data = std::make_shared>(static_cast(tensor.data()), - tensor.get_byte_size(), - tensor); - +Constant::Constant(const Tensor& tensor) + : m_element_type{tensor.get_element_type()}, + m_shape{tensor.get_shape()}, + 
m_data{ + std::make_shared>(static_cast(tensor.data()), tensor.get_byte_size(), tensor)} { constructor_validate_and_infer_types(); } -ov::op::v0::Constant::Constant(const element::Type& type, - const ov::Shape& shape, - const std::vector& values) - : Constant(false, type, shape) { - NGRAPH_SUPPRESS_DEPRECATED_START - NODE_VALIDATION_CHECK(this, - values.size() == shape_size(m_shape) || values.size() == 1, - "Did not get the expected number of literals for a constant of shape ", - m_shape, - " (got ", - values.size(), - ", expected ", - shape_size(m_shape), - "."); - - using Type_t = element::Type_t; - - if (values.size() == 1 && shape_size(m_shape) != 1) { - // broadcast single value - switch (m_element_type) { - case Type_t::boolean: - fill_data(stoi(values[0])); - break; - case Type_t::bf16: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::f16: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::f32: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::f64: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::i4: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::i8: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::i16: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::i32: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::i64: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::u1: - fill_data(stoi(values[0])); - break; - case Type_t::u4: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::u8: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::u16: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::u32: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::u64: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::nf4: - fill_data(ngraph::parse_string(values[0])); - break; - case Type_t::undefined: - 
OPENVINO_THROW("deserialize unsupported type undefined"); - case Type_t::dynamic: - OPENVINO_THROW("deserialize unsupported type dynamic"); - } - update_identical_flags(true, true); - } else { - switch (m_element_type) { - case Type_t::boolean: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::bf16: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::f16: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::f32: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::f64: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::i4: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::i8: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::i16: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::i32: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::i64: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u1: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u4: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u8: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u16: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u32: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::u64: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::nf4: - write_buffer(ngraph::parse_string(values)); - break; - case Type_t::undefined: - OPENVINO_THROW("deserialize unsupported type undefined"); - case Type_t::dynamic: - OPENVINO_THROW("deserialize unsupported type dynamic"); - } - update_identical_flags(false, false); - } - NGRAPH_SUPPRESS_DEPRECATED_END +Constant::Constant(const element::Type& type, const Shape& shape, const std::vector& values) + : Constant(type, shape, from_string_vector(values)) { + const auto is_checked_and_identical = (values.size() == 1) && (shape_size(m_shape) != 1); + 
update_identical_flags(is_checked_and_identical, is_checked_and_identical); } -ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape) : Constant(true, type, shape) {} +Constant::Constant(const element::Type& type, const Shape& shape) : Constant(true, type, shape) {} -ov::op::v0::Constant::Constant(bool memset_allocation, const element::Type& type, const ov::Shape& shape) +Constant::Constant(bool memset_allocation, const element::Type& type, const Shape& shape) : m_element_type(type), m_shape(shape) { allocate_buffer(memset_allocation); constructor_validate_and_infer_types(); } -void ov::op::v0::Constant::allocate_buffer(bool memset_allocation) { - m_data = std::make_shared(mem_size(), host_alignment()); +void Constant::allocate_buffer(bool memset_allocation) { + m_data = std::make_shared(mem_size(), host_alignment()); if (memset_allocation) { std::memset(m_data->get_ptr(), 0, m_data->size()); } } -ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape, const void* data) - : Constant(false, type, shape) { - size_t size = (shape_size(m_shape) * m_element_type.bitwidth() + 7) >> 3; - std::memcpy(get_data_ptr_nc(), data, size); +Constant::Constant(const element::Type& type, const Shape& shape, const void* data) : Constant(false, type, shape) { + std::memcpy(get_data_ptr_nc(), data, mem_size()); } -ov::op::v0::Constant::Constant(const Constant& other) { - m_element_type = other.m_element_type; - m_shape = other.m_shape; - m_data = other.m_data; - update_identical_flags(other.m_all_elements_bitwise_identical_checked, other.m_all_elements_bitwise_identical); +Constant::Constant(const Constant& other) + : m_element_type{other.m_element_type}, + m_shape{other.m_shape}, + m_data{other.m_data}, + m_all_elements_bitwise_identical{other.m_all_elements_bitwise_identical.load()}, + m_all_elements_bitwise_identical_checked{other.m_all_elements_bitwise_identical_checked.load()} { constructor_validate_and_infer_types(); } 
-ov::op::v0::Constant::Constant(const Constant& other, const ov::Shape& new_shape) { - OPENVINO_ASSERT(shape_size(other.m_shape) == shape_size(new_shape), - "ov::Shape size " + std::to_string(shape_size(new_shape)) + " is not equal to " + - std::to_string(shape_size(other.m_shape))); - m_element_type = other.m_element_type; - m_shape = new_shape; - m_data = other.m_data; - update_identical_flags(other.m_all_elements_bitwise_identical_checked, other.m_all_elements_bitwise_identical); +Constant::Constant(const Constant& other, const Shape& new_shape) + : m_element_type{other.m_element_type}, + m_shape{new_shape}, + m_data{other.m_data}, + m_all_elements_bitwise_identical{other.m_all_elements_bitwise_identical.load()}, + m_all_elements_bitwise_identical_checked{other.m_all_elements_bitwise_identical_checked.load()} { + const auto new_size = shape_size(new_shape); + const auto other_size = shape_size(other.m_shape); + OPENVINO_ASSERT(other_size == new_size, "ov::Shape size ", new_size, " is not equal to ", other_size); constructor_validate_and_infer_types(); } -ov::op::v0::Constant::~Constant() = default; +Constant::~Constant() = default; -std::string ov::op::v0::Constant::convert_value_to_string(size_t index) const { - std::string rc; -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic push -# pragma GCC diagnostic error "-Wswitch" -# pragma GCC diagnostic error "-Wswitch-enum" -#endif - using Type_t = element::Type_t; - switch (get_element_type()) { - case Type_t::boolean: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::bf16: - rc = to_cpp_string(static_cast(get_element_value(index))); - break; - case Type_t::f16: - rc = to_cpp_string(static_cast(get_element_value(index))); - break; - case Type_t::f32: - rc = to_cpp_string(get_element_value(index)); - break; - case Type_t::f64: - rc = to_cpp_string(get_element_value(index)); - break; - case Type_t::i4: - rc = std::to_string(get_element_value(index)); - 
break; - case Type_t::i8: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::i16: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::i32: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::i64: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u1: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u4: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u8: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u16: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u32: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::u64: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::nf4: - rc = std::to_string(get_element_value(index)); - break; - case Type_t::undefined: - case Type_t::dynamic: - OPENVINO_THROW("unsupported type"); +struct ValueToString : ov::element::NotSupported { + using ov::element::NotSupported::visit; + + template ::type* = nullptr> + static result_type visit(const Constant* const c, const size_t index) { + return to_cpp_string(c->get_element_value(index)); } -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic pop -#endif - return rc; + + template >() && ET != element::f64>::type* = + nullptr> + static result_type visit(const Constant* const c, const size_t index) { + return to_cpp_string(c->get_element_value(index)); + } + + template >::value>::type* = nullptr> + static result_type visit(const Constant* const c, const size_t index) { + return std::to_string(c->get_element_value(index)); + } +}; + +std::string Constant::convert_value_to_string(size_t index) const { + using namespace ov::element; + return IfTypeOf::apply< + ValueToString>(get_element_type(), this, index); } -size_t ov::op::v0::Constant::get_byte_size() const { +size_t Constant::get_byte_size() const { return m_data->size(); } -const 
void* ov::op::v0::Constant::get_data_ptr() const { +const void* Constant::get_data_ptr() const { return (m_data ? m_data->get_ptr() : nullptr); } -void* ov::op::v0::Constant::get_data_ptr_nc() { +void* Constant::get_data_ptr_nc() { return (m_data ? m_data->get_ptr() : nullptr); } -std::vector ov::op::v0::Constant::get_value_strings() const { - std::vector rc; +struct ValuesToString : ov::element::NotSupported { + using ov::element::NotSupported::visit; -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic push -# pragma GCC diagnostic error "-Wswitch" -# pragma GCC diagnostic error "-Wswitch-enum" -#endif - switch (get_element_type()) { - case element::Type_t::boolean: - for (int value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::bf16: - for (bfloat16 value : get_vector()) { - rc.push_back(to_cpp_string(static_cast(value))); - } - break; - case element::Type_t::f16: - for (float16 value : get_vector()) { - rc.push_back(to_cpp_string(static_cast(value))); - } - break; - case element::Type_t::f32: - for (float value : get_vector()) { - rc.push_back(to_cpp_string(value)); - } - break; - case element::Type_t::f64: - for (double value : get_vector()) { - rc.push_back(to_cpp_string(value)); + template , + typename std::enable_if()>::type* = nullptr> + static result_type visit(const Constant* const c, std::vector& strs) { + for (auto&& v : c->get_vector()) { + strs.push_back(to_cpp_string(v)); } - break; - case element::Type_t::i4: - for (auto value : cast_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::i8: - for (int value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::i16: - for (int value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::i32: - for (int32_t value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case 
element::Type_t::i64: - for (int64_t value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::u1: - case element::Type_t::u4: - case element::Type_t::nf4: - for (auto value : cast_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::u8: - for (uint32_t value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::u16: - for (uint32_t value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::u32: - for (uint32_t value : get_vector()) { - rc.push_back(std::to_string(value)); - } - break; - case element::Type_t::u64: - for (uint64_t value : get_vector()) { - rc.push_back(std::to_string(value)); + } + + template , + typename std::enable_if::value && !std::is_same::value>::type* = nullptr> + static result_type visit(const Constant* const c, std::vector& strs) { + for (auto&& v : c->get_vector()) { + strs.push_back(std::to_string(v)); } - break; - case element::Type_t::undefined: - case element::Type_t::dynamic: - OPENVINO_THROW("unsupported type"); } -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic pop -#endif - return rc; + template , int8_t>::value>::type* = nullptr> + static result_type visit(const Constant* const c, std::vector& strs) { + for (auto&& v : c->cast_vector()) { + strs.push_back(std::to_string(v)); + } + } +}; + +std::vector Constant::get_value_strings() const { + std::vector out; + using namespace ov::element; + IfTypeOf::apply< + ValuesToString>(get_element_type(), this, out); + return out; } -ov::Shape ov::op::v0::Constant::get_shape_val() const { +Shape Constant::get_shape_val() const { OPENVINO_ASSERT(m_element_type.is_integral_number()); - std::vector out_shape = cast_vector(); - ov::Shape output_shape(shape_size(m_shape)); - std::transform(out_shape.begin(), out_shape.end(), output_shape.begin(), [&](const int64_t& v) { - return (v > 0) ? 
v : 0; - }); - return output_shape; + return convert_values_to(cast_vector(), m_shape); } -ov::Strides ov::op::v0::Constant::get_strides_val() const { +Strides Constant::get_strides_val() const { OPENVINO_ASSERT(m_element_type == element::i64); - std::vector out_strides = cast_vector(); - Strides output_strides(shape_size(m_shape)); - std::transform(out_strides.begin(), out_strides.end(), output_strides.begin(), [&](const int64_t& v) { - return (v > 0) ? v : 0; - }); - return output_strides; + return convert_values_to(get_vector(), m_shape); } -ov::Coordinate ov::op::v0::Constant::get_coordinate_val() const { +Coordinate Constant::get_coordinate_val() const { OPENVINO_ASSERT(m_element_type == element::i64); - std::vector out_coordinate = cast_vector(); - Coordinate output_coordinate(shape_size(m_shape)); - std::transform(out_coordinate.begin(), out_coordinate.end(), output_coordinate.begin(), [&](const int64_t& v) { - return (v > 0) ? v : 0; - }); - return output_coordinate; + return convert_values_to(get_vector(), m_shape); } -ov::CoordinateDiff ov::op::v0::Constant::get_coordinate_diff_val() const { +CoordinateDiff Constant::get_coordinate_diff_val() const { OPENVINO_ASSERT(m_element_type == element::i64); - std::vector out_coordinate_diff = cast_vector(); - CoordinateDiff output_coordinate_diff(shape_size(m_shape)); - std::transform(out_coordinate_diff.begin(), - out_coordinate_diff.end(), - output_coordinate_diff.begin(), - [&](const int64_t& v) { - return (v > 0) ? 
v : 0; - }); - return output_coordinate_diff; -} - -ov::AxisVector ov::op::v0::Constant::get_axis_vector_val() const { + return convert_values_to(get_vector(), m_shape); +} + +AxisVector Constant::get_axis_vector_val() const { OPENVINO_ASSERT(m_element_type.is_integral_number()); - std::vector out_axis_vector = cast_vector(); - AxisVector output_axis_vector(shape_size(m_shape)); - std::transform(out_axis_vector.begin(), out_axis_vector.end(), output_axis_vector.begin(), [&](const int64_t& v) { - return (v > 0) ? v : 0; - }); - return output_axis_vector; + return convert_values_to(cast_vector(), m_shape); } -ov::AxisSet ov::op::v0::Constant::get_axis_set_val() const { +AxisSet Constant::get_axis_set_val() const { OPENVINO_ASSERT(m_element_type.is_integral_number()); - std::vector out_axis_set = cast_vector(); - AxisSet output_axis_set; - for (auto& axis : out_axis_set) { - output_axis_set.insert(axis > 0 ? axis : 0); - } - return output_axis_set; + const auto values = cast_vector(); + AxisSet out; + std::replace_copy_if(values.begin(), values.end(), std::inserter(out, out.end()), cmp::Less(0), 0); + return out; } -std::shared_ptr ov::op::v0::Constant::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Constant::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Constant_clone_with_new_inputs); check_new_args_count(this, new_args); return std::make_shared(*this); } template -static bool test_bitwise_identical(const T* data, const size_t size) { - bool data_is_constant = true; - if (size > 0) { - OPENVINO_ASSERT(data != nullptr); - const T compare = data[0]; - for (size_t i = 1; i < size; i++) { - if (data[i] != compare) { - data_is_constant = false; - break; - } - } - } - return data_is_constant; +bool test_bitwise_identical(const T* data, const size_t size) { + OPENVINO_ASSERT(size == 0 || data != nullptr); + return std::all_of(data, data + size, [&](const T value) { + return value == data[0]; + }); } -bool 
ov::op::v0::Constant::are_all_data_elements_bitwise_identical() const { - bool rc = false; -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic push -# pragma GCC diagnostic error "-Wswitch" -# pragma GCC diagnostic error "-Wswitch-enum" -#endif +bool Constant::are_all_data_elements_bitwise_identical() const { + bool all_identical; + switch (m_element_type) { case element::Type_t::boolean: case element::Type_t::i8: - case element::Type_t::u8: { - rc = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); + case element::Type_t::u8: + all_identical = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); break; - } case element::Type_t::bf16: case element::Type_t::f16: case element::Type_t::i16: - case element::Type_t::u16: { - rc = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); + case element::Type_t::u16: + all_identical = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); break; - } case element::Type_t::f32: case element::Type_t::i32: - case element::Type_t::u32: { - rc = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); + case element::Type_t::u32: + all_identical = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); break; - } case element::Type_t::f64: case element::Type_t::i64: - case element::Type_t::u64: { - rc = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); + case element::Type_t::u64: + all_identical = test_bitwise_identical(get_data_ptr(), shape_size(m_shape)); break; - } - case element::Type_t::i4: - case element::Type_t::u1: - case element::Type_t::u4: - case element::Type_t::nf4: - case element::Type_t::undefined: - case element::Type_t::dynamic: + default: + all_identical = false; break; } -#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8) -# pragma GCC diagnostic pop -#endif - return rc; + return all_identical; } -void ov::op::v0::Constant::update_identical_flags(bool is_checked, bool identical_value) const { +void 
Constant::update_identical_flags(bool is_checked, bool identical_value) const { m_all_elements_bitwise_identical_checked = is_checked; m_all_elements_bitwise_identical = identical_value; } -bool ov::op::v0::Constant::visit_attributes(AttributeVisitor& visitor) { +bool Constant::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_Constant_visit_attributes); - ov::Shape prev_shape = m_shape; - element::Type prev_type = m_element_type; + const auto prev_shape = m_shape; + const auto prev_type = m_element_type; visitor.on_attribute("element_type", m_element_type); visitor.on_attribute("shape", m_shape); - bool need_to_reallocate = (m_shape != prev_shape || prev_type != m_element_type); + const auto need_to_reallocate = (m_shape != prev_shape) || (prev_type != m_element_type); if (m_alloc_buffer_on_visit_attributes && need_to_reallocate) { // Filling in a fresh constant allocate_buffer(false); @@ -588,28 +320,31 @@ bool ov::op::v0::Constant::visit_attributes(AttributeVisitor& visitor) { return true; } -bool ov::op::v0::Constant::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { +bool Constant::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v0_Constant_evaluate); if (outputs.empty()) - outputs.emplace_back(ov::Tensor(m_element_type, m_shape)); + outputs.emplace_back(m_element_type, m_shape); else outputs[0].set_shape(m_shape); std::memcpy(outputs[0].data(), get_data_ptr(), outputs[0].get_byte_size()); return true; } -bool ov::op::v0::Constant::has_evaluate() const { +bool Constant::has_evaluate() const { OV_OP_SCOPE(v0_Constant_has_evaluate); return true; } -bool ov::op::v0::Constant::evaluate_lower(TensorVector& outputs) const { +bool Constant::evaluate_lower(TensorVector& outputs) const { return evaluate(outputs, {}); } -bool ov::op::v0::Constant::evaluate_upper(TensorVector& outputs) const { +bool Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } -uint8_t 
ov::op::v0::Constant::quantize_nf4(float x) { - return ov::ConvertNF4::quantize(x); +uint8_t Constant::quantize_nf4(float x) { + return ConvertNF4::quantize(x); } +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 010bb83b3e75d9..b6cffdc31f1799 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -1819,3 +1819,14 @@ TEST(constant, cast_vector) { << "Constant::cast_vector failed empty casting for type " << type; } } + +TEST(constant, get_values_as) { + ov::op::v0::Constant c(element::i64, Shape{6}, std::vector{2, -3, 1, 0, 1, 5}); + + EXPECT_EQ(c.get_shape_val(), Shape({2, 0, 1, 0, 1, 5})); + EXPECT_EQ(c.get_strides_val(), Strides({2, 0, 1, 0, 1, 5})); + EXPECT_EQ(c.get_coordinate_val(), Coordinate({2, 0, 1, 0, 1, 5})); + EXPECT_EQ(c.get_coordinate_diff_val(), CoordinateDiff({2, 0, 1, 0, 1, 5})); + EXPECT_EQ(c.get_axis_vector_val(), AxisVector({2, 0, 1, 0, 1, 5})); + EXPECT_EQ(c.get_axis_set_val(), AxisSet({0, 1, 2, 5})); +} From e8d28f7f6d8c00dff3434147965fca921c4161de Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Fri, 10 Nov 2023 12:10:36 +0100 Subject: [PATCH 262/275] Make conformance wait for Smart CI step (#21014) --- .github/workflows/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index d136f91f003b8b..7a741a755380a7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -432,7 +432,7 @@ jobs: if-no-files-found: 'warn' Conformance: - needs: Build + needs: [Build, Smart_CI] timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 15 }} defaults: run: From b64d6be8baa4b9381a99ddffafab6c648b9ff396 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Fri, 10 Nov 2023 15:44:28 +0400 Subject: [PATCH 263/275] [TF FE] Fix TF1 OD SSD PPN model conversion (#20994) * [TF FE] Fix TF1 SSD PPN model conversion It contains a case when one Merge node 
eliminated different conditional flows. Signed-off-by: Kazantsev, Roman * Add layer test Signed-off-by: Kazantsev, Roman --------- Signed-off-by: Kazantsev, Roman --- .../transformations/switch_merge_resolve.cpp | 20 +++--- .../tensorflow_tests/test_tf_SwitchMerge.py | 65 +++++++++++++++++++ 2 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_SwitchMerge.py diff --git a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp index 35c2cd1b7f23e1..07e5ac31ee5268 100644 --- a/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp +++ b/src/frontends/tensorflow/src/transformations/switch_merge_resolve.cpp @@ -55,12 +55,14 @@ void generate_if_clusters(const shared_ptr& ov_model, continue; } auto eliminated_markers = merge_node->get_eliminated_cond_flow_marker(); - if (eliminated_markers.size() != 1) { - continue; - } - auto eliminated_marker = eliminated_markers[0]; - auto switch_nodes = merge_node->get_switch_nodes_set_by_cond_index(eliminated_marker); + // combine all Switch nodes for which conditional flow is resolved + // by the current Merge node + SetOfSwitchNodes switch_nodes; + for (const auto& eliminated_marker : eliminated_markers) { + auto curr_switch_nodes = merge_node->get_switch_nodes_set_by_cond_index(eliminated_marker); + switch_nodes.insert(curr_switch_nodes.begin(), curr_switch_nodes.end()); + } // insert into clusters ClusterType combined_cluster = {switch_nodes, {merge_node}}; @@ -121,11 +123,13 @@ void insert_result_before_merge(const shared_ptr& merge_node, "[TensorFlow Frontend] internal error: Merge node " + merge_node_name + " does not have conditional flow marker"); - // get eliminated marker and check that it is the single one + // get eliminated marker and check that eliminated marker exists + // Merge node may contain several eliminated markers, in this case it means some Switch 
nodes have different + // condition nodes and values generated by this condition nodes are identical auto merge_cf_marker = get_cf_marker(merge_node); - FRONT_END_GENERAL_CHECK(merge_cf_marker.merge_eliminated_markers.size() == 1, + FRONT_END_GENERAL_CHECK(merge_cf_marker.merge_eliminated_markers.size() > 0, "[TensorFlow Frontend] internal error: Merge node " + merge_node_name + - " does not contain the single eliminated marker"); + " does not contain any eliminated marker"); auto eliminated_marker = merge_cf_marker.merge_eliminated_markers.begin()->first; // check that producer contains the same conditional flow marker diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SwitchMerge.py b/tests/layer_tests/tensorflow_tests/test_tf_SwitchMerge.py new file mode 100644 index 00000000000000..8acc25c3a608e1 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_SwitchMerge.py @@ -0,0 +1,65 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestSwitchMerge(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info + x_shape = inputs_info['x'] + inputs_data = {} + rng = np.random.default_rng() + inputs_data['x'] = rng.integers(-10, 10, x_shape).astype(self.x_type) + return inputs_data + + def merge_eliminating_several_cond_flows_net(self, x_shape, x_type, cond_value): + self.x_type = x_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(x_type, x_shape, 'x') + cond = tf.constant(cond_value, dtype=tf.bool) + switch_false, switch_true = tf.raw_ops.Switch(data=x, pred=cond) + + cond2 = tf.constant(cond_value, dtype=tf.bool) + switch2_false, switch2_true = tf.raw_ops.Switch(data=cond2, pred=cond2) + with tf.control_dependencies([switch2_true]): + const_sub = tf.constant(5, 
dtype=x_type) + with tf.control_dependencies([switch2_false]): + const_add = tf.constant(2, dtype=x_type) + + add = tf.raw_ops.AddV2(x=switch_false, y=const_add) + sub = tf.raw_ops.Sub(x=switch_true, y=const_sub) + merge = tf.raw_ops.Merge(inputs=[add, sub]) + const_main = tf.constant(1, dtype=x_type) + tf.raw_ops.AddV2(x=merge[0], y=const_main) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(x_shape=[]), + dict(x_shape=[2]), + dict(x_shape=[4, 3]), + ] + + @pytest.mark.parametrize("cond_value", [ + True, False + ]) + @pytest.mark.parametrize("x_type", [ + np.float32, np.int32 + ]) + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_merge_eliminating_several_cond_flows(self, params, cond_value, x_type, ie_device, precision, ir_version, + temp_dir, + use_new_frontend, use_old_api): + self._test(*self.merge_eliminating_several_cond_flows_net(**params, cond_value=cond_value, x_type=x_type), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) From e446dac72705d247439e3eb1ac78381a031e4607 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Fri, 10 Nov 2023 16:28:09 +0100 Subject: [PATCH 264/275] [DOCS] Add content to about ov page (#21010) * add about ov info * fix format * fill ov page --- docs/articles_en/about_openvino.md | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/articles_en/about_openvino.md b/docs/articles_en/about_openvino.md index 37545223e8b6f3..6ff9127557d915 100644 --- a/docs/articles_en/about_openvino.md +++ b/docs/articles_en/about_openvino.md @@ -17,10 +17,61 @@ OpenVINO is a toolkit for simple and efficient deployment of various deep learni In this section you will find information on the product itself, as well as the software and hardware solutions it supports. 
+OpenVINO (Open Visual Inference and Neural network Optimization) is an open-source software toolkit designed to optimize, accelerate, and deploy deep learning models for user applications. OpenVINO was developed by Intel to work efficiently on a wide range of Intel hardware platforms, including CPUs (x86 and Arm), GPUs, and NPUs. +Features +############################################################## +One of the main purposes of OpenVINO is to streamline the deployment of deep learning models in user applications. It optimizes and accelerates model inference, which is crucial for such domains as Generative AI, Large Language models, and use cases like object detection, classification, segmentation, and many others. +* :doc:`Model Optimization ` +OpenVINO provides multiple optimization methods for both the training and post-training stages, including weight compression for Large Language models and Intel Optimum integration with Hugging Face. + +* :doc:`Model Conversion and Framework Compatibility ` + +Supported models can be loaded directly or converted to the OpenVINO format to achieve better performance. Supported frameworks include ONNX, PyTorch, TensorFlow, TensorFlow Lite, Keras, and PaddlePaddle. + +* :doc:`Model Inference ` + +OpenVINO accelerates deep learning models on various hardware platforms, ensuring real-time, efficient inference. + +* `Deployment on a server `__ + +A model can be deployed either locally using OpenVINO Runtime or on a model server. Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions. The model server enables quick model inference using external resources. + +Architecture +############################################################## + +To learn more about how OpenVINO works, read the Developer documentation on its `architecture `__ and `core components `__. 
+ +OpenVINO Ecosystem +############################################################## + +Along with the primary components of model optimization and runtime, the toolkit also includes: + +* `Neural Network Compression Framework (NNCF) `__ - a tool for enhanced OpenVINO™ inference to get performance boost with minimal accuracy drop. +* :doc:`Openvino Notebooks `- Jupyter Python notebook tutorials, which demonstrate key features of the toolkit. +* `OpenVINO Model Server `__ - a server that enables scalability via a serving microservice. +* :doc:`OpenVINO Training Extensions ` – a convenient environment to train Deep Learning models and convert them using the OpenVINO™ toolkit for optimized inference. +* :doc:`Dataset Management Framework (Datumaro) ` - a tool to build, transform, and analyze datasets. + +Community +############################################################## + +OpenVINO community plays a vital role in the growth and development of the open-sourced toolkit. Users can contribute to OpenVINO and get support using the following channels: + +* `OpenVINO GitHub issues, discussions and pull requests `__ +* `OpenVINO Blog `__ +* `Community Forum `__ +* `OpenVINO video tutorials `__ +* `Support Information `__ + +Case Studies +############################################################## + +OpenVINO has been employed in various case studies across a wide range of industries and applications, including healthcare, retail, safety and security, transportation, and more. Read about how OpenVINO enhances efficiency, accuracy, and safety in different sectors on the `success stories page `__. 
@endsphinxdirective + From c08e01d6d71490b5a2010caad9c78fb7b57c2044 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 10 Nov 2023 18:00:04 +0100 Subject: [PATCH 265/275] [PT FE] Optimize reverseprop in pytorch frontend (#20989) * [PT FE] Optimize reverseprop in pytorch frontend * Add transformation * Improve readability --------- Co-authored-by: Alina Kladieva --- src/frontends/pytorch/src/frontend.cpp | 2 + .../pytorch/src/helper_ops/gather_assign.hpp | 40 ++++++ .../pytorch/src/helper_ops/internal_op.hpp | 7 ++ .../pytorch/src/helper_ops/slice_assign.hpp | 64 ++++++++++ src/frontends/pytorch/src/node_context.cpp | 12 +- .../src/transforms/reverseprop_resolver.cpp | 119 ++++++++++++++++++ .../src/transforms/reverseprop_resolver.hpp | 27 ++++ .../pytorch/src/translate_session.cpp | 101 +++++---------- .../pytorch/src/translate_session.hpp | 8 +- 9 files changed, 297 insertions(+), 83 deletions(-) create mode 100644 src/frontends/pytorch/src/helper_ops/gather_assign.hpp create mode 100644 src/frontends/pytorch/src/helper_ops/slice_assign.hpp create mode 100644 src/frontends/pytorch/src/transforms/reverseprop_resolver.cpp create mode 100644 src/frontends/pytorch/src/transforms/reverseprop_resolver.hpp diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 36d4027dcc426f..1f021dfba441f5 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -38,6 +38,7 @@ #include "transforms/prim_list_unpack_replacer.hpp" #include "transforms/prim_tuple_unpack_parameter_replacer.hpp" #include "transforms/quantized_node_remover.hpp" +#include "transforms/reverseprop_resolver.hpp" #include "transforms/rfftn_complex_replacer.hpp" #include "transforms/softmax_reshape_elimination.hpp" #include "transforms/string_equality_replacer.hpp" @@ -204,6 +205,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + 
manager.register_pass(); manager.register_pass(); manager.register_pass(); // Second pass of AlignTypesRemoval after all converting transformations diff --git a/src/frontends/pytorch/src/helper_ops/gather_assign.hpp b/src/frontends/pytorch/src/helper_ops/gather_assign.hpp new file mode 100644 index 00000000000000..eadc9dfda7ecdf --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/gather_assign.hpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "helper_ops/internal_op.hpp" +#include "openvino/frontend/decoder.hpp" +#include "openvino/op/op.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class GatherAssign : public InternalReverseOperation { +public: + OPENVINO_OP("GatherAssign", "internal", InternalReverseOperation); + + GatherAssign(const Output& data, + const Output& updates, + const Output& indices, + const Output& axis) + : InternalReverseOperation({data, updates, indices, axis}) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + auto data = input_value(0); + set_output_type(0, data.get_element_type(), data.get_partial_shape()); + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); + } +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/helper_ops/internal_op.hpp b/src/frontends/pytorch/src/helper_ops/internal_op.hpp index 510654dce8620a..8d9cc9c727083c 100644 --- a/src/frontends/pytorch/src/helper_ops/internal_op.hpp +++ b/src/frontends/pytorch/src/helper_ops/internal_op.hpp @@ -8,6 +8,7 @@ #include #include "openvino/frontend/decoder.hpp" +#include "openvino/op/op.hpp" #include "pt_framework_node.hpp" #include "utils.hpp" @@ -51,6 +52,12 @@ class InternalOperation : 
public PtFrameworkNode { set_attrs(attrs); } }; + +class InternalReverseOperation : public ov::op::Op { +public: + OPENVINO_OP("InternalReverseOperation", "internal"); + InternalReverseOperation(const OutputVector& inputs) : ov::op::Op(inputs) {} +}; } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/helper_ops/slice_assign.hpp b/src/frontends/pytorch/src/helper_ops/slice_assign.hpp new file mode 100644 index 00000000000000..238cf7aefee7aa --- /dev/null +++ b/src/frontends/pytorch/src/helper_ops/slice_assign.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "helper_ops/internal_op.hpp" +#include "openvino/frontend/decoder.hpp" +#include "openvino/op/op.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { + +class SliceAssign : public InternalReverseOperation { +public: + OPENVINO_OP("SliceAssign", "internal", InternalReverseOperation); + + SliceAssign(const Output& data, + const Output& updates, + const Output& start, + const Output& stop, + const Output& step) + : InternalReverseOperation({data, updates, start, stop, step}) { + validate_and_infer_types(); + } + + SliceAssign(const Output& data, + const Output& updates, + const Output& start, + const Output& stop, + const Output& step, + const Output& axes) + : InternalReverseOperation({data, updates, start, stop, step, axes}) { + validate_and_infer_types(); + } + + void validate_and_infer_types() override { + auto data = input_value(0); + set_output_type(0, data.get_element_type(), data.get_partial_shape()); + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + if (new_args.size() == 5) { + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4)); + } else { + return std::make_shared(new_args.at(0), + 
new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + new_args.at(5)); + } + } +}; +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index 5d8a138a52f1ef..4c6bf2e9e5080f 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -65,25 +65,25 @@ void NodeContext::mutate_input(size_t index, Output ov_output) const { auto back_input_id = input_id; auto back_node_input = ov_output; while (m_translate_session->m_may_be_alias.count(back_input_id)) { - // Create node to backprop data. While loop is needed for the cases when alias to tensor point to another alias - // to tensor. In that case we need to create a chain of backprop ops + // Create node to reverseprop data. While loop is needed for the cases when alias to tensor point to another + // alias to tensor. In that case we need to create a chain of reverseprop ops size_t in_tensor; std::shared_ptr node; Output node_converted_output; std::tie(in_tensor, node, node_converted_output) = m_translate_session->m_may_be_alias.at(back_input_id); - auto backprop_node = m_translate_session->get_backprop_op(node, node_converted_output, back_node_input); + auto reverseprop_node = m_translate_session->get_reverseprop_op(node, node_converted_output, back_node_input); if (m_tensor_map->count(in_tensor)) { // Tensor is not found in the scope of this body, need to get it from internal context and mark mutated OPENVINO_DEBUG << "Couldn't find in the current body the initial aliased tensor: " << in_tensor << " for operation: " << node->get_op_type() << " creating new body input."; get_tensor_from_model_or_create_input(in_tensor); } - m_translate_session->encode_tensor_name(backprop_node, in_tensor); - (*m_tensor_map)[in_tensor] = backprop_node; + m_translate_session->encode_tensor_name(reverseprop_node, in_tensor); + (*m_tensor_map)[in_tensor] = 
reverseprop_node; m_mutated_tensors->insert(in_tensor); OPENVINO_DEBUG << "Propagated back data from tensor: " << back_input_id << " to tensor: " << in_tensor << ".\n"; back_input_id = in_tensor; - back_node_input = backprop_node; + back_node_input = reverseprop_node; } } diff --git a/src/frontends/pytorch/src/transforms/reverseprop_resolver.cpp b/src/frontends/pytorch/src/transforms/reverseprop_resolver.cpp new file mode 100644 index 00000000000000..4bdc28b07f2fc7 --- /dev/null +++ b/src/frontends/pytorch/src/transforms/reverseprop_resolver.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reverseprop_resolver.hpp" + +#include +#include + +#include "helper_ops/gather_assign.hpp" +#include "helper_ops/internal_op.hpp" +#include "helper_ops/slice_assign.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reduce_prod.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_nd_update.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::pass; +using namespace ov::op; + +ReversepropResolver::ReversepropResolver() { + auto reverse_op = pattern::wrap_type(); + + ov::matcher_pass_callback callback = [](pattern::Matcher& m) { + auto base_op = m.get_match_root(); + // Apply this transformation only to starting reverse operation + if (ov::as_type_ptr(base_op->get_input_node_shared_ptr(1))) + return false; + + auto curr_op = base_op; + std::vector> rev_ops; + while (ov::as_type_ptr(curr_op)) { + rev_ops.push_back(curr_op); + auto target_inputs = curr_op->get_output_target_inputs(0); + if 
(target_inputs.size() != 1) + break; + curr_op = target_inputs.begin()->get_node()->shared_from_this(); + } + if (rev_ops.size() < 1) + return false; + + ov::pass::NodeRegistry rg; + auto zero = v0::Constant::create(element::i64, Shape{}, {0}); + auto one = v0::Constant::create(element::i64, Shape{}, {1}); + auto neg_one_1d = v0::Constant::create(element::i64, Shape{1}, {-1}); + auto scattering_shape = v0::Constant::create(element::i64, Shape{2}, {-1, 1}); + + // Get 1d indices [0..numel) for whole input tensor + auto start_op = rev_ops.back(); + auto data_to_insert_into = start_op->input_value(0); + auto input_shape = rg.make(data_to_insert_into, element::i64); + auto numel = rg.make(input_shape, zero, false); + auto full_data_indices_1d = rg.make(zero, numel, one, element::i64); + auto full_data_indices = rg.make(full_data_indices_1d, input_shape, false); + + // cut indices in accordance with operations + Output data_indices = full_data_indices; + for (auto it = rev_ops.rbegin(); it != rev_ops.rend(); ++it) { + curr_op = *it; + if (ov::as_type_ptr(curr_op)) { + if (curr_op->get_input_size() == 6) { + data_indices = rg.make(data_indices, + curr_op->input_value(2), + curr_op->input_value(3), + curr_op->input_value(4), + curr_op->input_value(5)); + } else if (curr_op->get_input_size() == 5) { + data_indices = rg.make(data_indices, + curr_op->input_value(2), + curr_op->input_value(3), + curr_op->input_value(4)); + } else { + return false; + } + } else if (ov::as_type_ptr(curr_op)) { + data_indices = rg.make(data_indices, curr_op->input_value(2), curr_op->input_value(3)); + } else { + return false; + } + } + + // Scatter in flattened tensor with indices and flattened data to be inserted + auto data_to_insert_into_1d = rg.make(data_to_insert_into, neg_one_1d, false); + auto data_indices_1d = rg.make(data_indices, scattering_shape, false); + auto to_be_inserted_data_1d = rg.make(base_op->input_value(1), neg_one_1d, false); + auto updated_data_1d = + 
rg.make(data_to_insert_into_1d, data_indices_1d, to_be_inserted_data_1d); + + // Reshape to initial shape + auto res_node = rg.make(updated_data_1d, input_shape, false); + copy_runtime_info_and_name(base_op, rg.get()); + start_op->output(0).replace(res_node); + + return true; + }; + + auto m = + std::make_shared(reverse_op, "ov::frontend::pytorch::pass::ReversepropResolver"); + this->register_matcher(m, callback); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/reverseprop_resolver.hpp b/src/frontends/pytorch/src/transforms/reverseprop_resolver.hpp new file mode 100644 index 00000000000000..d07162889c7b9e --- /dev/null +++ b/src/frontends/pytorch/src/transforms/reverseprop_resolver.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +/** + * Replace sequence of reverseprop operations with ScatterNdUpdate. 
+ */ +class ReversepropResolver : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::ReversepropResolver"); + ReversepropResolver(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index f08a7d08c7a36a..16090fe0e42931 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -4,18 +4,11 @@ #include "translate_session.hpp" +#include "helper_ops/gather_assign.hpp" +#include "helper_ops/slice_assign.hpp" #include "input_model.hpp" -#include "openvino/op/constant.hpp" #include "openvino/op/gather.hpp" -#include "openvino/op/parameter.hpp" -#include "openvino/op/range.hpp" -#include "openvino/op/reduce_prod.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/result.hpp" -#include "openvino/op/scatter_nd_update.hpp" -#include "openvino/op/shape_of.hpp" #include "openvino/op/slice.hpp" -#include "openvino/op/transpose.hpp" #include "openvino/util/log.hpp" #include "place.hpp" #include "pt_framework_node.hpp" @@ -344,92 +337,54 @@ size_t TranslateSession::decode_tensor_name(const Output& output) { } namespace { -Output slice_backprop(const Output& slice_output, const Output& value) { +Output slice_reverseprop(const Output& slice_output, const Output& value) { auto slice_node = slice_output.get_node_shared_ptr(); FRONT_END_OP_CONVERSION_CHECK(ov::as_type_ptr(slice_node), "Conversion rule for aten::slice doesn't contain Slice node."); - auto zero = v0::Constant::create(element::i64, Shape{}, {0}); - auto one = v0::Constant::create(element::i64, Shape{}, {1}); - auto neg_one_1d = v0::Constant::create(element::i64, Shape{1}, {-1}); - auto scattering_shape = v0::Constant::create(element::i64, Shape{2}, {-1, 1}); - - // Get 1d indices [0..numel) auto to_insert_data = slice_node->input_value(0); - auto input_shape = 
std::make_shared(to_insert_data, element::i64); - auto numel = std::make_shared(input_shape, zero, false); - auto full_data_indices_1d = std::make_shared(zero, numel, one, element::i64); - - // Slice indices by same start, stop, slice, axes as initial Slice - auto full_data_indices = std::make_shared(full_data_indices_1d, input_shape, false); - Output data_indices; + Output res; if (slice_node->get_input_size() == 5) { - data_indices = std::make_shared(full_data_indices, - slice_node->input_value(1), - slice_node->input_value(2), - slice_node->input_value(3), - slice_node->input_value(4)); + res = std::make_shared(to_insert_data, + value, + slice_node->input_value(1), + slice_node->input_value(2), + slice_node->input_value(3), + slice_node->input_value(4)); } else if (slice_node->get_input_size() == 4) { - data_indices = std::make_shared(full_data_indices, - slice_node->input_value(1), - slice_node->input_value(2), - slice_node->input_value(3)); + res = std::make_shared(to_insert_data, + value, + slice_node->input_value(1), + slice_node->input_value(2), + slice_node->input_value(3)); } else { FRONT_END_OP_CONVERSION_CHECK(false, "Incorrect number of Slice inputs"); } - // Scatter in flattened tensor with indices and flattened data to be inserted - auto to_insert_data_1d = std::make_shared(to_insert_data, neg_one_1d, false); - auto data_indices_1d = std::make_shared(data_indices, scattering_shape, false); - auto to_be_inserted_data_1d = std::make_shared(value, neg_one_1d, false); - auto updated_data_1d = - std::make_shared(to_insert_data_1d, data_indices_1d, to_be_inserted_data_1d); - - // Reshape to initial shape - return std::make_shared(updated_data_1d, input_shape, false); + return res; } -Output select_backprop(const Output& select_output, const Output& value) { +Output select_reverseprop(const Output& select_output, const Output& value) { auto gather_node = select_output.get_node_shared_ptr(); FRONT_END_OP_CONVERSION_CHECK(ov::as_type_ptr(gather_node), 
"Conversion rule for aten::select doesn't contain Gather node."); - auto zero = v0::Constant::create(element::i64, Shape{}, {0}); - auto one = v0::Constant::create(element::i64, Shape{}, {1}); - auto neg_one_1d = v0::Constant::create(element::i64, Shape{1}, {-1}); - auto scattering_shape = v0::Constant::create(element::i64, Shape{2}, {-1, 1}); - - // Get 1d indices [0..numel) auto to_insert_data = gather_node->input_value(0); - auto input_shape = std::make_shared(to_insert_data, element::i64); - auto numel = std::make_shared(input_shape, zero, false); - auto full_data_indices_1d = std::make_shared(zero, numel, one, element::i64); - - // Slice indices by same start, stop, slice, axes as initial Slice - auto full_data_indices = std::make_shared(full_data_indices_1d, input_shape, false); - Output data_indices = - std::make_shared(full_data_indices, gather_node->input_value(1), gather_node->input_value(2)); - - // Scatter in flattened tensor with indices and flattened data to be inserted - auto to_insert_data_1d = std::make_shared(to_insert_data, neg_one_1d, false); - auto data_indices_1d = std::make_shared(data_indices, scattering_shape, false); - auto to_be_inserted_data_1d = std::make_shared(value, neg_one_1d, false); - auto updated_data_1d = - std::make_shared(to_insert_data_1d, data_indices_1d, to_be_inserted_data_1d); - - // Reshape to initial shape - return std::make_shared(updated_data_1d, input_shape, false); + return std::make_shared(to_insert_data, + value, + gather_node->input_value(1), + gather_node->input_value(2)); } } // namespace -using BackpropCreatorFunction = std::function(const Output&, const Output&)>; +using ReversepropCreatorFunction = std::function(const Output&, const Output&)>; -Output TranslateSession::get_backprop_op(const std::shared_ptr& node, - const Output& direct_op_output, - const Output& value) { - std::map backprop_map = { - {"aten::slice", slice_backprop}, - {"aten::select", select_backprop}, +Output 
TranslateSession::get_reverseprop_op(const std::shared_ptr& node, + const Output& direct_op_output, + const Output& value) { + std::map backprop_map = { + {"aten::slice", slice_reverseprop}, + {"aten::select", select_reverseprop}, }; Output backprop_node; diff --git a/src/frontends/pytorch/src/translate_session.hpp b/src/frontends/pytorch/src/translate_session.hpp index 44ce6232caaa00..de65d1c4ed9eae 100644 --- a/src/frontends/pytorch/src/translate_session.hpp +++ b/src/frontends/pytorch/src/translate_session.hpp @@ -34,10 +34,10 @@ class TranslateSession { const TensorMap& external_tensor_map = {}, const std::shared_ptr& input_model = nullptr); - /// \brief Returns backprop operations for direct operation - Output get_backprop_op(const std::shared_ptr& node, - const Output& direct_op_output, - const Output& value); + /// \brief Returns reverseprop operations for direct operation + Output get_reverseprop_op(const std::shared_ptr& node, + const Output& direct_op_output, + const Output& value); /// \brief Writes pytorch tensor index into openvino tensor void encode_tensor_name(Output tensor_desc, From 51da30b48ddd3bd82dd51ecf58ae6fe6f9a81ad4 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Sat, 11 Nov 2023 17:06:08 +0900 Subject: [PATCH 266/275] Support LSTMSequence with -1 sequence length (#20935) * [GPU] Support LSTMSequence w/ -1 seq_length Co-authored-by:Taylor Yeonbok Lee Co-authored-by:Andrew Park * Fix GetInputInfo to retrieve input pid from LSTMCell * LSTMCell use ov::PartialShape instead of cldnn::tensor * implement lstm_elt_inst::calc_output_layouts * implement lstm_elt_impl::static_canonicalize_shapes * Add functional tests * Fix unit test failure --------- Co-authored-by: Andrew Park --- .../src/graph/impls/ocl/lstm_elt.cpp | 46 +++ .../src/graph/include/lstm_elt_inst.h | 2 + src/plugins/intel_gpu/src/graph/lstm_elt.cpp | 19 ++ src/plugins/intel_gpu/src/plugin/ops/rnn.cpp | 172 ++++++---- .../intel_gpu/src/plugin/program_builder.cpp | 8 +- 
.../subgraph_tests/tensor_iterator.cpp | 322 ++++++++++++++++++ 6 files changed, 497 insertions(+), 72 deletions(-) create mode 100644 src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp index faa210c545636b..0bccdd999b2889 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_elt.cpp @@ -71,6 +71,52 @@ struct lstm_elt_impl : typed_primitive_impl_ocl { return {params, optional_params}; } + + static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) { + if (impl_params.get_input_layout().get_partial_shape().size() != 2) { + return primitive_impl::static_canonicalize_shapes(impl_params); + } + auto updated_impl_params = canonicalize_fused_shapes(impl_params); + + auto& input_layout = updated_impl_params.input_layouts[0]; + auto& weights_layout = updated_impl_params.input_layouts[1]; + auto& output_layout = updated_impl_params.output_layouts[0]; + + auto input_pshape = input_layout.get_partial_shape(); + auto weights_pshape = weights_layout.get_partial_shape(); + auto output_pshape = output_layout.get_partial_shape(); + + auto lstm_input_size = static_cast(input_pshape[1].get_length()); + auto lstm_batch_size = static_cast(input_pshape[0].get_length()); + auto lstm_hidden_size = static_cast(lstm_input_size / 4); + + GPU_DEBUG_LOG << "lstm_input_size : " << lstm_input_size << std::endl; + GPU_DEBUG_LOG << "lstm_batch_size : " << lstm_batch_size << std::endl; + GPU_DEBUG_LOG << "lstm_hidden_size : " << lstm_hidden_size << std::endl; + + GPU_DEBUG_LOG << "origin input_pshape : " << input_layout.to_short_string() << std::endl; + GPU_DEBUG_LOG << "origin weights_layout : " << weights_layout.to_short_string() << std::endl; + + input_pshape = {lstm_batch_size, 1, 1, lstm_input_size}; + input_layout.set_partial_shape(input_pshape); 
+ + weights_pshape = {lstm_batch_size, 1, 1, lstm_hidden_size}; // {batch, direction, 1, hidden_size} + weights_layout.format = format::adjust_to_rank(weights_layout.format, weights_pshape.size()); + weights_layout.set_partial_shape(weights_pshape); + + updated_impl_params.weights_layout = weights_layout; + + GPU_DEBUG_LOG << "input_layout : " << input_layout.to_short_string() << std::endl; + GPU_DEBUG_LOG << "weights_layout : " << weights_layout.to_short_string() << std::endl; + GPU_DEBUG_LOG << "output_layout : " << output_layout.to_short_string() << std::endl; + + OPENVINO_ASSERT(input_pshape.size() == 4 && weights_pshape.size() == 4, "input and weights shape should be rank 4"); + return updated_impl_params; + } + + kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override { + return static_canonicalize_shapes(impl_params); + } }; namespace detail { diff --git a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h index 640c6259b6f864..07b75fafed5e9f 100644 --- a/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/lstm_elt_inst.h @@ -38,6 +38,8 @@ class typed_primitive_inst : public typed_primitive_inst_base + static std::vector calc_output_layouts(lstm_elt_node const& node, kernel_impl_params const& impl_param); static layout calc_output_layout(lstm_elt_node const& node, kernel_impl_params const& impl_param); static std::string to_string(lstm_elt_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/lstm_elt.cpp b/src/plugins/intel_gpu/src/graph/lstm_elt.cpp index 1831a3fd986796..32d6a52ab4e0b2 100644 --- a/src/plugins/intel_gpu/src/graph/lstm_elt.cpp +++ b/src/plugins/intel_gpu/src/graph/lstm_elt.cpp @@ -27,6 +27,25 @@ layout lstm_elt_inst::calc_output_layout(lstm_elt_node const& node, kernel_impl_ return result; } +template +std::vector lstm_elt_inst::calc_output_layouts(lstm_elt_node const& node, 
kernel_impl_params const& impl_param) { + std::vector output_layouts; + + // input partial shape [batch, input_size (= hidden_size * 4)] + auto input_layout = impl_param.get_input_layout(); + auto input_pshape = input_layout.get_partial_shape(); + OPENVINO_ASSERT(static_cast(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for lstm_elt_node!"); + OPENVINO_ASSERT(input_pshape.rank().get_length() == 2, "input_layout rank should be 2 on dynamic shape."); + + auto lstm_input_size = static_cast(input_pshape[1].get_length()); + auto lstm_batch_size = static_cast(input_pshape[0].get_length()); + auto lstm_hidden_size = static_cast(lstm_input_size / 4); + + return {cldnn::layout{ov::PartialShape{lstm_batch_size, 2, 1, lstm_hidden_size}, input_layout.data_type, input_layout.format}}; +} + +template std::vector lstm_elt_inst::calc_output_layouts(lstm_elt_node const& node, const kernel_impl_params& impl_param); + std::string lstm_elt_inst::to_string(lstm_elt_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp b/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp index 12f61c033ee294..8453227bfd2dbf 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp @@ -76,10 +76,9 @@ static void CreateLSTMCellOp(ProgramBuilder& p, const std::shared_ptrget_input_shape(0); const auto out_dims0 = op->get_output_shape(0); - if (in_dims0.size() != 2 || - op->get_input_shape(1).size() != 2 || - op->get_input_shape(2).size() != 2) - OPENVINO_THROW("Wrong input shapes for LSTMCell op ", op->get_friendly_name()); + OPENVINO_ASSERT((op->get_input_shape(0).size() == 2 && + op->get_input_shape(1).size() == 2 && + op->get_input_shape(2).size() == 2), "Wrong input shapes for LSTMCell op ", op->get_friendly_name()); lstm_input_size = static_cast(in_dims0.back()); lstm_batch_size = 
static_cast(in_dims0.at(in_dims0.size()-2)); @@ -91,69 +90,102 @@ static void CreateLSTMCellOp(ProgramBuilder& p, const std::shared_ptrget_clip(); - // LSTM primitive works with single precision for all in/out/weights tensors - auto lstm_dtype = cldnn::element_type_to_data_type(op->get_output_element_type(0)); - - cldnn::primitive_id inReshapeID = layerName + "_inReshape"; - cldnn::primitive_id permuteID = layerName + "_inputReorder"; - cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; - cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder"; - cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape"; - cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder"; - cldnn::primitive_id input_concatID = layerName + "_inputConcat"; - - cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 }; - cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 }; - cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape); - cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape); - p.add_primitive(*op, cldnn::reshape(inReshapeID, inputs[0], inputShape)); - p.add_primitive(*op, cldnn::reorder(permuteID, inReshapeID, inputLayout)); - - - std::string hiddenInResh = inHiddenReshapeID + "_1"; - std::string hiddenInStr = inHiddenReorderID + "_1"; - std::string cellInResh = inHiddenReshapeID + "_2"; - std::string cellInStr = inHiddenReorderID + "_2"; - p.add_primitive(*op, cldnn::reshape(hiddenInResh, inputs[1], inStateShape)); - p.add_primitive(*op, cldnn::reorder(hiddenInStr, cldnn::input_info(hiddenInResh), hiddenLayout)); - p.add_primitive(*op, cldnn::reshape(cellInResh, inputs[2], inStateShape)); - p.add_primitive(*op, cldnn::reorder(cellInStr, cldnn::input_info(cellInResh), hiddenLayout)); - p.add_primitive(*op, cldnn::concatenation(input_concatID, - { permuteID, hiddenInStr }, - 3)); - - cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 
1, 4 * lstm_hidden_size, 1 }; - cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz); - cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; - cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0}; - - std::string lstm_fc_id = layerName + "_fully_connected"; - std::string lstm_elt_id = layerName + "_lstm_elt"; - - cldnn::primitive_id WRconcatID = layerName + "_WRconcat"; - p.add_primitive(*op, cldnn::concatenation(WRconcatID, { weight, recurrent }, 1)); - - cldnn::primitive_id FCInputReshapeID = "Reshape_bf_" + lstm_fc_id + "_for_input"; - cldnn::tensor FCInputReshapeSz = { lstm_batch_size, inputShape.spatial[0] + inStateShape.spatial[0], 1, 1 }; - p.add_primitive(*op, cldnn::reshape(FCInputReshapeID, cldnn::input_info(input_concatID), FCInputReshapeSz)); - - p.add_primitive(*op, cldnn::fully_connected(lstm_fc_id, cldnn::input_info(FCInputReshapeID), WRconcatID, bias.pid)); - p.add_primitive(*op, cldnn::reshape(gemmReshapeID, cldnn::input_info(lstm_fc_id), gemmSz)); - p.add_primitive(*op, cldnn::reorder(gemmReorderID, cldnn::input_info(gemmReshapeID), gemmLayout)); - p.add_primitive(*op, cldnn::lstm_elt(lstm_elt_id, cldnn::input_info(gemmReorderID), cellInStr, clip, 0, activations, - activation_params, cldnn::lstm_weights_order::fizo, 0)); - - - cldnn::tensor outSz = cldnn::tensor{ lstm_batch_size, lstm_hidden_size, 1, 1 }; - cldnn::primitive_id outputHiddenCropID = layerName + "_hc"; - cldnn::primitive_id outputHiddenID = layerName + ".out0"; - p.add_primitive(*op, cldnn::crop(outputHiddenCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cldnn::tensor{0, 0, 0, 0})); - p.add_primitive(*op, cldnn::reshape(outputHiddenID, cldnn::input_info(outputHiddenCropID), outSz), {layerName}); + if (p.use_new_shape_infer()) { + cldnn::primitive_id input_concatID = layerName + "_inputConcat"; + p.add_primitive(*op, cldnn::concatenation(input_concatID, { inputs[0], inputs[1] }, 1)); + + cldnn::primitive_id lstm_fc_id = 
layerName + "_fully_connected"; + cldnn::primitive_id lstm_elt_id = layerName + "_lstm_elt"; + cldnn::primitive_id wr_concat_id = layerName + "_WRconcat"; + p.add_primitive(*op, cldnn::concatenation(wr_concat_id, { inputs[3], inputs[4] }, 1)); + p.add_primitive(*op, cldnn::fully_connected(lstm_fc_id, cldnn::input_info(input_concatID), wr_concat_id, bias.pid)); + p.add_primitive(*op, cldnn::lstm_elt(lstm_elt_id, cldnn::input_info(lstm_fc_id), inputs[2].pid, clip, 0, activations, + activation_params, cldnn::lstm_weights_order::fizo, 0)); + + auto outSz = op->get_output_partial_shape(0).to_shape(); + std::vector outSzPt; + for (auto i : outSz) { + outSzPt.push_back(i); + } - cldnn::primitive_id outputCellCropID = layerName + "_cc"; - cldnn::primitive_id outputCellID = layerName + ".out1"; - p.add_primitive(*op, cldnn::crop(outputCellCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cellCropSz)); - p.add_primitive(*op, cldnn::reshape(outputCellID, cldnn::input_info(outputCellCropID), outSz)); + cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; + + cldnn::primitive_id outputHiddenCropID = layerName + "_hc"; + cldnn::primitive_id outputHiddenID = layerName + ".out0"; + p.add_primitive(*op, cldnn::crop(outputHiddenCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cldnn::tensor{0, 0, 0, 0})); + p.add_primitive(*op, cldnn::reshape(outputHiddenID, cldnn::input_info(outputHiddenCropID), + false, outSzPt, op->get_output_partial_shape(0)), {layerName}); + + cldnn::primitive_id outputCellCropID = layerName + "_cc"; + cldnn::primitive_id outputCellID = layerName + ".out1"; + p.add_primitive(*op, cldnn::crop(outputCellCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cldnn::tensor{0, 1, 0, 0})); + p.add_primitive(*op, cldnn::reshape(outputCellID, cldnn::input_info(outputCellCropID), + false, outSzPt, op->get_output_partial_shape(1))); + } else { + // LSTM primitive works with single precision for all in/out/weights tensors + auto lstm_dtype = 
cldnn::element_type_to_data_type(op->get_output_element_type(0)); + + cldnn::primitive_id inReshapeID = layerName + "_inReshape"; + cldnn::primitive_id permuteID = layerName + "_inputReorder"; + cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; + cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder"; + cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape"; + cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder"; + cldnn::primitive_id input_concatID = layerName + "_inputConcat"; + + cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 }; + cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape); + cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape); + p.add_primitive(*op, cldnn::reshape(inReshapeID, inputs[0], inputShape)); + p.add_primitive(*op, cldnn::reorder(permuteID, inReshapeID, inputLayout)); + + + std::string hiddenInResh = inHiddenReshapeID + "_1"; + std::string hiddenInStr = inHiddenReorderID + "_1"; + std::string cellInResh = inHiddenReshapeID + "_2"; + std::string cellInStr = inHiddenReorderID + "_2"; + p.add_primitive(*op, cldnn::reshape(hiddenInResh, inputs[1], inStateShape)); + p.add_primitive(*op, cldnn::reorder(hiddenInStr, cldnn::input_info(hiddenInResh), hiddenLayout)); + p.add_primitive(*op, cldnn::reshape(cellInResh, inputs[2], inStateShape)); + p.add_primitive(*op, cldnn::reorder(cellInStr, cldnn::input_info(cellInResh), hiddenLayout)); + p.add_primitive(*op, cldnn::concatenation(input_concatID, + { permuteID, hiddenInStr }, + 3)); + + cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 }; + cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz); + cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::tensor cellCropSz = cldnn::tensor{0, 
1, 0, 0}; + + std::string lstm_fc_id = layerName + "_fully_connected"; + std::string lstm_elt_id = layerName + "_lstm_elt"; + + cldnn::primitive_id WRconcatID = layerName + "_WRconcat"; + p.add_primitive(*op, cldnn::concatenation(WRconcatID, { weight, recurrent }, 1)); + + cldnn::primitive_id FCInputReshapeID = "Reshape_bf_" + lstm_fc_id + "_for_input"; + cldnn::tensor FCInputReshapeSz = { lstm_batch_size, inputShape.spatial[0] + inStateShape.spatial[0], 1, 1 }; + p.add_primitive(*op, cldnn::reshape(FCInputReshapeID, cldnn::input_info(input_concatID), FCInputReshapeSz)); + + p.add_primitive(*op, cldnn::fully_connected(lstm_fc_id, cldnn::input_info(FCInputReshapeID), WRconcatID, bias.pid)); + p.add_primitive(*op, cldnn::reshape(gemmReshapeID, cldnn::input_info(lstm_fc_id), gemmSz)); + p.add_primitive(*op, cldnn::reorder(gemmReorderID, cldnn::input_info(gemmReshapeID), gemmLayout)); + p.add_primitive(*op, cldnn::lstm_elt(lstm_elt_id, cldnn::input_info(gemmReorderID), cellInStr, clip, 0, activations, + activation_params, cldnn::lstm_weights_order::fizo, 0)); + + + cldnn::tensor outSz = cldnn::tensor{ lstm_batch_size, lstm_hidden_size, 1, 1 }; + cldnn::primitive_id outputHiddenCropID = layerName + "_hc"; + cldnn::primitive_id outputHiddenID = layerName + ".out0"; + p.add_primitive(*op, cldnn::crop(outputHiddenCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cldnn::tensor{0, 0, 0, 0})); + p.add_primitive(*op, cldnn::reshape(outputHiddenID, cldnn::input_info(outputHiddenCropID), outSz), {layerName}); + + cldnn::primitive_id outputCellCropID = layerName + "_cc"; + cldnn::primitive_id outputCellID = layerName + ".out1"; + p.add_primitive(*op, cldnn::crop(outputCellCropID, cldnn::input_info(lstm_elt_id), hiddenSz, cellCropSz)); + p.add_primitive(*op, cldnn::reshape(outputCellID, cldnn::input_info(outputCellCropID), outSz)); + } } static void CreateLSTMSequenceOp(ProgramBuilder& p, const std::shared_ptr& op) { @@ -217,12 +249,12 @@ static void 
CreateLSTMSequenceOp(ProgramBuilder& p, const std::shared_ptr WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) }; - cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape"; - auto reshapeInPrim = cldnn::reshape(WRreshapeID, cldnn::input_info(WRconcatID), tensor_from_dims(WRreshapeSize)); + cldnn::primitive_id WRreshapeID = wr_concat_id + "_reshape"; + auto reshapeInPrim = cldnn::reshape(WRreshapeID, cldnn::input_info(wr_concat_id), tensor_from_dims(WRreshapeSize)); p.add_primitive(*op, reshapeInPrim); for (int i = 0; i < lstm_sequence_len; ++i) { diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 4ccdd52ee33158..590345e05f907c 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -7,6 +7,7 @@ #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" #include "openvino/op/variadic_split.hpp" +#include "openvino/op/lstm_cell.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/plugin/transformations_pipeline.hpp" @@ -250,10 +251,13 @@ std::vector ProgramBuilder::GetInputInfo(const std::shared_pt for (size_t i = 0; i < op->get_input_size(); i++) { auto prevOp = op->get_input_node_ptr(i); std::string prevName = layer_type_name_ID(prevOp); + // Note: Currently Split/Variadic Split are divided to multiple crops + // LSTMCell contains its own body network, and each output has a unique pid + // But there is no need to maintain output port index for the next node e.g. 
Result bool is_legacy_multiple_outputs = !allow_new_shape_infer - // Note:: Currently Split/Variadic Split are divided to multiple crops || ov::is_type(prevOp) - || ov::is_type(prevOp); + || ov::is_type(prevOp) + || ov::is_type(prevOp); if (prevOp->get_output_size() > 1 && is_legacy_multiple_outputs) { prevName += ".out" + std::to_string(op->get_input_source_output(i).get_index()); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp new file mode 100644 index 00000000000000..70def82640a4d5 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/tensor_iterator.cpp @@ -0,0 +1,322 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ov_models/utils/ov_helpers.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ov_models/builders.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_constants.hpp" +#include "shared_test_classes/base/utils/ranges.hpp" +#include +#include "shared_test_classes/base/utils/compare_results.hpp" +#include "openvino/pass/constant_folding.hpp" +#include +#include "shared_test_classes/base/utils/generate_inputs.hpp" + +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +/* +* Generate TensorIterator with LSTMCell +* @param ngPrc precision of model +* @param initShape initial shape {N, L(sequence length), I} +* @param N batch size +* @param I input size +* @param H hidden layer +*/ +static std::shared_ptr makeTIwithLSTMcell(ov::element::Type_t ngPRC, ov::PartialShape initShape, + size_t N, size_t I, size_t H, size_t sequence_axis, + ngraph::op::RecurrentSequenceDirection seq_direction) { + auto SENT = std::make_shared(ngPRC, initShape); + SENT->set_friendly_name("SENT"); + + // initial_hidden_state + auto H_init = 
std::make_shared(ngPRC, ov::Shape{N, 1, H}); + H_init->set_friendly_name("H_init"); + // initial_cell_state + auto C_init = std::make_shared(ngPRC, ov::Shape{N, 1, H}); + C_init->set_friendly_name("C_init"); + + auto H_t = std::make_shared(ngPRC, ov::Shape{N, 1, H}); + H_t->set_friendly_name("H_t"); + auto C_t = std::make_shared(ngPRC, ov::Shape{N, 1, H}); + C_t->set_friendly_name("C_t"); + + // Body + // input data + auto X = std::make_shared(ngPRC, ov::Shape{N, 1, I}); + X->set_friendly_name("X"); + + // the weights for matrix multiplication, gate order: fico + std::vector dataW(4 * H * I, 0); + auto W_body = std::make_shared(ngPRC, ov::Shape{4 * H, I}, dataW); + W_body->set_friendly_name("W_body"); + + // the recurrence weights for matrix multiplication, gate order: fico + std::vector dataR(4 * H * H, 0); + auto R_body = std::make_shared(ngPRC, ov::Shape{4 * H, H}, dataR); + R_body->set_friendly_name("R_body"); + + std::vector inShape = {N, H}; + auto constantH = std::make_shared(ov::element::i64, ov::Shape{2}, inShape); + constantH->set_friendly_name("constantH"); + + inShape = {N, I}; + auto constantX = std::make_shared(ov::element::i64, ov::Shape{2}, inShape); + constantX->set_friendly_name("constantX"); + + auto LSTM_cell = + std::make_shared(std::make_shared(X, constantX, false), + std::make_shared(H_t, constantH, false), + std::make_shared(C_t, constantH, false), + W_body, + R_body, + H); + LSTM_cell->set_friendly_name("LSTM_cell"); + + inShape = {N, 1, H}; + auto constantHo = std::make_shared(ov::element::i64, ov::Shape{3}, inShape); + constantHo->set_friendly_name("constantHo"); + + auto H_o = std::make_shared(LSTM_cell->output(0), constantHo, false); + H_o->set_friendly_name("H_o_reshape"); + auto C_o = std::make_shared(LSTM_cell->output(1), constantHo, false); + C_o->set_friendly_name("C_o_reshape"); + auto body = std::make_shared(ov::OutputVector{H_o, C_o}, ov::ParameterVector{X, H_t, C_t}); + body->set_friendly_name("body"); + + auto tensor_iterator 
= std::make_shared(); + tensor_iterator->set_friendly_name("tensor_iterator"); + tensor_iterator->set_body(body); + // H_t is Hinit on the first iteration, Ho after that + tensor_iterator->set_merged_input(H_t, H_init, H_o); + tensor_iterator->set_merged_input(C_t, C_init, C_o); + + // Set PortMap + if (seq_direction == ngraph::op::RecurrentSequenceDirection::FORWARD) { + tensor_iterator->set_sliced_input(X, SENT, 0, 1, 1, -1, sequence_axis); + } else if (seq_direction == ngraph::op::RecurrentSequenceDirection::REVERSE) { + tensor_iterator->set_sliced_input(X, SENT, -1, -1, 1, 0, sequence_axis); + } else { + OPENVINO_THROW("Bidirectional case is not supported."); + } + + // Output 0 is last Ho, result 0 of body + auto out0 = tensor_iterator->get_iter_value(H_o, -1); + // Output 1 is last Co, result 1 of body + auto out1 = tensor_iterator->get_iter_value(C_o, -1); + + auto results = + ov::ResultVector{std::make_shared(out0), std::make_shared(out1)}; + auto fn_ptr = std::make_shared(results, ov::ParameterVector{SENT, H_init, C_init}); + fn_ptr->set_friendly_name("TIwithLSTMcell"); + return fn_ptr; +} + +/* +* Generate LSTMSequence +* @param ngPrc precision of model +* @param initShape initial shape {N, L(sequence length), I} +* @param N batch size +* @param I input size +* @param H hidden layer +*/ +static std::shared_ptr makeLSTMSequence(ov::element::Type_t ngPRC, ov::PartialShape initShape, + size_t N, size_t I, size_t H, size_t sequence_axis, + ngraph::op::RecurrentSequenceDirection seq_direction) { + auto X = std::make_shared(ngPRC, initShape); + auto Y = std::make_shared(ngPRC, ov::Shape{N, 1, H}); + auto Z = std::make_shared(ngPRC, ov::Shape{N, 1, H}); + auto shape_of = std::make_shared(X); + auto indices = ov::op::v0::Constant::create(ov::element::i32, {1}, {1}); + auto axis = ov::op::v0::Constant::create(ov::element::i32, {}, {0}); + auto seq_lengths = std::make_shared(shape_of, indices, axis); + + auto w_val = std::vector(4 * H * I, 0); + auto r_val = 
std::vector(4 * H * H, 0); + auto b_val = std::vector(4 * H, 0); + auto W = ov::op::v0::Constant::create(ngPRC, ov::Shape{N, 4 * H, I}, w_val); + auto R = ov::op::v0::Constant::create(ngPRC, ov::Shape{N, 4 * H, H}, r_val); + auto B = ov::op::v0::Constant::create(ngPRC, ov::Shape{N, 4 * H}, b_val); + + auto rnn_sequence = std::make_shared(X, + Y, + Z, + seq_lengths, + W, + R, + B, + 128, + seq_direction); + auto Y_out = std::make_shared(rnn_sequence->output(0)); + auto Ho = std::make_shared(rnn_sequence->output(1)); + auto Co = std::make_shared(rnn_sequence->output(2)); + Y_out->set_friendly_name("Y_out"); + Ho->set_friendly_name("Ho"); + Co->set_friendly_name("Co"); + + auto fn_ptr = std::make_shared(ov::NodeVector{Y_out, Ho, Co}, ov::ParameterVector{X, Y, Z}); + fn_ptr->set_friendly_name("LSTMSequence"); + return fn_ptr; +} + +enum class LSTMType { + LSTMCell = 0, + LSTMSequence = 1 // will be updated at next step. +}; + +using DynamicTensorIteratorParams = typename std::tuple< + LSTMType, // LSTM type (LSTMCell, LSTMSequence) + InputShape, // input shapes (N[batch], L[seq_length], I[input_size]) + int32_t, // hidden size + ngraph::op::RecurrentSequenceDirection, // sequence direction + std::string, // device name + InferenceEngine::Precision, // precision + ov::AnyMap // configuration + >; + +/** + * Test case with Dynamic SHAPE version of loop operation. + * Total iteration count is dynamic. 
+ */ +class DynamicTensorIteratorTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + LSTMType type; + InputShape data_shapes; + int32_t hidden_size; + ngraph::op::RecurrentSequenceDirection seq_direction; + std::string target_device; + InferenceEngine::Precision data_precision; + ov::Any configuration; + std::tie(type, data_shapes, + hidden_size, + seq_direction, + target_device, + data_precision, + configuration) = obj.param; + std::ostringstream result; + result << "TestType=" << (type == LSTMType::LSTMCell? "LSTMCell" : "LSTMSequence") << "_"; + result << "IS=("; + result << ov::test::utils::partialShape2str({data_shapes.first}) << "_"; + result << ov::test::utils::vec2str(data_shapes.second) << "_"; + result << ")_"; + result << "hidden_size=" << hidden_size << "_"; + result << "direction=" << seq_direction << "_"; + result << "netPRC=" << data_precision << "_"; + result << "targetDevice=" << target_device << "_"; + return result.str(); + } + +private: + InputShape data_shapes; + ngraph::op::RecurrentSequenceDirection seq_direction; + InferenceEngine::Precision data_prc; + size_t hidden_size; + size_t batch_size; + size_t input_size; + LSTMType type; + +protected: + void SetUp() override { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::AnyMap configuration_new; + std::tie(type, data_shapes, + hidden_size, + seq_direction, + targetDevice, + data_prc, + configuration_new) = GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc); + if (targetDevice == ov::test::utils::DEVICE_GPU) { + configuration = configuration_new; + } + + + size_t sequence_axis = 1; + auto init_shape = data_shapes.first; + init_input_shapes({data_shapes}); + batch_size = static_cast(init_shape[0].get_length()); + input_size = static_cast(init_shape[init_shape.size()-1].get_length()); + if (type == LSTMType::LSTMCell) + function = makeTIwithLSTMcell(ngPrc, 
init_shape, batch_size, input_size, hidden_size, sequence_axis, seq_direction); + else + function = makeLSTMSequence(ngPrc, init_shape, batch_size, input_size, hidden_size, sequence_axis, seq_direction); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + ov::Shape default_shape{batch_size, 1, hidden_size}; + auto inputMap = ov::test::utils::getInputMap(); + auto itTargetShape = targetInputStaticShapes.begin(); + for (const auto ¶m : function->get_parameters()) { + std::shared_ptr inputNode = param; + for (size_t i = 0; i < param->get_output_size(); i++) { + for (const auto &node : param->get_output_target_inputs(i)) { + std::shared_ptr nodePtr = node.get_node()->shared_from_this(); + auto it = inputMap.find(nodePtr->get_type_info()); + ASSERT_NE(it, inputMap.end()); + for (size_t port = 0; port < nodePtr->get_input_size(); ++port) { + if (itTargetShape != targetInputStaticShapes.end()) { + if (nodePtr->get_input_node_ptr(port)->shared_from_this() == inputNode->shared_from_this()) { + inputs.insert({param, it->second(nodePtr, port, param->get_element_type(), *itTargetShape)}); + break; + } + } else { + inputs.insert({param, it->second(nodePtr, port, param->get_element_type(), default_shape)}); + } + } + } + } + if (itTargetShape != targetInputStaticShapes.end()) + itTargetShape++; + } + } +}; + + +TEST_P(DynamicTensorIteratorTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +std::vector input_shapes = { + InputShape(ov::PartialShape({1, -1, 512}), {{1, 30, 512}, {1, 10, 512}, {1, 5, 512}}) +}; + +std::vector hidden_sizes = { + 128 +}; + +ov::AnyMap net_configuration = { + {GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO} +}; + +std::vector net_precision = { + InferenceEngine::Precision::FP32, +}; + +std::vector reccurent_sequence_direction = { + ngraph::op::RecurrentSequenceDirection::FORWARD, + ngraph::op::RecurrentSequenceDirection::REVERSE, +}; + 
+INSTANTIATE_TEST_SUITE_P(smoke_DynamicTensorIterator_LSTMCell, DynamicTensorIteratorTest, + testing::Combine( + /* lstm_type */ testing::ValuesIn({LSTMType::LSTMCell}), + /* data_shape */ testing::ValuesIn(input_shapes), + /* hidden_size */ testing::ValuesIn(hidden_sizes), + /* direction */ testing::ValuesIn(reccurent_sequence_direction), + /* device */ testing::Values(ov::test::utils::DEVICE_GPU), + /* data_prc */ testing::ValuesIn(net_precision), + /* configuration */ testing::Values(net_configuration)), + DynamicTensorIteratorTest::getTestCaseName); +} // namespace GPULayerTestsDefinitions From 306137f86b36c855289e04462b0f6a878938edd8 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Mon, 13 Nov 2023 08:52:07 +0400 Subject: [PATCH 267/275] [GPU] Make ShapePredictor instance unique for each InferRequest instead of the cldnn::network (#21019) --- .../include/intel_gpu/graph/network.hpp | 5 ++-- .../intel_gpu/plugin/sync_infer_request.hpp | 1 + .../src/graph/impls/common/condition.cpp | 5 ++-- .../intel_gpu/src/graph/impls/common/loop.cpp | 1 + .../intel_gpu/src/graph/primitive_inst.cpp | 2 +- .../src/plugin/sync_infer_request.cpp | 24 +++++++++++++------ 6 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index ab5d6b5e0af140..1f73b1e6813114 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -247,7 +247,8 @@ struct network { const variables_state_info_map& get_variables_state_info() const; const ExecutionConfig& get_config() const { return _config; } - ShapePredictor& get_shape_predictor() { return *_shape_predictor; } + std::shared_ptr get_shape_predictor() { return _shape_predictor; } + void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } #ifdef GPU_DEBUG_CONFIG int64_t get_current_iteration_num() 
{ return iteration; } @@ -287,7 +288,7 @@ struct network { std::unordered_map _old_events; output_chains_map _output_chains; - std::unique_ptr _shape_predictor; + std::shared_ptr _shape_predictor; void build_exec_order(); void allocate_primitive_instance(program_node const& node); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp index 24109144496df6..964e2053e6f4e3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp @@ -83,6 +83,7 @@ class SyncInferRequest : public ov::ISyncInferRequest { std::shared_ptr m_graph; RemoteContextImpl::Ptr m_context = nullptr; std::shared_ptr m_stream_executor = nullptr; + std::shared_ptr m_shape_predictor = nullptr; bool m_enable_profiling = false; bool m_use_external_queue = false; diff --git a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp index 02c94ebf31e881..84e2811c42076c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/condition.cpp @@ -37,8 +37,9 @@ struct condition_impl : typed_primitive_impl { set_node_params(instance.get_node()); auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream()); - network::ptr executed_net = pred? instance.get_net_true() : instance.get_net_false(); - auto branch = pred? instance.get_branch_true() : instance.get_branch_false(); + network::ptr executed_net = pred ? instance.get_net_true() : instance.get_net_false(); + auto branch = pred ? instance.get_branch_true() : instance.get_branch_false(); + executed_net->set_shape_predictor(instance.get_network().get_shape_predictor()); GPU_DEBUG_LOG << "predicate: " << (pred ? 
"True" : "False") << std::endl; // Set input memory of inner network before its execution diff --git a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp index 119a186b71a8b7..3be809c5a1445e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/common/loop.cpp @@ -121,6 +121,7 @@ struct loop_impl : typed_primitive_impl { auto ev = stream.create_user_event(false); + body_network->set_shape_predictor(outer_network.get_shape_predictor()); OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id"); auto num_iterations = instance.get_num_iterations(); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index cfef12b8722323..4e47dc49de4d2a 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -427,7 +427,7 @@ event::ptr primitive_inst::realloc_if_needed() { } auto current_shape = actual_layout.get_shape(); - auto& sp = get_network().get_shape_predictor(); + auto& sp = *get_network().get_shape_predictor(); auto dt_size = ov::element::Type(actual_layout.data_type).bitwidth(); auto prealloc_info = sp.predict_preallocation_shape(id(), current_shape, dt_size, can_reuse_buffer); if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * dt_size)) { diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 574d78e3a5332d..aa9375839a283f 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -226,6 +226,7 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c : ov::ISyncInferRequest(compiled_model) , m_graph(compiled_model->get_graph(0)) , 
m_context(std::static_pointer_cast(compiled_model->get_context_impl())) + , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio))) , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) , m_use_external_queue(m_graph->use_external_queue()) { bool is_legacy_api = !compiled_model->is_new_api(); @@ -233,6 +234,17 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr& c allocate_inputs(); allocate_outputs(); allocate_states(); + + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) { + auto& mem_preallocation_params = debug_config->mem_preallocation_params; + m_shape_predictor.reset( + new cldnn::ShapePredictor(&m_graph->get_engine(), + mem_preallocation_params.next_iters_preallocation_count, + mem_preallocation_params.max_per_iter_size, + mem_preallocation_params.max_per_dim_diff, + mem_preallocation_params.buffers_preallocation_ratio)); + } } void SyncInferRequest::infer() { @@ -401,6 +413,7 @@ void SyncInferRequest::enqueue() { auto network = m_graph->get_network(); network->assign_variables_memories(); + network->set_shape_predictor(m_shape_predictor); m_internal_outputs.clear(); m_internal_outputs = network->execute(dependencies); @@ -476,8 +489,7 @@ void SyncInferRequest::wait() { need_reallocate = usm_host_tensor->get_impl()->get_original_memory()->size() < output_memory->size(); if (need_reallocate) { - auto& shape_predictor = m_graph->get_network()->get_shape_predictor(); - auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), shape_predictor); + auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), *m_shape_predictor); output_tensor->set_shape(actual_memory_shape); } } @@ -585,8 +597,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe auto actual_memory_shape = 
tensor_shape; if (is_dynamic) { - auto& shape_predictor = m_graph->get_network()->get_shape_predictor(); - actual_memory_shape = predict_shape(name, tensor_shape, element_type, shape_predictor); + actual_memory_shape = predict_shape(name, tensor_shape, element_type, *m_shape_predictor); } return { create_device_tensor(actual_memory_shape, element_type, need_lockable_mem), TensorOwner::PLUGIN }; @@ -746,7 +757,7 @@ std::vector SyncInferRequest::prepare_input(const std::string if (is_remote) { m_plugin_inputs[name] = user_tensor_wrapper; - } else if (is_usm_host_tensor && !convert_needed) { + } else if (is_usm_host_tensor && !convert_needed && can_use_usm_host(engine)) { m_plugin_inputs[name] = {usm_host_ptr->get_impl(), user_tensor_wrapper.owner}; is_remote = true; } @@ -762,8 +773,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto device_tensor = std::dynamic_pointer_cast(device_tensor_wrapper.ptr); if (is_dynamic) { if (device_tensor->get_original_memory()->size() < user_tensor->get_byte_size()) { - auto& shape_predictor = network->get_shape_predictor(); - auto actual_shape = predict_shape(name, user_tensor->get_shape(), device_tensor_et, shape_predictor); + auto actual_shape = predict_shape(name, user_tensor->get_shape(), device_tensor_et, *m_shape_predictor); GPU_DEBUG_TRACE_DETAIL << " actual memory shape: " << actual_shape.to_string() << std::endl; auto new_tensor = create_device_tensor(actual_shape, device_tensor_et, false); new_tensor->set_shape(user_tensor->get_shape()); From fef0df6ee100e19775ad22e766967f04ef58fac9 Mon Sep 17 00:00:00 2001 From: River Li Date: Mon, 13 Nov 2023 14:37:35 +0800 Subject: [PATCH 268/275] [CPU] Add deprecation message to extension API (#20970) --- src/plugins/intel_cpu/src/plugin.cpp | 8 +++++++- src/plugins/intel_cpu/src/plugin.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 
ae37a2a1bf70b7..5b22beb98ba6ea 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -820,6 +820,12 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio return get_metric_legacy(name, options); } +OPENVINO_SUPPRESS_DEPRECATED_START +void Engine::add_extension(const InferenceEngine::IExtensionPtr& extension) { + extensionManager->AddExtension(extension); +} +OPENVINO_SUPPRESS_DEPRECATED_END + ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& model, const ov::AnyMap& config) const { WeightsSharing::Ptr fake_w_cache; @@ -839,7 +845,7 @@ ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf); auto context = - std::make_shared(conf, nullptr, fake_w_cache, false); + std::make_shared(conf, extensionManager, fake_w_cache, false); auto supported = ov::get_supported_nodes( model, diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 4bb728f91376e0..2a44c35988a746 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -57,6 +57,10 @@ class Engine : public ov::IPlugin { "get_default_context is not supported by CPU plugin!"); }; + OPENVINO_SUPPRESS_DEPRECATED_START + void add_extension(const std::shared_ptr& extension) override; + OPENVINO_SUPPRESS_DEPRECATED_END + private: bool is_legacy_api() const; From c451a94572387492fc10663253cd8b2c3e4b5a5f Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 13 Nov 2023 07:44:54 +0100 Subject: [PATCH 269/275] Fix compile warning regarding ABI changes (#20990) and using std::true_type/false_type --- .../intel_gna/src/transformations/pwl_approximation.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp b/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp index a25974b35d17fe..b8fb9b5f58f05e 100644 --- 
a/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp +++ b/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp @@ -261,21 +261,21 @@ double lower_bound() { } template -double lower_bound(std::true_type, double exponent) { +double lower_bound(double exponent, std::true_type) { return Function::lower_bound(exponent); } template -double lower_bound(std::false_type, double exponent) { +double lower_bound(double exponent, std::false_type) { throw std::runtime_error("Not supported"); } template double lower_bound(double exponent) { return lower_bound( + exponent, std::integral_constant < bool, - std::is_same::value || std::is_same::value > (), - exponent); + std::is_same::value || std::is_same::value > ()); } template From 1a5b0b70f93ebceb92dcf6a047bacf4ef60982e7 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 13 Nov 2023 13:29:53 +0400 Subject: [PATCH 270/275] Remove `castOps2Nodes` and `convert2OutputVector` (#21015) --- .../functional/single_layer_tests/augru_cell.cpp | 8 +++++--- .../single_layer_tests/augru_sequence.cpp | 5 ++++- .../functional/single_layer_tests/concat.cpp | 9 +++++---- .../single_layer_tests/custom_op_internal_dyn.cpp | 9 +++++---- .../single_layer_tests/detection_output.cpp | 8 +++++--- .../functional/single_layer_tests/gru_cell.cpp | 8 +++++--- .../single_layer_tests/gru_sequence.cpp | 6 +++++- .../functional/single_layer_tests/lstm_cell.cpp | 7 +++++-- .../single_layer_tests/lstm_sequence.cpp | 6 +++++- .../functional/single_layer_tests/rnn_cell.cpp | 6 ++++-- .../single_layer_tests/rnn_sequence.cpp | 14 +++++++++----- .../src/custom_op_insert_convert_i64.cpp | 8 +++++--- .../preprocess_tests/precision_convert.cpp | 9 +++++---- .../dynamic/detection_output.cpp | 7 +++++-- .../single_layer/group_normalization.hpp | 11 ++++------- .../shared_test_classes/single_layer/softmax.hpp | 7 ++----- .../src/single_layer/batch_norm.cpp | 4 +--- .../src/single_layer/batch_to_space.cpp | 4 +--- 
.../src/single_layer/concat.cpp | 9 +++++---- .../src/single_layer/convolution.cpp | 4 +--- .../src/single_layer/convolution_backprop.cpp | 6 ++---- .../single_layer/convolution_backprop_data.cpp | 6 ++---- .../src/single_layer/ctc_greedy_decoder.cpp | 4 +--- .../single_layer/ctc_greedy_decoder_seq_len.cpp | 4 +--- .../src/single_layer/ctc_loss.cpp | 4 +--- .../src/single_layer/deformable_convolution.cpp | 2 -- .../src/single_layer/deformable_psroi_pooling.cpp | 14 +++++--------- .../src/single_layer/detection_output.cpp | 8 +++++--- .../shared_test_classes/src/single_layer/dft.cpp | 3 +-- .../src/single_layer/einsum.cpp | 8 +++++--- .../experimental_detectron_detection_output.cpp | 5 ++--- ..._detectron_generate_proposals_single_image.cpp | 5 ++--- ...xperimental_detectron_prior_grid_generator.cpp | 5 ++--- ...experimental_detectron_roifeatureextractor.cpp | 6 ++++-- .../experimental_detectron_topkrois.cpp | 7 +++---- .../src/single_layer/fake_quantize.cpp | 5 ++--- .../src/single_layer/gather.cpp | 15 +++++---------- .../src/single_layer/gather_elements.cpp | 4 +--- .../src/single_layer/gather_nd.cpp | 8 ++------ .../src/single_layer/generate_proposals.cpp | 5 ++--- .../shared_test_classes/src/single_layer/grn.cpp | 4 +--- .../src/single_layer/group_convolution.cpp | 4 +--- .../group_convolution_backprop_data.cpp | 10 +++------- .../src/single_layer/is_inf.cpp | 3 +-- .../src/single_layer/log_softmax.cpp | 5 +---- .../shared_test_classes/src/single_layer/lrn.cpp | 4 +--- .../src/single_layer/lstm_cell_basic.cpp | 6 ++++-- .../src/single_layer/mat_mul.cpp | 4 +--- .../src/single_layer/matrix_nms.cpp | 4 +--- .../src/single_layer/multiclass_nms.cpp | 9 ++++++--- .../shared_test_classes/src/single_layer/mvn.cpp | 8 +++----- .../src/single_layer/one_hot.cpp | 4 +--- .../shared_test_classes/src/single_layer/pad.cpp | 4 +--- .../src/single_layer/pooling.cpp | 12 +++--------- .../src/single_layer/proposal.cpp | 3 +-- .../src/single_layer/psroi_pooling.cpp | 6 ++---- 
.../shared_test_classes/src/single_layer/rdft.cpp | 4 +--- .../src/single_layer/reduce_ops.cpp | 4 +--- .../src/single_layer/reshape.cpp | 4 +--- .../src/single_layer/roi_align.cpp | 8 ++------ .../src/single_layer/roi_pooling.cpp | 6 ++---- .../src/single_layer/scatter_ND_update.cpp | 3 +-- .../src/single_layer/scatter_elements_update.cpp | 3 +-- .../src/single_layer/scatter_update.cpp | 3 +-- .../src/single_layer/shape_of.cpp | 3 +-- .../src/single_layer/shuffle_channels.cpp | 4 +--- .../src/single_layer/space_to_batch.cpp | 4 +--- .../src/single_layer/space_to_depth.cpp | 3 +-- .../src/single_layer/split.cpp | 4 +--- .../src/single_layer/strided_slice.cpp | 4 +--- .../shared_test_classes/src/single_layer/tile.cpp | 4 +--- .../shared_test_classes/src/single_layer/topk.cpp | 4 +--- .../src/single_layer/transpose.cpp | 4 +--- .../src/single_layer/variadic_split.cpp | 2 -- 74 files changed, 184 insertions(+), 250 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_cell.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_cell.cpp index 1666eb68bc5435..4d15d8e65f638d 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_cell.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_cell.cpp @@ -94,12 +94,14 @@ class AUGRUCellCPUTest : public testing::WithParamInterface(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } std::vector WRB = {{3 * hiddenSize, inputSize}, {3 * hiddenSize, hiddenSize}, {(linearBeforeReset ? 
4 : 3) * hiddenSize}}; - auto augruCellOp = ngraph::builder::makeAUGRU( - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hiddenSize/*, activations, {}, {}, clip, linearBeforeReset*/); + auto augruCellOp = ngraph::builder::makeAUGRU(paramsOuts, WRB, hiddenSize/*, activations, {}, {}, clip, linearBeforeReset*/); function = makeNgraphFunction(netPrecision, params, augruCellOp, "AUGRUCell"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_sequence.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_sequence.cpp index 610fa5e6d9bc29..e49c564559eeea 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/augru_sequence.cpp @@ -130,10 +130,13 @@ class AUGRUSequenceCPUTest : public testing::WithParamInterfaceset_element_type(ElementType::i64); } } + ov::OutputVector paramsOuts; + for (const auto& param : params) + paramsOuts.push_back(param); std::vector WRB = {{numDirections, 3 * hiddenSize, inputSize}, {numDirections, 3 * hiddenSize, hiddenSize}, {numDirections, (linearBeforeReset ? 
4 : 3) * hiddenSize}, {batchSize}}; - auto augruSequenceOp = ngraph::builder::makeAUGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + auto augruSequenceOp = ngraph::builder::makeAUGRU(paramsOuts, WRB, hiddenSize, true, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp index 3a0da3008ffd66..e805d802abeb7d 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/concat.cpp @@ -79,12 +79,13 @@ class ConcatLayerCPUTest : public testing::WithParamInterface(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto concat = std::make_shared(paramOuts, axis); + auto concat = std::make_shared(paramsOuts, axis); function = makeNgraphFunction(netPrecision, params, concat, "ConcatCPU"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/custom_op_internal_dyn.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/custom_op_internal_dyn.cpp index 56e2b9ededea63..f7fffe774d90f0 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/custom_op_internal_dyn.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/custom_op_internal_dyn.cpp @@ -86,12 +86,13 @@ class CustomOpCPUTest : public SubgraphBaseTest { init_input_shapes({inputShapes}); ov::ParameterVector inputParams; + ov::OutputVector paramsOuts; for (auto&& shape : inputDynamicShapes) { - inputParams.push_back(std::make_shared(ngraph::element::f32, shape)); + auto param = std::make_shared(ngraph::element::f32, shape); + inputParams.push_back(param); + paramsOuts.push_back(param); } - - auto paramOuts = 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - auto customOp = std::make_shared(paramOuts); + auto customOp = std::make_shared(paramsOuts); auto shapeOf = std::make_shared(customOp->output(1)); ngraph::ResultVector results{std::make_shared(customOp->output(0)), diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/detection_output.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/detection_output.cpp index 814ff07a29c746..9d8f027ef20102 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/detection_output.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/detection_output.cpp @@ -198,11 +198,13 @@ class DetectionOutputLayerCPUTest : public testing::WithParamInterface(ov::element::f32, shape)); + auto param = std::make_shared(ov::element::f32, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); + auto detOut = ngraph::builder::makeDetectionOutput(paramsOuts, attrs); ngraph::ResultVector results{std::make_shared(detOut)}; function = std::make_shared(results, params, "DetectionOutputDynamic"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_cell.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_cell.cpp index be63ddbc5dd984..94768354adbc6d 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_cell.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_cell.cpp @@ -94,12 +94,14 @@ class GRUCellCPUTest : public testing::WithParamInterface(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } std::vector WRB = {{3 * hiddenSize, inputSize}, {3 * hiddenSize, hiddenSize}, {(linearBeforeReset ? 
4 : 3) * hiddenSize}}; - auto gruCellOp = ngraph::builder::makeGRU( - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hiddenSize, activations, {}, {}, clip, linearBeforeReset); + auto gruCellOp = ngraph::builder::makeGRU(paramsOuts, WRB, hiddenSize, activations, {}, {}, clip, linearBeforeReset); function = makeNgraphFunction(netPrecision, params, gruCellOp, "GRUCell"); } diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_sequence.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_sequence.cpp index 265566c51f02f5..60596e8bd697c7 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/gru_sequence.cpp @@ -122,9 +122,13 @@ class GRUSequenceCPUTest : public testing::WithParamInterface WRB = {{numDirections, 3 * hiddenSize, inputSize}, {numDirections, 3 * hiddenSize, hiddenSize}, {numDirections, (linearBeforeReset ? 4 : 3) * hiddenSize}, {batchSize}}; - auto gruSequenceOp = ngraph::builder::makeGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + auto gruSequenceOp = ngraph::builder::makeGRU(paramsOuts, WRB, hiddenSize, activations, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_cell.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_cell.cpp index ae934271631ec2..b03a7a35e53666 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_cell.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_cell.cpp @@ -93,10 +93,13 @@ class LSTMCellLayerCPUTest : public testing::WithParamInterface(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + std::vector WRB = {{4 * hiddenSize, inputSize}, {4 * hiddenSize, 
hiddenSize}, {4 * hiddenSize}}; auto lstmCellOp = ngraph::builder::makeLSTM(paramsOuts, WRB, hiddenSize, activations, {}, {}, clip); diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_sequence.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_sequence.cpp index 218a45c1661076..206c29731b7353 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/lstm_sequence.cpp @@ -130,9 +130,13 @@ class LSTMSequenceCPUTest : public testing::WithParamInterface WRB = {{numDirections, 4 * hiddenSize, inputSize}, {numDirections, 4 * hiddenSize, hiddenSize}, {numDirections, 4 * hiddenSize}, {batchSize}}; - auto lstmSequenceOp = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + auto lstmSequenceOp = ngraph::builder::makeLSTM(paramsOuts, WRB, hiddenSize, activations, diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_cell.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_cell.cpp index 98b5e2b73bb203..af94855c414e1d 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_cell.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_cell.cpp @@ -89,10 +89,12 @@ class RNNCellCPUTest : public testing::WithParamInterface, } ov::ParameterVector params; + ov::OutputVector paramsOuts; for (auto&& shape : inputDynamicShapes) { - params.push_back(std::make_shared(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector WRB = {{hiddenSize, inputSize}, {hiddenSize, hiddenSize}, {hiddenSize}}; auto rnnCellOp = ngraph::builder::makeRNN(paramsOuts, WRB, hiddenSize, activations, {}, {}, clip); diff --git 
a/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_sequence.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_sequence.cpp index 1fdd66344f95fc..2efa7fae3b70ea 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_sequence.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/rnn_sequence.cpp @@ -106,10 +106,10 @@ class RNNSequenceCPUTest : public testing::WithParamInterface(netPrecision, shape)); - } + ov::ParameterVector params; + for (auto&& shape : inputDynamicShapes) { + params.push_back(std::make_shared(netPrecision, shape)); + } const size_t batchSize = inputDynamicShapes[0][0].is_static() ? inputDynamicShapes[0][0].get_length() : inputDynamicShapes[1][0].is_static() ? inputDynamicShapes[1][0].get_length() : inputDynamicShapes.size() > 2 && inputDynamicShapes[2][0].is_static() ? inputDynamicShapes[2][0].get_length() : @@ -124,9 +124,13 @@ class RNNSequenceCPUTest : public testing::WithParamInterface WRB = {{numDirections, hiddenSize, inputSize}, {numDirections, hiddenSize, hiddenSize}, {numDirections, hiddenSize}, {batchSize}}; - auto rnn_sequence = ngraph::builder::makeRNN(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + auto rnn_sequence = ngraph::builder::makeRNN(paramsOuts, WRB, hiddenSize, activations, diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp index 845b68932b42c1..518676a15e63b3 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_insert_convert_i64.cpp @@ -101,11 +101,13 @@ class CustomOpConvertI64CPUTest : public testing::WithParamInterface(inType, shape)); + auto param = std::make_shared(inType, shape); + inputParams.push_back(param); + paramsOuts.push_back(param); } - auto 
paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - auto customOp = std::make_shared(paramOuts); + auto customOp = std::make_shared(paramsOuts); ov::ResultVector results{std::make_shared(customOp)}; function = std::make_shared(results, inputParams, "customOpTest"); diff --git a/src/plugins/intel_gna/tests/functional/preprocess_tests/precision_convert.cpp b/src/plugins/intel_gna/tests/functional/preprocess_tests/precision_convert.cpp index f1ddaaa3446495..2dffe679072639 100644 --- a/src/plugins/intel_gna/tests/functional/preprocess_tests/precision_convert.cpp +++ b/src/plugins/intel_gna/tests/functional/preprocess_tests/precision_convert.cpp @@ -55,12 +55,13 @@ class PreprocessGNATest : public testing::WithParamInterface(net_type, shape)); + auto param = std::make_shared(net_type, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto concat = std::make_shared(paramOuts, 1); + auto concat = std::make_shared(paramsOuts, 1); ngraph::ResultVector results{std::make_shared(concat)}; function = std::make_shared(results, params, "concat"); } diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp index 481d769cc97606..4690403ee2f530 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp @@ -212,8 +212,11 @@ class DetectionOutputLayerGPUTest : public testing::WithParamInterface(detOut)}; function = std::make_shared(results, params, "DetectionOutputDynamic"); } else { - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); + 
ov::OutputVector paramsOuts; + for (auto&& param : params) { + paramsOuts.push_back(param); + } + auto detOut = ngraph::builder::makeDetectionOutput(paramsOuts, attrs); ngraph::ResultVector results{std::make_shared(detOut)}; function = std::make_shared(results, params, "DetectionOutputDynamic"); } diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp index 759f47786d98be..27873d36c80098 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/group_normalization.hpp @@ -59,16 +59,13 @@ class GroupNormalizationTest : public testing::WithParamInterface(ngPrc, shape)); - } - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); const auto groupNormalization = std::make_shared( - paramOuts.at(0), - paramOuts.at(1), - paramOuts.at(2), + params.at(0), + params.at(1), + params.at(2), num_groups, epsilon); const ngraph::ResultVector results{std::make_shared(groupNormalization)}; diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/softmax.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/softmax.hpp index a3eeeb4e212e54..a8cfd99e833ff2 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/softmax.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/softmax.hpp @@ -65,13 +65,10 @@ class SoftMaxLayerTestBase : public testing::WithParamInterface(ngPrc, shape)); - } - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - const auto softMax = std::make_shared(paramOuts.at(0), axis); + const auto softMax 
= std::make_shared(params.at(0), axis); const ngraph::ResultVector results{std::make_shared(softMax)}; // TODO: This workaround is needed as there is no full support for f16 type in the reference implementation diff --git a/src/tests/functional/shared_test_classes/src/single_layer/batch_norm.cpp b/src/tests/functional/shared_test_classes/src/single_layer/batch_norm.cpp index 5cda6786168d66..1a02639b7e8ccf 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/batch_norm.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/batch_norm.cpp @@ -38,10 +38,8 @@ void BatchNormLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto batchNorm = ngraph::builder::makeBatchNormInference(paramOuts[0], epsilon); + auto batchNorm = ngraph::builder::makeBatchNormInference(params[0], epsilon); ngraph::ResultVector results{std::make_shared(batchNorm)}; function = std::make_shared(results, params, "BatchNormInference"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/batch_to_space.cpp b/src/tests/functional/shared_test_classes/src/single_layer/batch_to_space.cpp index f95cff80060304..853168a82657e4 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/batch_to_space.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/batch_to_space.cpp @@ -36,9 +36,7 @@ void BatchToSpaceLayerTest::SetUp() { std::tie(blockShape, cropsBegin, cropsEnd, inputShape, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - 
ngraph::helpers::castOps2Nodes(params)); - auto b2s = ngraph::builder::makeBatchToSpace(paramOuts[0], ngPrc, blockShape, cropsBegin, cropsEnd); + auto b2s = ngraph::builder::makeBatchToSpace(params[0], ngPrc, blockShape, cropsBegin, cropsEnd); ngraph::ResultVector results{std::make_shared(b2s)}; function = std::make_shared(results, params, "BatchToSpace"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/concat.cpp b/src/tests/functional/shared_test_classes/src/single_layer/concat.cpp index 2b8978316648f6..d57b4c66c8908c 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/concat.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/concat.cpp @@ -33,12 +33,13 @@ void ConcatLayerTest::SetUp() { std::tie(axis, inputShape, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params; + ov::OutputVector paramsOuts; for (auto&& shape : inputShape) { - params.push_back(std::make_shared(ngPrc, ov::Shape(shape))); + auto param = std::make_shared(ngPrc, ov::Shape(shape)); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto concat = std::make_shared(paramOuts, axis); + auto concat = std::make_shared(paramsOuts, axis); ngraph::ResultVector results{std::make_shared(concat)}; function = std::make_shared(results, params, "concat"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/convolution.cpp b/src/tests/functional/shared_test_classes/src/single_layer/convolution.cpp index 3d43ebf255d948..6dd2b56dce0210 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/convolution.cpp @@ -52,8 +52,6 @@ void ConvolutionLayerTest::SetUp() { std::tie(kernel, stride, padBegin, 
padEnd, dilation, convOutChannels, padType) = convParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); std::vector filter_weights; if (targetDevice == ov::test::utils::DEVICE_GNA) { auto filter_size = std::accumulate(std::begin(kernel), std::end(kernel), 1, std::multiplies()); @@ -61,7 +59,7 @@ void ConvolutionLayerTest::SetUp() { -0.1f, 0.1f); } auto conv = std::dynamic_pointer_cast( - ngraph::builder::makeConvolution(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeConvolution(params[0], ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, false, filter_weights)); ngraph::ResultVector results{std::make_shared(conv)}; function = std::make_shared(results, params, "convolution"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp b/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp index 6719f30f00ffc7..f7f1597e0ea300 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp @@ -54,15 +54,13 @@ void ConvolutionBackpropLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto convBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeConvolutionBackpropData(params[0], ngPrc, 
kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, false, outPadding)); if (!outputShape.empty()) { auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); convBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, + ngraph::builder::makeConvolutionBackpropData(params[0], outShape, ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels)); } ngraph::ResultVector results{std::make_shared(convBackpropData)}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp index f995093b5aa17b..ca86a0333b19b0 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp @@ -56,15 +56,13 @@ void ConvolutionBackpropDataLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto convBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeConvolutionBackpropData(params[0], ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, false, outPadding)); if (!outputShape.empty()) { auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); convBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeConvolutionBackpropData(paramOuts[0], 
outShape, ngPrc, kernel, stride, padBegin, + ngraph::builder::makeConvolutionBackpropData(params[0], outShape, ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels)); } ngraph::ResultVector results{std::make_shared(convBackpropData)}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp b/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp index c92af89b30287b..c51d8040205bd0 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder.cpp @@ -45,11 +45,9 @@ void CTCGreedyDecoderLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector paramsIn {std::make_shared(ngPrc, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(paramsIn)); auto ctcGreedyDecoder = std::dynamic_pointer_cast( - ngraph::builder::makeCTCGreedyDecoder(paramOuts[0], mergeRepeated)); + ngraph::builder::makeCTCGreedyDecoder(paramsIn[0], mergeRepeated)); ngraph::ResultVector results{ std::make_shared(ctcGreedyDecoder) }; function = std::make_shared(results, paramsIn, "CTCGreedyDecoder"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp b/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp index 55309e52fd5c3f..546e3aa3864213 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/ctc_greedy_decoder_seq_len.cpp @@ -56,8 +56,6 @@ void CTCGreedyDecoderSeqLenLayerTest::SetUp() { auto ngDataPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dataPrecision); auto ngIdxPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(indicesPrecision); ov::ParameterVector paramsIn 
{std::make_shared(ngDataPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(paramsIn)); const auto sequenceLenNode = [&] { const size_t B = inputShape[0]; @@ -83,7 +81,7 @@ void CTCGreedyDecoderSeqLenLayerTest::SetUp() { blankIndex = std::min(blankIndex, C - 1); auto ctcGreedyDecoderSeqLen = std::dynamic_pointer_cast( - ngraph::builder::makeCTCGreedyDecoderSeqLen(paramOuts[0], sequenceLenNode, + ngraph::builder::makeCTCGreedyDecoderSeqLen(paramsIn[0], sequenceLenNode, blankIndex, mergeRepeated, ngIdxPrc)); ngraph::ResultVector results; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp b/src/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp index f8c3eefd345f5e..378eb629172b33 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/ctc_loss.cpp @@ -51,10 +51,8 @@ void CTCLossLayerTest::SetUp() { auto ngIntPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(intPrecision); ov::ParameterVector params {std::make_shared(ngFpPrc, ov::Shape(logitsShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto ctcLoss = std::dynamic_pointer_cast( - ngraph::builder::makeCTCLoss(paramOuts[0], logitsLength, labels, labelsLength, blankIndex, + ngraph::builder::makeCTCLoss(params[0], logitsLength, labels, labelsLength, blankIndex, ngFpPrc, ngIntPrc, preprocessCollapseRepeated, ctcMergeRepeated, unique)); ngraph::ResultVector results{std::make_shared(ctcLoss)}; function = std::make_shared(results, params, "CTCLoss"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/deformable_convolution.cpp b/src/tests/functional/shared_test_classes/src/single_layer/deformable_convolution.cpp index 08e6439081e5a1..ae68e2f3d713e5 100644 --- 
a/src/tests/functional/shared_test_classes/src/single_layer/deformable_convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/deformable_convolution.cpp @@ -76,8 +76,6 @@ void DeformableConvolutionLayerTest::SetUp() { for (auto&& shape : {inputShape, offsets, filter}) { params.push_back(std::make_shared(ngPrc, ov::Shape(shape))); } - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto data = std::make_shared(ngPrc, ngraph::Shape(inputShape)); data->set_friendly_name("a_data"); auto offset_vals = std::make_shared(ngPrc, ngraph::Shape(offsets)); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/deformable_psroi_pooling.cpp b/src/tests/functional/shared_test_classes/src/single_layer/deformable_psroi_pooling.cpp index bba40e3789f638..3349348e5115c4 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/deformable_psroi_pooling.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/deformable_psroi_pooling.cpp @@ -97,10 +97,8 @@ namespace LayerTestsDefinitions { if (offsetsShape.empty()) { // Test without optional third input (offsets) params = ov::ParameterVector{std::make_shared(ngPrc, ov::Shape(dataShape)), std::make_shared(ngPrc, ov::Shape(roisShape))}; - inputs = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - defomablePSROIPooling = std::make_shared(inputs[0], - inputs[1], + defomablePSROIPooling = std::make_shared(params[0], + params[1], outputDim, spatialScale_, groupSize, @@ -113,11 +111,9 @@ namespace LayerTestsDefinitions { params = ov::ParameterVector{std::make_shared(ngPrc, ov::Shape(dataShape)), std::make_shared(ngPrc, ov::Shape(roisShape)), std::make_shared(ngPrc, ov::Shape(offsetsShape))}; - inputs = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - defomablePSROIPooling = std::make_shared(inputs[0], - inputs[1], - inputs[2], + defomablePSROIPooling = 
std::make_shared(params[0], + params[1], + params[2], outputDim, spatialScale_, groupSize, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/detection_output.cpp b/src/tests/functional/shared_test_classes/src/single_layer/detection_output.cpp index c9dfdaefa737c7..b08ac1fe252286 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/detection_output.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/detection_output.cpp @@ -152,11 +152,13 @@ void DetectionOutputLayerTest::SetUp() { } ov::ParameterVector params; + ov::OutputVector paramsOuts; for (auto&& shape : inShapes) { - params.push_back(std::make_shared(ov::element::f32, ov::Shape(shape))); + auto param = std::make_shared(ov::element::f32, ov::Shape(shape)); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); + auto detOut = ngraph::builder::makeDetectionOutput(paramsOuts, attrs); ngraph::ResultVector results{std::make_shared(detOut)}; function = std::make_shared(results, params, "DetectionOutput"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/dft.cpp b/src/tests/functional/shared_test_classes/src/single_layer/dft.cpp index 384e9f86cab98b..9e4e872befa35d 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/dft.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/dft.cpp @@ -37,8 +37,7 @@ void DFTLayerTest::SetUp() { auto paramData = std::make_shared(inType, ngraph::Shape(inputShapes)); paramVector.push_back(paramData); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto dft = ngraph::builder::makeDFT(paramOuts[0], axes, signalSize, opType); + auto dft = ngraph::builder::makeDFT(paramVector[0], axes, signalSize, opType); ngraph::ResultVector 
results{std::make_shared(dft)}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/einsum.cpp b/src/tests/functional/shared_test_classes/src/single_layer/einsum.cpp index 76beb95cbed2f7..3c09030ab1b837 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/einsum.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/einsum.cpp @@ -34,12 +34,14 @@ void EinsumLayerTest::SetUp() { const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision); ov::ParameterVector params; + ov::OutputVector paramsOuts; for (auto&& shape : inputShapes) { - params.push_back(std::make_shared(ngPrc, ov::Shape(shape))); + auto param = std::make_shared(ngPrc, ov::Shape(shape)); + params.push_back(param); + paramsOuts.push_back(param); } - const auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - const std::shared_ptr einsum = ngraph::builder::makeEinsum(paramOuts, equation); + const std::shared_ptr einsum = ngraph::builder::makeEinsum(paramsOuts, equation); const ngraph::ResultVector results{std::make_shared(einsum)}; function = std::make_shared(results, params, "einsum"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp index 3d3ce94024ce63..63e95b4c33bb94 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_detection_output.cpp @@ -87,10 +87,9 @@ void ExperimentalDetectronDetectionOutputLayerTest::SetUp() { init_input_shapes(inputShapes); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramsOuts = 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto experimentalDetectron = std::make_shared( params[0], // input_rois params[1], // input_deltas diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_generate_proposals_single_image.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_generate_proposals_single_image.cpp index e2a46810a849b5..6cbfc16588cea3 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_generate_proposals_single_image.cpp @@ -76,10 +76,9 @@ void ExperimentalDetectronGenerateProposalsSingleImageLayerTest::SetUp() { init_input_shapes(inputShapes); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto experimentalDetectron = std::make_shared( params[0], // im_info params[1], // anchors diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_prior_grid_generator.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_prior_grid_generator.cpp index 5d2d952d4f5c59..7a86475fc2782b 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_prior_grid_generator.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_prior_grid_generator.cpp @@ -59,10 +59,9 @@ void ExperimentalDetectronPriorGridGeneratorLayerTest::SetUp() { init_input_shapes(param.inputShapes); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); 
- } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto experimentalDetectron = std::make_shared( params[0], // priors params[1], // feature_map diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_roifeatureextractor.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_roifeatureextractor.cpp index e789f18f90d792..80847e7dd98879 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_roifeatureextractor.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_roifeatureextractor.cpp @@ -67,10 +67,12 @@ void ExperimentalDetectronROIFeatureExtractorLayerTest::SetUp() { attrs.pyramid_scales = pyramidScales; ov::ParameterVector params; + ov::OutputVector paramsOuts; for (auto&& shape : inputDynamicShapes) { - params.push_back(std::make_shared(netPrecision, shape)); + auto param = std::make_shared(netPrecision, shape); + params.push_back(param); + paramsOuts.push_back(param); } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto experimentalDetectronROIFeatureExtractor = std::make_shared(paramsOuts, attrs); function = std::make_shared(ov::OutputVector{experimentalDetectronROIFeatureExtractor->output(0), experimentalDetectronROIFeatureExtractor->output(1)}, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_topkrois.cpp b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_topkrois.cpp index a659240e8d97da..c067bc4e3b1722 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_topkrois.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/experimental_detectron_topkrois.cpp @@ -51,11 +51,10 @@ void ExperimentalDetectronTopKROIsLayerTest::SetUp() { init_input_shapes(inputShapes); ov::ParameterVector 
params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto experimentalDetectronTopKROIs = std::make_shared(paramOuts[0], paramOuts[1], maxRois); + + auto experimentalDetectronTopKROIs = std::make_shared(params[0], params[1], maxRois); function = std::make_shared(ov::OutputVector {experimentalDetectronTopKROIs->output(0)}, "ExperimentalDetectronTopKROIs"); } } // namespace subgraph diff --git a/src/tests/functional/shared_test_classes/src/single_layer/fake_quantize.cpp b/src/tests/functional/shared_test_classes/src/single_layer/fake_quantize.cpp index bcbf3caba4fd14..04dfe2540390ae 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/fake_quantize.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/fake_quantize.cpp @@ -69,7 +69,6 @@ void FakeQuantizeLayerTest::SetUp() { } auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); UpdateSeed(); @@ -81,10 +80,10 @@ void FakeQuantizeLayerTest::SetUp() { } std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m" << "ngraphSeed = " << ngraphSeed << std::endl; - fakeQNode = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape, ngraphSeed); + fakeQNode = ngraph::builder::makeFakeQuantize(params[0], ngPrc, levels, constShape, ngraphSeed); } else { fakeQNode = ngraph::builder::makeFakeQuantize( - paramOuts[0], + params[0], ngPrc, levels, constShape, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/gather.cpp b/src/tests/functional/shared_test_classes/src/single_layer/gather.cpp index 97538823dc52ae..bd7f75e20b48d8 100644 --- 
a/src/tests/functional/shared_test_classes/src/single_layer/gather.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/gather.cpp @@ -16,10 +16,9 @@ void GatherLayerTestBase::SetUp(const gatherParamsTuple& params) { ASSERT_EQ(ngraph::shape_size(indicesShape), indices.size()) << "Indices vector size and provided indices shape doesn't fit each other"; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector functionParams {std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); auto indicesNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape(indicesShape), indices); auto axisNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode); + auto gather = std::make_shared(functionParams[0], indicesNode, axisNode); ngraph::ResultVector results{std::make_shared(gather)}; function = std::make_shared(results, functionParams, "gather"); } @@ -84,11 +83,10 @@ void Gather7LayerTest::SetUp() { int batchIdx = std::get<1>(axis_batchIdx); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector functionParams {std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); auto indicesNode = ngraph::builder::makeConstant(ngraph::element::i64, indicesShape, {}, true, inputShape[axis < 0 ? 
axis + inputShape.size() : axis] - 1, 0); auto axisNode = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis }); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode, batchIdx); + auto gather = std::make_shared(functionParams[0], indicesNode, axisNode, batchIdx); ngraph::ResultVector results{ std::make_shared(gather) }; function = std::make_shared(results, functionParams, "gather"); } @@ -126,12 +124,11 @@ void Gather8LayerTest::SetUp() { int batchIdx = std::get<1>(axis_batchIdx); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector functionParams {std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); auto indicesNode = ngraph::builder::makeConstant(ngraph::element::i64, indicesShape, {}, true, inputShape[axis < 0 ? axis + inputShape.size() : axis] - 1, -static_cast(inputShape[axis < 0 ? axis + inputShape.size() : axis])); auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis }); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode, batchIdx); + auto gather = std::make_shared(functionParams[0], indicesNode, axisNode, batchIdx); ngraph::ResultVector results{ std::make_shared(gather) }; function = std::make_shared(results, functionParams, "gather"); } @@ -169,11 +166,10 @@ void Gather8IndiceScalarLayerTest::SetUp() { int batchIdx = std::get<1>(axis_batchIdx); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector functionParams {std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); auto indicesNode = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {inputShape[axis] - 1})->output(0); auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, 
ngraph::Shape({}), { axis }); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode, batchIdx); + auto gather = std::make_shared(functionParams[0], indicesNode, axisNode, batchIdx); ngraph::ResultVector results{ std::make_shared(gather) }; function = std::make_shared(results, functionParams, "gather"); } @@ -222,10 +218,9 @@ void Gather8withIndicesDataLayerTest::SetUp() { int batchIdx = std::get<1>(axis_batchIdx); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector functionParams {std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); auto indicesNode = ngraph::builder::makeConstant(ngraph::element::i64, indicesShape, indicesData); auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis }); - auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode, batchIdx); + auto gather = std::make_shared(functionParams[0], indicesNode, axisNode, batchIdx); ngraph::ResultVector results{ std::make_shared(gather) }; function = std::make_shared(results, functionParams, "gather"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/gather_elements.cpp b/src/tests/functional/shared_test_classes/src/single_layer/gather_elements.cpp index 3a1d19ddbe11e2..e89eb4a3eced33 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/gather_elements.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/gather_elements.cpp @@ -41,10 +41,8 @@ void GatherElementsLayerTest::SetUp() { auto ngIPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iPrecision); ov::ParameterVector params {std::make_shared(ngDPrc, ov::Shape(dataShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto gather = std::dynamic_pointer_cast( - ngraph::builder::makeGatherElements(paramOuts[0], indicesShape, 
ngIPrc, axis)); + ngraph::builder::makeGatherElements(params[0], indicesShape, ngIPrc, axis)); ngraph::ResultVector results{std::make_shared(gather)}; function = std::make_shared(results, params, "gatherEl"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/gather_nd.cpp b/src/tests/functional/shared_test_classes/src/single_layer/gather_nd.cpp index 07c3c8d2266445..a0d9d40705ab3d 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/gather_nd.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/gather_nd.cpp @@ -45,9 +45,7 @@ void GatherNDLayerTest::SetUp() { auto ngIPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iPrecision); ov::ParameterVector params {std::make_shared(ngDPrc, ov::Shape(dataShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto dataNode = paramOuts[0]; + auto dataNode = params[0]; auto gather = std::dynamic_pointer_cast( ngraph::builder::makeGatherND(dataNode, indicesShape, ngIPrc, batchDims)); ngraph::ResultVector results{std::make_shared(gather)}; @@ -71,9 +69,7 @@ void GatherND8LayerTest::SetUp() { auto ngIPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iPrecision); ov::ParameterVector params {std::make_shared(ngDPrc, ov::Shape(dataShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto dataNode = paramOuts[0]; + auto dataNode = params[0]; auto gather = std::dynamic_pointer_cast( ngraph::builder::makeGatherND8(dataNode, indicesShape, ngIPrc, batchDims)); ngraph::ResultVector results{ std::make_shared(gather) }; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp b/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp index d4339568d56c47..0f491ce972a7c8 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp +++ 
b/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp @@ -93,10 +93,9 @@ void GenerateProposalsLayerTest::SetUp() { init_input_shapes(inputShapes); ov::ParameterVector params; - for (auto&& shape : inputDynamicShapes) { + for (auto&& shape : inputDynamicShapes) params.push_back(std::make_shared(netPrecision, shape)); - } - auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + auto generateProposals = std::make_shared( params[0], // im_info params[1], // anchors diff --git a/src/tests/functional/shared_test_classes/src/single_layer/grn.cpp b/src/tests/functional/shared_test_classes/src/single_layer/grn.cpp index c487eb91b84ffc..54fe36b363170b 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/grn.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/grn.cpp @@ -37,9 +37,7 @@ void GrnLayerTest::SetUp() { std::tie(netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, bias, targetDevice) = GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector paramsIn {std::make_shared(ngPrc, ov::Shape(inputShapes))}; - auto paramsOut = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(paramsIn)); - auto grn = std::make_shared(paramsOut[0], bias); + auto grn = std::make_shared(paramsIn[0], bias); ngraph::ResultVector results{ std::make_shared(grn) }; function = std::make_shared(results, paramsIn, "Grn"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/group_convolution.cpp b/src/tests/functional/shared_test_classes/src/single_layer/group_convolution.cpp index fab24869746a00..14408094c21dff 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/group_convolution.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/group_convolution.cpp @@ -51,10 +51,8 @@ void GroupConvolutionLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, 
dilation, convOutChannels, numGroups, padType) = groupConvParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolution(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeGroupConvolution(params[0], ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups)); ngraph::ResultVector results{std::make_shared(groupConv)}; function = std::make_shared(results, params, "groupConvolution"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/group_convolution_backprop_data.cpp b/src/tests/functional/shared_test_classes/src/single_layer/group_convolution_backprop_data.cpp index c23c392907e906..de4f784b4c6ec4 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/group_convolution_backprop_data.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/group_convolution_backprop_data.cpp @@ -53,10 +53,8 @@ void GroupConvBackpropDataLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType) = groupConvBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto groupConvBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeGroupConvolutionBackpropData(params[0], ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups)); ngraph::ResultVector results{std::make_shared(groupConvBackpropData)}; function = 
std::make_shared(results, params, "GroupConvolutionBackpropData"); @@ -109,17 +107,15 @@ void GroupConvBackpropLayerTest::SetUp() { std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, outPadding) = groupConvBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); std::shared_ptr groupConvBackpropData; if (!outputShape.empty()) { auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); groupConvBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, + ngraph::builder::makeGroupConvolutionBackpropData(params[0], outShape, ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups, false, outPadding)); } else { groupConvBackpropData = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + ngraph::builder::makeGroupConvolutionBackpropData(params[0], ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels, numGroups, false, outPadding)); } ngraph::ResultVector results{std::make_shared(groupConvBackpropData)}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/is_inf.cpp b/src/tests/functional/shared_test_classes/src/single_layer/is_inf.cpp index 2372892b6c8a3c..2e1e6e35e6ca20 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/is_inf.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/is_inf.cpp @@ -62,10 +62,9 @@ void IsInfLayerTest::SetUp() { parameters.push_back(std::make_shared(dataPrc, shape)); } parameters[0]->set_friendly_name("Data"); - auto paramOuts = 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(parameters)); ov::op::v10::IsInf::Attributes attributes {detectNegative, detectPositive}; - auto isInf = std::make_shared(paramOuts[0], attributes); + auto isInf = std::make_shared(parameters[0], attributes); ov::ResultVector results; for (int i = 0; i < isInf->get_output_size(); i++) { results.push_back(std::make_shared(isInf->output(i))); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/log_softmax.cpp b/src/tests/functional/shared_test_classes/src/single_layer/log_softmax.cpp index 391dab70f985cc..843c8945aab6ac 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/log_softmax.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/log_softmax.cpp @@ -41,10 +41,7 @@ void LogSoftmaxLayerTest::SetUp() { const ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - - const auto logSoftmax = std::make_shared(paramOuts.at(0), axis); + const auto logSoftmax = std::make_shared(params.at(0), axis); const ngraph::ResultVector results {std::make_shared(logSoftmax)}; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/lrn.cpp b/src/tests/functional/shared_test_classes/src/single_layer/lrn.cpp index cb1ac313ad3950..b594de81572777 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/lrn.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/lrn.cpp @@ -42,11 +42,9 @@ void LrnLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShapes))}; - auto paramIn = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto axes_node = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data()); - auto lrn = std::make_shared(paramIn[0], 
axes_node, alpha, beta, bias, size); + auto lrn = std::make_shared(params[0], axes_node, alpha, beta, bias, size); ngraph::ResultVector results {std::make_shared(lrn)}; function = std::make_shared(results, params, "lrn"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/lstm_cell_basic.cpp b/src/tests/functional/shared_test_classes/src/single_layer/lstm_cell_basic.cpp index 2f835e374709b3..795e3b8ef9228c 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/lstm_cell_basic.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/lstm_cell_basic.cpp @@ -64,10 +64,12 @@ void LSTMCellBasicTest::SetUp() { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShapes[0])), std::make_shared(ngPrc, ov::Shape(inputShapes[1])), std::make_shared(ngPrc, ov::Shape(inputShapes[2]))}; + ov::OutputVector paramsOuts; + for (auto&& param : params) + paramsOuts.push_back(param); std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5]}; - auto lstm_cell = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), - WRB, hidden_size, activations, {}, {}, clip); + auto lstm_cell = ngraph::builder::makeLSTM(paramsOuts, WRB, hidden_size, activations, {}, {}, clip); ngraph::ResultVector results{std::make_shared(lstm_cell->output(0)), std::make_shared(lstm_cell->output(1))}; function = std::make_shared(results, params, "lstm_cell"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp b/src/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp index 59a61c87cd071d..a4a0b0c97ef0a2 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp @@ -67,10 +67,8 @@ void MatMulTest::SetUp() { if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) { params.push_back(std::dynamic_pointer_cast(secondaryInput)); } - auto paramOuts = 
ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto MatMul = std::dynamic_pointer_cast( - ngraph::builder::makeMatMul(paramOuts[0], secondaryInput, shapeRelatedParams.input1.second, shapeRelatedParams.input2.second)); + ngraph::builder::makeMatMul(params[0], secondaryInput, shapeRelatedParams.input1.second, shapeRelatedParams.input2.second)); ngraph::ResultVector results{std::make_shared(MatMul)}; function = std::make_shared(results, params, "MatMul"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp b/src/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp index e5c70c114211e8..c4677606a469e5 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp @@ -319,9 +319,7 @@ void MatrixNmsLayerTest::SetUp() { for (auto&& shape : inputDynamicShapes) { params.push_back(std::make_shared(paramsPrec, shape)); } - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto nms = std::make_shared(paramOuts[0], paramOuts[1], m_attrs); + auto nms = std::make_shared(params[0], params[1], m_attrs); function = std::make_shared(nms, params, "MatrixNMS"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp b/src/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp index 7e37b235da1a76..d40ceabf01c57d 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp @@ -378,8 +378,6 @@ void MulticlassNmsLayerTest::SetUp() { params.push_back(std::make_shared(paramsPrec, shape)); } } - const auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); m_attrs.iou_threshold = iouThr; m_attrs.score_threshold = scoreThr; @@ -392,7 +390,12 @@ 
void MulticlassNmsLayerTest::SetUp() { m_attrs.background_class = backgroundClass; m_attrs.normalized = normalized; - const auto nms = CreateNmsOp(paramOuts); + std::shared_ptr nms; + if (params.size() > 2) { + nms = std::make_shared(params[0], params[1], params[2], m_attrs); + } else { + nms = std::make_shared(params[0], params[1], m_attrs); + } function = std::make_shared(nms, params, "MulticlassNMS"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/mvn.cpp b/src/tests/functional/shared_test_classes/src/single_layer/mvn.cpp index cd87ad4116d00f..1a6f5c9707d7ef 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/mvn.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/mvn.cpp @@ -38,10 +38,9 @@ void Mvn1LayerTest::SetUp() { std::tie(inputShapes, inputPrecision, axes, acrossChanels, normalizeVariance, eps, targetDevice) = this->GetParam(); auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); ov::ParameterVector param {std::make_shared(inType, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); - auto mvn = std::dynamic_pointer_cast(ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps)); + auto mvn = std::dynamic_pointer_cast(ngraph::builder::makeMVN(param[0], acrossChanels, normalizeVariance, eps)); if (!axes.empty()) { - mvn = std::dynamic_pointer_cast(ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps)); + mvn = std::dynamic_pointer_cast(ngraph::builder::makeMVN(param[0], axes, normalizeVariance, eps)); } ngraph::ResultVector results{std::make_shared(mvn)}; function = std::make_shared(results, param, "MVN1"); @@ -82,9 +81,8 @@ void Mvn6LayerTest::SetUp() { auto axesType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(axesPrecision); ov::ParameterVector param {std::make_shared(dataType, ov::Shape(inputShapes))}; - auto paramOuts = 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); auto axesNode = ngraph::builder::makeConstant(axesType, ngraph::Shape{axes.size()}, axes); - auto mvn = ngraph::builder::makeMVN6(paramOuts[0], axesNode, normalizeVariance, eps, epsMode); + auto mvn = ngraph::builder::makeMVN6(param[0], axesNode, normalizeVariance, eps, epsMode); ngraph::ResultVector results{std::make_shared(mvn)}; function = std::make_shared(results, param, "MVN6"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/one_hot.cpp b/src/tests/functional/shared_test_classes/src/single_layer/one_hot.cpp index 7b011bc74fcf40..e33e0ea7d84d27 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/one_hot.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/one_hot.cpp @@ -42,10 +42,8 @@ void OneHotLayerTest::SetUp() { this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto onehot = ngraph::builder::makeOneHot(paramOuts[0], depth_type, depth_val, set_type, on_val, off_val, axis); + auto onehot = ngraph::builder::makeOneHot(params[0], depth_type, depth_val, set_type, on_val, off_val, axis); ngraph::ResultVector results{std::make_shared(onehot)}; function = std::make_shared(results, params, "OneHot"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp b/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp index 2c92716bed8eba..09f98d4f157b79 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp @@ -44,9 +44,7 @@ void PadLayerTest::SetUp() { this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector 
params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto pad = CreatePadOp(paramOuts[0], padsBegin, padsEnd, argPadValue, padMode); + auto pad = CreatePadOp(params[0], padsBegin, padsEnd, argPadValue, padMode); ngraph::ResultVector results{std::make_shared(pad)}; function = std::make_shared(results, params, "pad"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/pooling.cpp b/src/tests/functional/shared_test_classes/src/single_layer/pooling.cpp index 833db88094f6b0..e4cbedff108df1 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/pooling.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/pooling.cpp @@ -145,10 +145,8 @@ void PoolingLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr pooling = ngraph::builder::makePooling(paramOuts[0], + std::shared_ptr pooling = ngraph::builder::makePooling(params[0], stride, padBegin, padEnd, @@ -179,10 +177,8 @@ void GlobalPoolingLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr pooling = ngraph::builder::makePooling(paramOuts[0], + std::shared_ptr pooling = ngraph::builder::makePooling(params[0], stride, padBegin, padEnd, @@ -211,10 +207,8 @@ void MaxPoolingV8LayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = 
ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr maxPool = ngraph::builder::makeMaxPoolingV8(paramOuts[0], stride, dilation, padBegin, padEnd, + std::shared_ptr maxPool = ngraph::builder::makeMaxPoolingV8(params[0], stride, dilation, padBegin, padEnd, kernel, roundingType, padType, indexElementType, axis); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/proposal.cpp b/src/tests/functional/shared_test_classes/src/single_layer/proposal.cpp index 1fd68c0f4de1ae..cbf03c898b492f 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/proposal.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/proposal.cpp @@ -151,10 +151,9 @@ void ProposalLayerTest::SetUp() { std::make_shared(ngPrc, ov::Shape(boxesShape))}; params[0]->set_friendly_name("a_scores"); params[1]->set_friendly_name("b_boxes"); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto proposal = std::dynamic_pointer_cast( - ngraph::builder::makeProposal(paramOuts[0], paramOuts[1], img_info, ngPrc, + ngraph::builder::makeProposal(params[0], params[1], img_info, ngPrc, base_size, pre_nms_topn, post_nms_topn, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/psroi_pooling.cpp b/src/tests/functional/shared_test_classes/src/single_layer/psroi_pooling.cpp index 06c0cfea5f3c4a..9f1cf2313cf60a 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/psroi_pooling.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/psroi_pooling.cpp @@ -109,10 +109,8 @@ void PSROIPoolingLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape)), std::make_shared(ngPrc, ov::Shape(coordsShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr 
psroiPooling = std::make_shared(paramOuts[0], - paramOuts[1], + std::shared_ptr psroiPooling = std::make_shared(params[0], + params[1], outputDim, groupSize_, spatialScale_, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp b/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp index c9eda392c5ec86..a953a84086dccd 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/rdft.cpp @@ -37,9 +37,7 @@ void RDFTLayerTest::SetUp() { auto paramData = std::make_shared(inType, ngraph::Shape(inputShapes)); paramVector.push_back(paramData); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto rdft = ngraph::builder::makeRDFT(paramOuts[0], axes, signalSize, opType); - + auto rdft = ngraph::builder::makeRDFT(paramVector[0], axes, signalSize, opType); ngraph::ResultVector results{std::make_shared(rdft)}; function = std::make_shared(results, paramVector, "RDFT"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/reduce_ops.cpp b/src/tests/functional/shared_test_classes/src/single_layer/reduce_ops.cpp index 6a3afb561447b2..f9c40c7b60fe2a 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/reduce_ops.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/reduce_ops.cpp @@ -42,8 +42,6 @@ void ReduceOpsLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); std::vector shapeAxes; switch (opType) { @@ -62,7 +60,7 @@ void ReduceOpsLayerTest::SetUp() { auto reductionAxesNode = std::dynamic_pointer_cast( std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); - const auto reduce = 
ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); + const auto reduce = ngraph::builder::makeReduce(params[0], reductionAxesNode, keepDims, reductionType); const ngraph::ResultVector results{std::make_shared(reduce)}; function = std::make_shared(results, params, "Reduce"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/reshape.cpp b/src/tests/functional/shared_test_classes/src/single_layer/reshape.cpp index 00a6e0f6e797b1..31578cd9379062 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/reshape.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/reshape.cpp @@ -37,12 +37,10 @@ void ReshapeLayerTest::SetUp() { this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector paramsIn {std::make_shared(ngPrc, ov::Shape(inputShapes))}; - auto paramIn = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(paramsIn)); auto constNode = std::make_shared( ngraph::element::Type_t::i64, ngraph::Shape{outFormShapes.size()}, outFormShapes); auto reshape = std::dynamic_pointer_cast( - std::make_shared(paramIn[0], constNode, specialZero)); + std::make_shared(paramsIn[0], constNode, specialZero)); ngraph::ResultVector results{std::make_shared(reshape)}; function = std::make_shared(results, paramsIn, "Reshape"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp b/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp index b3a3d51cc7d569..97d39c93f0b548 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/roi_align.cpp @@ -92,8 +92,6 @@ void ROIAlignLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = - 
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector proposalVector; std::vector roiIdxVector; proposalVector.resize(coordsShape[0] * 4); @@ -106,7 +104,7 @@ void ROIAlignLayerTest::SetUp() { auto coords = std::make_shared(ngPrc, coordsShape, proposalVector.data()); auto roisIdx = std::make_shared(ngraph::element::i32, idxShape, roiIdxVector.data()); - std::shared_ptr roiAlign = std::make_shared(paramOuts[0], + std::shared_ptr roiAlign = std::make_shared(params[0], coords, roisIdx, pooledH, @@ -173,8 +171,6 @@ void ROIAlignV9LayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = - ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); std::vector proposalVector; std::vector roiIdxVector; proposalVector.resize(coordsShape[0] * 4); @@ -194,7 +190,7 @@ void ROIAlignV9LayerTest::SetUp() { auto roisIdx = std::make_shared(ngraph::element::i32, idxShape, roiIdxVector.data()); std::shared_ptr roiAlign = std::make_shared( - paramOuts[0], + params[0], coords, roisIdx, pooledH, diff --git a/src/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp b/src/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp index c48711552eeb96..43e8e8a3e6a004 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp @@ -74,10 +74,8 @@ namespace LayerTestsDefinitions { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(inputShape)), std::make_shared(ngPrc, ov::Shape(coordsShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], - 
paramOuts[1], + std::shared_ptr roi_pooling = ngraph::builder::makeROIPooling(params[0], + params[1], poolShape, spatial_scale, pool_method); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/scatter_ND_update.cpp b/src/tests/functional/shared_test_classes/src/single_layer/scatter_ND_update.cpp index c04acb446bf1f5..748c47ee592e34 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/scatter_ND_update.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/scatter_ND_update.cpp @@ -66,8 +66,7 @@ void ScatterNDUpdateLayerTest::SetUp() { paramVector.push_back(inputParams); auto updateParams = std::make_shared(inPrc, ngraph::Shape(updateShape)); paramVector.push_back(updateParams); - auto paramVectorOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto s2d = ngraph::builder::makeScatterNDUpdate(paramVectorOuts[0], idxPrc, indicesShape, indicesValue, paramVectorOuts[1]); + auto s2d = ngraph::builder::makeScatterNDUpdate(paramVector[0], idxPrc, indicesShape, indicesValue, paramVector[1]); ngraph::ResultVector results{std::make_shared(s2d)}; function = std::make_shared(results, paramVector, "ScatterNDUpdate"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/scatter_elements_update.cpp b/src/tests/functional/shared_test_classes/src/single_layer/scatter_elements_update.cpp index c2541ca4cd4aba..cf4503616d98d4 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/scatter_elements_update.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/scatter_elements_update.cpp @@ -57,8 +57,7 @@ void ScatterElementsUpdateLayerTest::SetUp() { paramVector.push_back(inputParams); auto updateParams = std::make_shared(inPrc, ngraph::Shape(indicesShape)); paramVector.push_back(updateParams); - auto paramVectorOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto s2d = 
ngraph::builder::makeScatterElementsUpdate(paramVectorOuts[0], idxPrc, indicesShape, indicesValue, paramVectorOuts[1], axis); + auto s2d = ngraph::builder::makeScatterElementsUpdate(paramVector[0], idxPrc, indicesShape, indicesValue, paramVector[1], axis); ngraph::ResultVector results{std::make_shared(s2d)}; function = std::make_shared(results, paramVector, "ScatterElementsUpdate"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/scatter_update.cpp b/src/tests/functional/shared_test_classes/src/single_layer/scatter_update.cpp index 81754190f3be81..397885c77e1cc0 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/scatter_update.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/scatter_update.cpp @@ -76,8 +76,7 @@ void ScatterUpdateLayerTest::SetUp() { paramVector.push_back(inputParams); auto updateParams = std::make_shared(inPrc, ngraph::Shape(updateShape)); paramVector.push_back(updateParams); - auto paramVectorOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); - auto s2d = ngraph::builder::makeScatterUpdate(paramVectorOuts[0], idxPrc, indicesShape, indicesValue, paramVectorOuts[1], axis); + auto s2d = ngraph::builder::makeScatterUpdate(paramVector[0], idxPrc, indicesShape, indicesValue, paramVector[1], axis); ngraph::ResultVector results{std::make_shared(s2d)}; function = std::make_shared(results, paramVector, "ScatterUpdate"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/shape_of.cpp b/src/tests/functional/shared_test_classes/src/single_layer/shape_of.cpp index e3030504c115e9..8d92da3a114f0d 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/shape_of.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/shape_of.cpp @@ -27,8 +27,7 @@ namespace LayerTestsDefinitions { auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); auto outType = 
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc); ov::ParameterVector param {std::make_shared(inType, ov::Shape(inputShapes))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(param)); - auto shapeOf = std::make_shared(paramOuts[0], outType); + auto shapeOf = std::make_shared(param[0], outType); ngraph::ResultVector results{std::make_shared(shapeOf)}; function = std::make_shared(results, param, "shapeOf"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/shuffle_channels.cpp b/src/tests/functional/shared_test_classes/src/single_layer/shuffle_channels.cpp index b3f5093ad25560..73c7a85379a3c5 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/shuffle_channels.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/shuffle_channels.cpp @@ -40,10 +40,8 @@ void ShuffleChannelsLayerTest::SetUp() { std::tie(axis, group) = shuffleChannelsParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto shuffleChannels = std::dynamic_pointer_cast( - ngraph::builder::makeShuffleChannels(paramOuts[0], axis, group)); + ngraph::builder::makeShuffleChannels(params[0], axis, group)); ngraph::ResultVector results{std::make_shared(shuffleChannels)}; function = std::make_shared(results, params, "shuffleChannels"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/space_to_batch.cpp b/src/tests/functional/shared_test_classes/src/single_layer/space_to_batch.cpp index cc49d3d8615924..2aa57296a4f583 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/space_to_batch.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/space_to_batch.cpp @@ -37,9 +37,7 @@ void SpaceToBatchLayerTest::SetUp() { auto ngPrc = 
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto s2b = ngraph::builder::makeSpaceToBatch(paramOuts[0], ngPrc, blockShape, padsBegin, padsEnd); + auto s2b = ngraph::builder::makeSpaceToBatch(params[0], ngPrc, blockShape, padsBegin, padsEnd); ngraph::ResultVector results{std::make_shared(s2b)}; function = std::make_shared(results, params, "SpaceToBatch"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/space_to_depth.cpp b/src/tests/functional/shared_test_classes/src/single_layer/space_to_depth.cpp index 34b4e89fd1ad68..7537c09ce05215 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/space_to_depth.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/space_to_depth.cpp @@ -46,8 +46,7 @@ void SpaceToDepthLayerTest::SetUp() { std::tie(inShape, inputPrecision, mode, blockSize, targetDevice) = this->GetParam(); auto inPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); ov::ParameterVector params {std::make_shared(inPrc, ov::Shape(inShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto s2d = ngraph::builder::makeSpaceToDepth(paramOuts[0], mode, blockSize); + auto s2d = ngraph::builder::makeSpaceToDepth(params[0], mode, blockSize); ngraph::ResultVector results{std::make_shared(s2d)}; function = std::make_shared(results, params, "SpaceToDepth"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/split.cpp b/src/tests/functional/shared_test_classes/src/single_layer/split.cpp index 89bef7fd197c92..aad4a993587c5f 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/split.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/split.cpp @@ -44,9 +44,7 @@ void SplitLayerTest::SetUp() { } 
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto split = std::dynamic_pointer_cast(ngraph::builder::makeSplit(paramOuts[0], + auto split = std::dynamic_pointer_cast(ngraph::builder::makeSplit(params[0], ngPrc, numSplits, axis)); ngraph::ResultVector results; for (int i = 0; i < outIndices.size(); i++) { diff --git a/src/tests/functional/shared_test_classes/src/single_layer/strided_slice.cpp b/src/tests/functional/shared_test_classes/src/single_layer/strided_slice.cpp index 4633baab271c6c..1332fce405f562 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/strided_slice.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/strided_slice.cpp @@ -44,9 +44,7 @@ void StridedSliceLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params {std::make_shared(ngPrc, ov::Shape(ssParams.inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto ss = ngraph::builder::makeStridedSlice(paramOuts[0], ssParams.begin, ssParams.end, ssParams.strides, ngPrc, ssParams.beginMask, + auto ss = ngraph::builder::makeStridedSlice(params[0], ssParams.begin, ssParams.end, ssParams.strides, ngPrc, ssParams.beginMask, ssParams.endMask, ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask); ngraph::ResultVector results{std::make_shared(ss)}; function = std::make_shared(results, params, "StridedSlice"); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/tile.cpp b/src/tests/functional/shared_test_classes/src/single_layer/tile.cpp index e04da580b8f2af..e571efb475f23d 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/tile.cpp +++ 
b/src/tests/functional/shared_test_classes/src/single_layer/tile.cpp @@ -34,9 +34,7 @@ void TileLayerTest::SetUp() { std::tie(tileParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - auto tile = ngraph::builder::makeTile(paramOuts[0], tileParams); + auto tile = ngraph::builder::makeTile(params[0], tileParams); ngraph::ResultVector results{std::make_shared(tile)}; function = std::make_shared(results, params, "tile"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp b/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp index 575cc092bf66b7..b4897117cbfb37 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/topk.cpp @@ -39,12 +39,10 @@ void TopKLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramIn = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto k = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, &keepK); auto topk = std::dynamic_pointer_cast( - std::make_shared(paramIn[0], k, axis, mode, sort)); + std::make_shared(params[0], k, axis, mode, sort)); ngraph::ResultVector results; for (size_t i = 0; i < topk->get_output_size(); i++) { diff --git a/src/tests/functional/shared_test_classes/src/single_layer/transpose.cpp b/src/tests/functional/shared_test_classes/src/single_layer/transpose.cpp index eb507056b0bcfc..2fc4131e834f76 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/transpose.cpp +++ 
b/src/tests/functional/shared_test_classes/src/single_layer/transpose.cpp @@ -32,14 +32,12 @@ void TransposeLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); const auto inOrderShape = inputOrder.empty() ? ngraph::Shape({0}) : ngraph::Shape({inputShape.size()}); const auto inputOrderOp = std::make_shared(ngraph::element::i64, inOrderShape, inputOrder); - const auto transpose = std::make_shared(paramOuts.at(0), inputOrderOp); + const auto transpose = std::make_shared(params.at(0), inputOrderOp); const ngraph::ResultVector results{std::make_shared(transpose)}; function = std::make_shared(results, params, "Transpose"); } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/variadic_split.cpp b/src/tests/functional/shared_test_classes/src/single_layer/variadic_split.cpp index 507d72cbe0e4dc..c424b57bc99a73 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/variadic_split.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/variadic_split.cpp @@ -36,8 +36,6 @@ namespace LayerTestsDefinitions { std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); auto VariadicSplit = std::dynamic_pointer_cast(ngraph::builder::makeVariadicSplit(params[0], numSplits, axis)); ngraph::ResultVector results; From 5fa53aa715f967ecd886e9778a776cfb179ba1e2 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 Nov 2023 11:01:35 +0100 Subject: [PATCH 271/275] Input and output order Keras tests. 
(#20902) * Input/output order Keras tests. * Added precommit mark. * Added xfail. * Small correction. * Check input/outputs by names in FW. * Moved output order tests to Python API group. * Corrected comments. --- .../test_tf_output_order.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/layer_tests/ovc_python_api_tests/test_tf_output_order.py diff --git a/tests/layer_tests/ovc_python_api_tests/test_tf_output_order.py b/tests/layer_tests/ovc_python_api_tests/test_tf_output_order.py new file mode 100644 index 00000000000000..34f323ca38b30b --- /dev/null +++ b/tests/layer_tests/ovc_python_api_tests/test_tf_output_order.py @@ -0,0 +1,99 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import tempfile +from pathlib import Path + +import numpy as np +import pytest +import tensorflow as tf + +from common import constants + + +def create_net_list(input_names, input_shapes): + tf.keras.backend.clear_session() + + # Create TensorFlow 2 model with multiple outputs. + # Outputs are list. + + inputs = [] + outputs = [] + for ind in range(len(input_names)): + input = tf.keras.Input(shape=input_shapes[ind][1:], name=input_names[ind]) + inputs.append(input) + outputs.append(tf.keras.layers.Activation(tf.nn.sigmoid)(input)) + + return tf.keras.Model(inputs=inputs, outputs=outputs) + + +def create_net_dict(input_names, input_shapes): + tf.keras.backend.clear_session() + + # Create TensorFlow 2 model with multiple outputs. + # Outputs are dictionary. 
+ + inputs = [] + outputs = {} + for ind in range(len(input_names)): + input = tf.keras.Input(shape=input_shapes[ind][1:], name=input_names[ind]) + inputs.append(input) + outputs["name" + str(ind)] = tf.keras.layers.Activation(tf.nn.sigmoid)(input) + + return tf.keras.Model(inputs=inputs, outputs=outputs) + + +def check_outputs_by_order(fw_output, ov_output, eps): + # Compare outputs by indices + for idx, output in enumerate(fw_output): + fw_out = output.numpy() + ov_out = ov_output[idx] + assert fw_out.shape == ov_out.shape, "Output with index {} has shape different from original FW.".format(idx) + diff = np.max(np.abs(fw_out - ov_out)) + assert diff < eps, "Output with index {} has inference result different from original FW.".format(idx) + + +def check_outputs_by_names(fw_output, ov_output, eps): + # Compare outputs by names + for name, output in fw_output.items(): + fw_out = output.numpy() + ov_out = ov_output[name] + assert fw_out.shape == ov_out.shape, "Output with name {} has shape different from original FW.".format(name) + diff = np.max(np.abs(fw_out - ov_out)) + assert diff < eps, "Output with name {} has inference result different from original FW.".format(name) + + +class TestTFInputOutputOrder(): + def setup_method(self): + Path(constants.out_path).mkdir(parents=True, exist_ok=True) + self.tmp_dir = tempfile.TemporaryDirectory(dir=constants.out_path).name + + @pytest.mark.parametrize("save_to_file, create_model_method, compare_model_method", [ + (False, create_net_list, check_outputs_by_order), + (False, create_net_dict, check_outputs_by_names), + pytest.param(True, create_net_list, check_outputs_by_order, marks=pytest.mark.xfail(reason='124436')), + pytest.param(True, create_net_dict, check_outputs_by_names, marks=pytest.mark.xfail(reason='124436')), + ]) + def test_order(self, ie_device, precision, save_to_file, create_model_method, compare_model_method): + from openvino import convert_model, compile_model + input_names = ["k", "b", "m", "c", "x"] + 
input_shapes = [[1, 1], [1, 3], [1, 2], [1, 5], [1, 4]] + epsilon = 0.001 + + fw_model = create_model_method(input_names, input_shapes) + + if save_to_file: + tf.keras.models.save_model(fw_model, self.tmp_dir + "./model") + ov_model = convert_model(self.tmp_dir + "./model") + else: + ov_model = convert_model(fw_model) + + cmp_model = compile_model(ov_model, ie_device) + test_inputs = [] + for shape in input_shapes: + test_inputs.append(np.random.rand(*shape)) + + fw_output = fw_model(test_inputs) + ov_output = cmp_model(test_inputs) + + compare_model_method(fw_output, ov_output, epsilon) From 212a9bc852ec820cb0001816b306c304c39f6d16 Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Mon, 13 Nov 2023 11:11:12 +0100 Subject: [PATCH 272/275] [PyOV] Support Constant creation from empty numpy arrays (#20967) * [PyOV] Support Constant creation from empty numpy arrays * Return 0 when shape is 0 * Add missing file... --- .../python/src/pyopenvino/core/common.cpp | 10 +++++++- .../python/tests/test_graph/test_basic.py | 23 +++++++++++++++++++ src/core/src/op/constant.cpp | 4 +++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 7b473929a63396..e467af8b42171c 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -155,11 +155,17 @@ py::array array_from_tensor(ov::Tensor&& t, bool is_shared) { template <> ov::op::v0::Constant create_copied(py::array& array) { + // Do not copy data from the array, only return empty tensor based on type. + if (array.size() == 0) { + return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array)); + } // Convert to contiguous array if not already in C-style. 
if (!array_helpers::is_contiguous(array)) { array = array_helpers::as_contiguous(array, array_helpers::get_ov_type(array)); } // Create actual Constant and a constructor is copying data. + // If ndim is equal to 0, creates scalar Constant. + // If size is equal to 0, creates empty Constant. return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array), array.ndim() == 0 ? array.data() : array.data(0)); @@ -175,9 +181,11 @@ template <> ov::op::v0::Constant create_shared(py::array& array) { // Check if passed array has C-style contiguous memory layout. // If memory is going to be shared it needs to be contiguous before passing to the constructor. + // If ndim is equal to 0, creates scalar Constant. + // If size is equal to 0, creates empty Constant. if (array_helpers::is_contiguous(array)) { auto memory = std::make_shared>( - static_cast(array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)), + static_cast((array.ndim() == 0 || array.size() == 0) ? array.mutable_data() : array.mutable_data(0)), array.ndim() == 0 ? 
array.itemsize() : array.nbytes(), array); return ov::op::v0::Constant(array_helpers::get_ov_type(array), array_helpers::get_shape(array), memory); diff --git a/src/bindings/python/tests/test_graph/test_basic.py b/src/bindings/python/tests/test_graph/test_basic.py index 8cf2af85d9bbed..6768fdad5e23c6 100644 --- a/src/bindings/python/tests/test_graph/test_basic.py +++ b/src/bindings/python/tests/test_graph/test_basic.py @@ -243,6 +243,29 @@ def test_constant_get_data_unsigned_integer(data_type): assert np.allclose(input_data, retrieved_data) +@pytest.mark.parametrize( + "shared_flag", + [ + (True), + (False), + ], +) +@pytest.mark.parametrize( + "init_value", + [ + (np.array([])), + (np.array([], dtype=np.int32)), + (np.empty(shape=(0))), + ], +) +def test_constant_from_empty_array(shared_flag, init_value): + const = ov.op.Constant(init_value, shared_memory=shared_flag) + assert tuple(const.shape) == init_value.shape + assert const.get_element_type().to_dtype() == init_value.dtype + assert const.get_byte_size() == init_value.nbytes + assert np.allclose(const.data, init_value) + + def test_set_argument(): data1 = np.array([1, 2, 3]) data2 = np.array([4, 5, 6]) diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 1ecee02aa65067..e694865cc4ab2e 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -170,7 +170,9 @@ std::string Constant::convert_value_to_string(size_t index) const { } size_t Constant::get_byte_size() const { - return m_data->size(); + // Returns 0 when shape is "empty" (equals 0). + // TODO: refactor shape_size(m_shape) calculations and store it as a member. + return shape_size(m_shape) ? 
m_data->size() : 0; } const void* Constant::get_data_ptr() const { From fb3751717f2876c88336799acb254f27e000d3b4 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 13 Nov 2023 11:24:54 +0100 Subject: [PATCH 273/275] [Inference] Return state ptr by value (#21011) * Return state ptr by value * Fix mock class --- src/inference/dev_api/openvino/runtime/ivariable_state.hpp | 2 +- src/inference/src/dev/converter_utils.cpp | 7 ++----- src/inference/src/dev/ivariable_state.cpp | 2 +- .../intel_gpu/include/intel_gpu/plugin/variable_state.hpp | 2 +- src/plugins/intel_gpu/src/plugin/variable_state.cpp | 2 +- .../mocks/openvino/runtime/mock_ivariable_state.hpp | 2 +- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/ivariable_state.hpp b/src/inference/dev_api/openvino/runtime/ivariable_state.hpp index c25219ec4a8a33..62bcfac1087947 100644 --- a/src/inference/dev_api/openvino/runtime/ivariable_state.hpp +++ b/src/inference/dev_api/openvino/runtime/ivariable_state.hpp @@ -49,7 +49,7 @@ class OPENVINO_RUNTIME_API IVariableState : public std::enable_shared_from_this< * @brief Returns the value of the variable state. 
* @return The value of the variable state */ - virtual const ov::SoPtr& get_state() const; + virtual ov::SoPtr get_state() const; protected: /** diff --git a/src/inference/src/dev/converter_utils.cpp b/src/inference/src/dev/converter_utils.cpp index a698911f45140d..2ea29d5c5bd35d 100644 --- a/src/inference/src/dev/converter_utils.cpp +++ b/src/inference/src/dev/converter_utils.cpp @@ -651,7 +651,6 @@ namespace InferenceEngine { class IVariableStateWrapper : public ov::IVariableState { private: std::shared_ptr m_state; - mutable ov::SoPtr m_converted_state; public: explicit IVariableStateWrapper(const std::shared_ptr& state) @@ -666,10 +665,8 @@ class IVariableStateWrapper : public ov::IVariableState { m_state->SetState(ov::tensor_to_blob(state)); } - const ov::SoPtr& get_state() const override { - m_converted_state = ov::make_tensor(std::const_pointer_cast(m_state->GetState())); - - return m_converted_state; + ov::SoPtr get_state() const override { + return ov::make_tensor(std::const_pointer_cast(m_state->GetState())); } }; diff --git a/src/inference/src/dev/ivariable_state.cpp b/src/inference/src/dev/ivariable_state.cpp index f46f420b7024c5..23dfe65189e15d 100644 --- a/src/inference/src/dev/ivariable_state.cpp +++ b/src/inference/src/dev/ivariable_state.cpp @@ -22,6 +22,6 @@ void ov::IVariableState::set_state(const ov::SoPtr& state) { m_state = state; } -const ov::SoPtr& ov::IVariableState::get_state() const { +ov::SoPtr ov::IVariableState::get_state() const { return m_state; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp index 2661abb284452e..5e62e5d9e4c9d5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp @@ -16,7 +16,7 @@ class VariableState : public ov::IVariableState { void reset() override; void set_state(const ov::SoPtr& state) override; - const ov::SoPtr& 
get_state() const override; + ov::SoPtr get_state() const override; private: cldnn::network::VariableState::Ptr m_variable_state; diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index 46a3fdae22b7ca..fcc0e206fdc22c 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -40,7 +40,7 @@ void VariableState::set_state(const ov::SoPtr& state) { m_variable_state->is_set = true; } -const ov::SoPtr& VariableState::get_state() const { +ov::SoPtr VariableState::get_state() const { auto internal_memory = m_variable_state->memory; const bool blocking = true; internal_memory->copy_to(m_engine.get_service_stream(), m_state->data(), blocking); diff --git a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_ivariable_state.hpp b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_ivariable_state.hpp index 8d8f9f18a201a0..c106881086f3d2 100644 --- a/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_ivariable_state.hpp +++ b/src/tests/test_utils/unit_test_utils/mocks/openvino/runtime/mock_ivariable_state.hpp @@ -19,7 +19,7 @@ class MockIVariableState : public ov::IVariableState { MOCK_METHOD(const std::string&, get_name, (), (const)); MOCK_METHOD(void, reset, ()); MOCK_METHOD(void, set_state, (const ov::SoPtr&)); - MOCK_METHOD(const ov::SoPtr&, get_state, (), (const)); + MOCK_METHOD(ov::SoPtr, get_state, (), (const)); }; } // namespace ov From 6650b8750adc4ee5eb1a55988b8defdb3150a71f Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Mon, 13 Nov 2023 11:25:58 +0100 Subject: [PATCH 274/275] [PyOV] Support Tensor creation from empty numpy arrays (#20964) --- .../python/src/pyopenvino/core/common.cpp | 13 +- .../python/tests/test_runtime/test_tensor.py | 502 ++++++++++-------- 2 files changed, 298 insertions(+), 217 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp 
b/src/bindings/python/src/pyopenvino/core/common.cpp index e467af8b42171c..603583bb1825cd 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -201,12 +201,18 @@ ov::op::v0::Constant create_shared(ov::Tensor& tensor) { template <> ov::Tensor create_copied(py::array& array) { + // Create actual Tensor. + auto tensor = ov::Tensor(array_helpers::get_ov_type(array), array_helpers::get_shape(array)); + // If size of an array is equal to 0, the array is empty. + // Alternative could be `array.nbytes()`. + // Do not copy data from it, only return empty tensor based on type. + if (array.size() == 0) { + return tensor; + } // Convert to contiguous array if not already in C-style. if (!array_helpers::is_contiguous(array)) { array = array_helpers::as_contiguous(array, array_helpers::get_ov_type(array)); } - // Create actual Tensor and copy data. - auto tensor = ov::Tensor(array_helpers::get_ov_type(array), array_helpers::get_shape(array)); // If ndim of py::array is 0, array is a numpy scalar. That results in size to be equal to 0. std::memcpy(tensor.data(), array.ndim() == 0 ? array.data() : array.data(0), @@ -220,9 +226,10 @@ ov::Tensor create_shared(py::array& array) { // If memory is going to be shared it needs to be contiguous before passing to the constructor. if (array_helpers::is_contiguous(array)) { // If ndim of py::array is 0, array is a numpy scalar. + // If size of an array is equal to 0, the array is empty. return ov::Tensor(array_helpers::get_ov_type(array), array_helpers::get_shape(array), - array.ndim() == 0 ? array.mutable_data() : array.mutable_data(0)); + (array.ndim() == 0 || array.size() == 0) ? array.mutable_data() : array.mutable_data(0)); } // If passed array is not C-style, throw an error. OPENVINO_THROW("SHARED MEMORY MODE FOR THIS TENSOR IS NOT APPLICABLE! 
Passed numpy array must be C contiguous."); diff --git a/src/bindings/python/tests/test_runtime/test_tensor.py b/src/bindings/python/tests/test_runtime/test_tensor.py index 6b9f3272b81f62..00b7cb1e2470b2 100644 --- a/src/bindings/python/tests/test_runtime/test_tensor.py +++ b/src/bindings/python/tests/test_runtime/test_tensor.py @@ -8,9 +8,8 @@ import numpy as np -import openvino.runtime as ov +import openvino as ov import openvino.runtime.opset11 as ops -from openvino import Tensor from openvino.helpers import pack_data, unpack_data import pytest @@ -18,28 +17,31 @@ from tests.utils.helpers import generate_image, generate_relu_compiled_model -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.bf16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, bool), - (ov.Type.u1, np.uint8), - (ov.Type.u4, np.uint8), - (ov.Type.i4, np.int8), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.bf16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, bool), + (ov.Type.u1, np.uint8), + (ov.Type.u4, np.uint8), + (ov.Type.i4, np.int8), + ], +) def test_init_with_ngraph(ov_type, numpy_dtype): ov_tensors = [] - ov_tensors.append(Tensor(type=ov_type, shape=ov.Shape([1, 3, 32, 32]))) - ov_tensors.append(Tensor(type=ov_type, shape=[1, 3, 32, 32])) + ov_tensors.append(ov.Tensor(type=ov_type, shape=ov.Shape([1, 3, 32, 32]))) + ov_tensors.append(ov.Tensor(type=ov_type, shape=[1, 3, 32, 
32])) assert np.all(list(ov_tensor.shape) == [1, 3, 32, 32] for ov_tensor in ov_tensors) assert np.all(ov_tensor.element_type == ov_type for ov_tensor in ov_tensors) assert np.all(ov_tensor.data.dtype == numpy_dtype for ov_tensor in ov_tensors) @@ -53,29 +55,32 @@ def test_subprocess(): assert not status.returncode -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, bool), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, bool), + ], +) def test_init_with_numpy_dtype(ov_type, numpy_dtype): shape = (1, 3, 127, 127) ov_shape = ov.Shape(shape) ov_tensors = [] - ov_tensors.append(Tensor(type=numpy_dtype, shape=shape)) - ov_tensors.append(Tensor(type=np.dtype(numpy_dtype), shape=shape)) - ov_tensors.append(Tensor(type=np.dtype(numpy_dtype), shape=np.array(shape))) - ov_tensors.append(Tensor(type=numpy_dtype, shape=ov_shape)) - ov_tensors.append(Tensor(type=np.dtype(numpy_dtype), shape=ov_shape)) + ov_tensors.append(ov.Tensor(type=numpy_dtype, shape=shape)) + ov_tensors.append(ov.Tensor(type=np.dtype(numpy_dtype), shape=shape)) + ov_tensors.append(ov.Tensor(type=np.dtype(numpy_dtype), shape=np.array(shape))) + ov_tensors.append(ov.Tensor(type=numpy_dtype, shape=ov_shape)) + ov_tensors.append(ov.Tensor(type=np.dtype(numpy_dtype), shape=ov_shape)) assert np.all(tuple(ov_tensor.shape) == shape for ov_tensor in ov_tensors) 
assert np.all(ov_tensor.element_type == ov_type for ov_tensor in ov_tensors) assert np.all(isinstance(ov_tensor.data, np.ndarray) for ov_tensor in ov_tensors) @@ -83,25 +88,28 @@ def test_init_with_numpy_dtype(ov_type, numpy_dtype): assert np.all(ov_tensor.data.shape == shape for ov_tensor in ov_tensors) -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, bool), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, bool), + ], +) def test_init_with_numpy_shared_memory(ov_type, numpy_dtype): arr = generate_image().astype(numpy_dtype) shape = arr.shape arr = np.ascontiguousarray(arr) - ov_tensor = Tensor(array=arr, shared_memory=True) + ov_tensor = ov.Tensor(array=arr, shared_memory=True) assert tuple(ov_tensor.shape) == shape assert ov_tensor.element_type == ov_type assert isinstance(ov_tensor.data, np.ndarray) @@ -120,24 +128,27 @@ def test_init_with_numpy_shared_memory(ov_type, numpy_dtype): assert tuple(ov_tensor.get_strides()) == arr.strides -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, 
bool), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, bool), + ], +) def test_init_with_numpy_copy_memory(ov_type, numpy_dtype): arr = generate_image().astype(numpy_dtype) shape = arr.shape - ov_tensor = Tensor(array=arr, shared_memory=False) + ov_tensor = ov.Tensor(array=arr, shared_memory=False) assert tuple(ov_tensor.shape) == shape assert ov_tensor.element_type == ov_type assert isinstance(ov_tensor.data, np.ndarray) @@ -154,10 +165,10 @@ def test_init_with_node_output_port(): param2 = ops.parameter(ov.Shape([1, 3, 32, 32]), dtype=np.float64) param3 = ops.parameter(ov.PartialShape.dynamic(), dtype=np.float64) ones_arr = np.ones(shape=(1, 3, 32, 32), dtype=np.float64) - tensor1 = Tensor(param1.output(0)) - tensor2 = Tensor(param2.output(0), ones_arr) - tensor3 = Tensor(param3.output(0)) - tensor4 = Tensor(param3.output(0), ones_arr) + tensor1 = ov.Tensor(param1.output(0)) + tensor2 = ov.Tensor(param2.output(0), ones_arr) + tensor3 = ov.Tensor(param3.output(0)) + tensor4 = ov.Tensor(param3.output(0), ones_arr) assert tensor1.shape == param1.shape assert tensor1.element_type == param1.get_element_type() assert tensor2.shape == param2.shape @@ -173,8 +184,8 @@ def test_init_with_node_constoutput_port(device): output = compiled_model.output(0) ones_arr = np.ones(shape=(1, 3, 32, 32), dtype=np.float32) - tensor1 = Tensor(output) - tensor2 = Tensor(output, ones_arr) + tensor1 = ov.Tensor(output) + tensor2 = ov.Tensor(output, ones_arr) output_node = output.get_node() assert tensor1.shape == output_node.shape @@ -190,10 +201,10 @@ def test_init_with_output_port_different_shapes(): ones_arr = np.ones(shape=(2, 2), dtype=np.float32) with 
pytest.warns(RuntimeWarning): - Tensor(param1.output(0), ones_arr) + ov.Tensor(param1.output(0), ones_arr) with pytest.raises(RuntimeError) as e: - Tensor(param2.output(0), ones_arr) + ov.Tensor(param2.output(0), ones_arr) assert "Shape of the port exceeds shape of the array." in str(e.value) @@ -201,62 +212,68 @@ def test_init_with_output_port_different_types(): param1 = ops.parameter(ov.Shape([2]), dtype=np.int16) ones_arr = np.ones(shape=(2, 2), dtype=np.int8) with pytest.warns(RuntimeWarning): - tensor = Tensor(param1.output(0), ones_arr) + tensor = ov.Tensor(param1.output(0), ones_arr) assert not np.array_equal(tensor.data, ones_arr) def test_init_with_roi_tensor(): array = np.random.normal(size=[1, 3, 48, 48]) - ov_tensor1 = Tensor(array) - ov_tensor2 = Tensor(ov_tensor1, [0, 0, 24, 24], [1, 3, 48, 48]) + ov_tensor1 = ov.Tensor(array) + ov_tensor2 = ov.Tensor(ov_tensor1, [0, 0, 24, 24], [1, 3, 48, 48]) assert list(ov_tensor2.shape) == [1, 3, 24, 24] assert ov_tensor2.element_type == ov_tensor2.element_type assert np.shares_memory(ov_tensor1.data, ov_tensor2.data) assert np.array_equal(ov_tensor1.data[0:1, :, 24:, 24:], ov_tensor2.data) -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.bf16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, bool), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.bf16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, 
bool), + ], +) def test_write_to_buffer(ov_type, numpy_dtype): - ov_tensor = Tensor(ov_type, ov.Shape([1, 3, 32, 32])) + ov_tensor = ov.Tensor(ov_type, ov.Shape([1, 3, 32, 32])) ones_arr = np.ones([1, 3, 32, 32], numpy_dtype) ov_tensor.data[:] = ones_arr assert np.array_equal(ov_tensor.data, ones_arr) -@pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.bf16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), - (ov.Type.boolean, bool), -]) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.bf16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + (ov.Type.boolean, bool), + ], +) def test_set_shape(ov_type, numpy_dtype): shape = ov.Shape([1, 3, 32, 32]) ref_shape = ov.Shape([1, 3, 48, 48]) ref_shape_np = [1, 3, 28, 28] - ov_tensor = Tensor(ov_type, shape) + ov_tensor = ov.Tensor(ov_type, shape) ov_tensor.set_shape(ref_shape) assert list(ov_tensor.shape) == list(ref_shape) @@ -277,14 +294,17 @@ def test_set_shape(ov_type, numpy_dtype): assert np.array_equal(ov_tensor.data, zeros) -@pytest.mark.parametrize("ref_shape", [ - [1, 3, 24, 24], - [1, 3, 32, 32], -]) +@pytest.mark.parametrize( + "ref_shape", + [ + [1, 3, 24, 24], + [1, 3, 32, 32], + ], +) def test_can_set_smaller_or_same_shape_on_preallocated_memory(ref_shape): ones_arr = np.ones(shape=(1, 3, 32, 32), dtype=np.float32) ones_arr = np.ascontiguousarray(ones_arr) - ov_tensor = Tensor(ones_arr, shared_memory=True) + ov_tensor = ov.Tensor(ones_arr, 
shared_memory=True) assert np.shares_memory(ones_arr, ov_tensor.data) ov_tensor.shape = ref_shape assert list(ov_tensor.shape) == ref_shape @@ -293,7 +313,7 @@ def test_can_set_smaller_or_same_shape_on_preallocated_memory(ref_shape): def test_cannot_set_bigger_shape_on_preallocated_memory(): ones_arr = np.ones(shape=(1, 3, 32, 32), dtype=np.float32) ones_arr = np.ascontiguousarray(ones_arr) - ov_tensor = Tensor(ones_arr, shared_memory=True) + ov_tensor = ov.Tensor(ones_arr, shared_memory=True) ref_shape = [1, 3, 48, 48] assert np.shares_memory(ones_arr, ov_tensor.data) with pytest.raises(RuntimeError) as e: @@ -305,7 +325,7 @@ def test_cannot_set_bigger_shape_on_preallocated_memory(): def test_can_reset_shape_after_decreasing_on_preallocated_memory(): ones_arr = np.ones(shape=(1, 3, 32, 32), dtype=np.float32) ones_arr = np.ascontiguousarray(ones_arr) - ov_tensor = Tensor(ones_arr, shared_memory=True) + ov_tensor = ov.Tensor(ones_arr, shared_memory=True) ref_shape_1 = [1, 3, 24, 24] ref_shape_2 = [1, 3, 32, 32] assert np.shares_memory(ones_arr, ov_tensor.data) @@ -316,71 +336,89 @@ def test_can_reset_shape_after_decreasing_on_preallocated_memory(): def test_can_set_shape_other_dims(): - ov_tensor = Tensor(np.float32, [1, 3, 48, 48]) + ov_tensor = ov.Tensor(np.float32, [1, 3, 48, 48]) ref_shape_1 = [3, 28, 28] ov_tensor.shape = ref_shape_1 assert list(ov_tensor.shape) == ref_shape_1 -@pytest.mark.parametrize("ov_type", [ - (ov.Type.u1), - (ov.Type.u4), - (ov.Type.i4), -]) +@pytest.mark.parametrize( + "ov_type", + [ + (ov.Type.u1), + (ov.Type.u4), + (ov.Type.i4), + ], +) def test_cannot_create_roi_from_packed_tensor(ov_type): - ov_tensor = Tensor(ov_type, [1, 3, 48, 48]) + ov_tensor = ov.Tensor(ov_type, [1, 3, 48, 48]) with pytest.raises(RuntimeError) as e: - Tensor(ov_tensor, [0, 0, 24, 24], [1, 3, 48, 48]) + ov.Tensor(ov_tensor, [0, 0, 24, 24], [1, 3, 48, 48]) assert "ROI Tensor for types with bitwidths less then 8 bit is not implemented" in str(e.value) 
-@pytest.mark.parametrize("ov_type", [ - (ov.Type.u1), - (ov.Type.u4), - (ov.Type.i4), -]) +@pytest.mark.parametrize( + "ov_type", + [ + (ov.Type.u1), + (ov.Type.u4), + (ov.Type.i4), + ], +) def test_cannot_get_strides_for_packed_tensor(ov_type): - ov_tensor = Tensor(ov_type, [1, 3, 48, 48]) + ov_tensor = ov.Tensor(ov_type, [1, 3, 48, 48]) with pytest.raises(RuntimeError) as e: ov_tensor.get_strides() assert "Could not get strides for types with bitwidths less then 8 bit." in str(e.value) -@pytest.mark.parametrize("dtype", [ - (np.uint8), - (np.int8), - (np.uint16), - (np.uint32), - (np.uint64), -]) -@pytest.mark.parametrize("ov_type", [ - (ov.Type.u1), - (ov.Type.u4), - (ov.Type.i4), -]) +@pytest.mark.parametrize( + "dtype", + [ + (np.uint8), + (np.int8), + (np.uint16), + (np.uint32), + (np.uint64), + ], +) +@pytest.mark.parametrize( + "ov_type", + [ + (ov.Type.u1), + (ov.Type.u4), + (ov.Type.i4), + ], +) def test_init_with_packed_buffer(dtype, ov_type): shape = [1, 3, 32, 32] fit = np.dtype(dtype).itemsize * 8 / ov_type.bitwidth assert np.prod(shape) % fit == 0 size = int(np.prod(shape) // fit) buffer = np.random.normal(size=size).astype(dtype) - ov_tensor = Tensor(buffer, shape, ov_type) + ov_tensor = ov.Tensor(buffer, shape, ov_type) assert ov_tensor.data.nbytes == ov_tensor.byte_size assert np.array_equal(ov_tensor.data.view(dtype), buffer) -@pytest.mark.parametrize("shape", [ - ([1, 3, 28, 28]), - ([1, 3, 27, 27]), -]) -@pytest.mark.parametrize(("low", "high", "ov_type", "dtype"), [ - (0, 2, ov.Type.u1, np.uint8), - (0, 16, ov.Type.u4, np.uint8), - (-8, 7, ov.Type.i4, np.int8), - (0, 16, ov.Type.nf4, np.uint8), -]) +@pytest.mark.parametrize( + "shape", + [ + ([1, 3, 28, 28]), + ([1, 3, 27, 27]), + ], +) +@pytest.mark.parametrize( + ("low", "high", "ov_type", "dtype"), + [ + (0, 2, ov.Type.u1, np.uint8), + (0, 16, ov.Type.u4, np.uint8), + (-8, 7, ov.Type.i4, np.int8), + (0, 16, ov.Type.nf4, np.uint8), + ], +) def test_packing(shape, low, high, ov_type, dtype): 
- ov_tensor = Tensor(ov_type, shape) + ov_tensor = ov.Tensor(ov_type, shape) data = np.random.uniform(low, high, shape).astype(dtype) packed_data = pack_data(data, ov_tensor.element_type) ov_tensor.data[:] = packed_data @@ -388,40 +426,46 @@ def test_packing(shape, low, high, ov_type, dtype): assert np.array_equal(unpacked, data) -@pytest.mark.parametrize("dtype", [ - (np.uint8), - (np.int8), - (np.int16), - (np.uint16), - (np.int32), - (np.uint32), - (np.int64), - (np.uint64), - (np.float16), - (np.float32), - (np.float64), -]) -@pytest.mark.parametrize("element_type", [ - (ov.Type.u8), - (ov.Type.i8), - (ov.Type.i16), - (ov.Type.u16), - (ov.Type.i32), - (ov.Type.u32), - (ov.Type.i64), - (ov.Type.u64), -]) +@pytest.mark.parametrize( + "dtype", + [ + (np.uint8), + (np.int8), + (np.int16), + (np.uint16), + (np.int32), + (np.uint32), + (np.int64), + (np.uint64), + (np.float16), + (np.float32), + (np.float64), + ], +) +@pytest.mark.parametrize( + "element_type", + [ + (ov.Type.u8), + (ov.Type.i8), + (ov.Type.i16), + (ov.Type.u16), + (ov.Type.i32), + (ov.Type.u32), + (ov.Type.i64), + (ov.Type.u64), + ], +) def test_viewed_tensor(dtype, element_type): buffer = np.random.normal(size=(2, 16)).astype(dtype) fit = (dtype().nbytes * 8) / element_type.bitwidth - tensor = Tensor(buffer, (buffer.shape[0], int(buffer.shape[1] * fit)), element_type) - assert np.array_equal(tensor.data, buffer.view(ov.utils.types.get_dtype(element_type))) + tensor = ov.Tensor(buffer, (buffer.shape[0], int(buffer.shape[1] * fit)), element_type) + assert np.array_equal(tensor.data, buffer.view(ov.runtime.utils.types.get_dtype(element_type))) def test_viewed_tensor_default_type(): buffer = np.random.normal(size=(2, 16)) new_shape = (4, 8) - tensor = Tensor(buffer, new_shape) + tensor = ov.Tensor(buffer, new_shape) assert np.array_equal(tensor.data, buffer.reshape(new_shape)) @@ -434,24 +478,27 @@ def test_stride_calculation(): assert ov_tensor is not None assert np.array_equal(ov_tensor.data, arr) - 
elements = (ov_tensor.shape[1] * ov_tensor.shape[2] * ov_tensor.shape[3]) + elements = ov_tensor.shape[1] * ov_tensor.shape[2] * ov_tensor.shape[3] assert ov_tensor.strides[0] == elements * ov_tensor.get_element_type().size -@pytest.mark.parametrize(("element_type", "dtype"), [ - (ov.Type.f32, np.float32), - (ov.Type.f64, np.float64), - (ov.Type.f16, np.float16), - (ov.Type.bf16, np.float16), - (ov.Type.i8, np.int8), - (ov.Type.u8, np.uint8), - (ov.Type.i32, np.int32), - (ov.Type.u32, np.uint32), - (ov.Type.i16, np.int16), - (ov.Type.u16, np.uint16), - (ov.Type.i64, np.int64), - (ov.Type.u64, np.uint64), -]) +@pytest.mark.parametrize( + ("element_type", "dtype"), + [ + (ov.Type.f32, np.float32), + (ov.Type.f64, np.float64), + (ov.Type.f16, np.float16), + (ov.Type.bf16, np.float16), + (ov.Type.i8, np.int8), + (ov.Type.u8, np.uint8), + (ov.Type.i32, np.int32), + (ov.Type.u32, np.uint32), + (ov.Type.i16, np.int16), + (ov.Type.u16, np.uint16), + (ov.Type.i64, np.int64), + (ov.Type.u64, np.uint64), + ], +) def test_copy_to(dtype, element_type): tensor = ov.Tensor(shape=ov.Shape([3, 2, 2]), type=element_type) target_tensor = ov.Tensor(shape=ov.Shape([3, 2, 2]), type=element_type) @@ -469,20 +516,47 @@ def test_copy_to(dtype, element_type): assert np.array_equal(tensor.data, target_tensor.data) -@pytest.mark.parametrize("element_type", [ - (ov.Type.f32), - (ov.Type.f64), - (ov.Type.f16), - (ov.Type.bf16), - (ov.Type.i8), - (ov.Type.u8), - (ov.Type.i32), - (ov.Type.u32), - (ov.Type.i16), - (ov.Type.u16), - (ov.Type.i64), - (ov.Type.u64), -]) +@pytest.mark.parametrize( + "element_type", + [ + (ov.Type.f32), + (ov.Type.f64), + (ov.Type.f16), + (ov.Type.bf16), + (ov.Type.i8), + (ov.Type.u8), + (ov.Type.i32), + (ov.Type.u32), + (ov.Type.i16), + (ov.Type.u16), + (ov.Type.i64), + (ov.Type.u64), + ], +) def test_is_continuous(element_type): tensor = ov.Tensor(shape=ov.Shape([3, 2, 2]), type=element_type) assert tensor.is_continuous() + + +@pytest.mark.parametrize( + 
"shared_flag", + [ + (True), + (False), + ], +) +@pytest.mark.parametrize( + "init_value", + [ + (np.array([])), + (np.array([], dtype=np.int32)), + (np.empty(shape=(0))), + ], +) +def test_init_from_empty_array(shared_flag, init_value): + tensor = ov.Tensor(init_value, shared_memory=shared_flag) + assert tensor.is_continuous() + assert tuple(tensor.shape) == init_value.shape + assert tensor.element_type.to_dtype() == init_value.dtype + assert tensor.byte_size == init_value.nbytes + assert np.array_equal(tensor.data, init_value) From 7c595f877388dd7ad45634a03d0d95cd7f3c4e60 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 13 Nov 2023 14:29:18 +0400 Subject: [PATCH 275/275] [TF FE] Refactor ops translators to check complex type and speed-up compilation (#21025) * [TF FE] Refactor AddN translator Signed-off-by: Kazantsev, Roman * Refactor ArgMin/ArgMax Signed-off-by: Kazantsev, Roman * Add default check for binary operations Signed-off-by: Kazantsev, Roman * Refactor Cast translator Signed-off-by: Kazantsev, Roman * Refactor Const translator Signed-off-by: Kazantsev, Roman * Refactor Einsum, Elu, and Fill operations Signed-off-by: Kazantsev, Roman * Refactor MatMul translator Signed-off-by: Kazantsev, Roman * Refactor Relu6 and Placeholder Signed-off-by: Kazantsev, Roman * Refactor Round, Square, and unary operation translators Signed-off-by: Kazantsev, Roman * Update src/frontends/tensorflow_common/src/op/relu_6.cpp * Update src/frontends/tensorflow_common/src/op/const.cpp --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow_common/src/op/addN.cpp | 27 +++++++++---------- .../tensorflow_common/src/op/arg_min_max.cpp | 17 +++++++----- .../tensorflow_common/src/op/binary_op.cpp | 11 ++++---- .../tensorflow_common/src/op/cast.cpp | 12 +++++---- .../tensorflow_common/src/op/const.cpp | 8 +++--- .../tensorflow_common/src/op/einsum.cpp | 11 +++++--- .../tensorflow_common/src/op/elu.cpp | 9 ++++--- .../tensorflow_common/src/op/fill.cpp | 12 +++++---- 
.../tensorflow_common/src/op/matmul.cpp | 19 ++++++++----- .../tensorflow_common/src/op/placeholder.cpp | 16 ++++++----- .../tensorflow_common/src/op/relu_6.cpp | 10 ++++--- .../tensorflow_common/src/op/round.cpp | 11 +++++--- .../tensorflow_common/src/op/square.cpp | 8 +++--- .../tensorflow_common/src/op/unary_op.cpp | 2 ++ src/frontends/tensorflow_common/src/utils.cpp | 17 +++++++----- 15 files changed, 114 insertions(+), 76 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/addN.cpp b/src/frontends/tensorflow_common/src/op/addN.cpp index e7b260b5070cf7..0daf83f29712fd 100644 --- a/src/frontends/tensorflow_common/src/op/addN.cpp +++ b/src/frontends/tensorflow_common/src/op/addN.cpp @@ -2,13 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/add.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov; +using namespace ov::op; namespace ov { namespace frontend { @@ -16,18 +15,16 @@ namespace tensorflow { namespace op { OutputVector translate_add_n_op(const NodeContext& node) { - OutputVector ng_arg_vec; - for (size_t i = 0; i < node.get_input_size(); i++) { - ng_arg_vec.push_back(node.get_input(static_cast(i))); + default_op_checks(node, 1, {"AddN", "ADD_N"}); + int num_size = static_cast(node.get_input_size()); + + Output result = node.get_input(0); + for (int ind = 1; ind < num_size; ++ind) { + result = make_shared(result, node.get_input(ind)); } - auto res = std::accumulate(std::next(ng_arg_vec.begin()), - ng_arg_vec.end(), - ng_arg_vec.at(0), - [](const Output& a, const Output& b) -> shared_ptr { - return make_shared(a, b); - }); - set_node_name(node.get_name(), res.get_node_shared_ptr()); - return {res}; + + set_node_name(node.get_name(), result.get_node_shared_ptr()); + return {result}; } } // namespace op } // namespace tensorflow diff --git a/src/frontends/tensorflow_common/src/op/arg_min_max.cpp 
b/src/frontends/tensorflow_common/src/op/arg_min_max.cpp index 42cfe8867dac53..c9d72018e1e976 100644 --- a/src/frontends/tensorflow_common/src/op/arg_min_max.cpp +++ b/src/frontends/tensorflow_common/src/op/arg_min_max.cpp @@ -3,10 +3,12 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/topk.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,13 +16,14 @@ namespace tensorflow { namespace op { OutputVector translate_arg_min_max(const NodeContext& node, std::string mode) { + default_op_checks(node, 1, {"ArgMax", "ArgMin", "ARG_MAX", "ARG_MIN"}); auto input = node.get_input(0); // TensorFlow uses axis with default value equal to zero int64_t axis = 0; if (node.get_input_size() > 1) { TENSORFLOW_OP_VALIDATION(node, - std::dynamic_pointer_cast(node.get_input(1).get_node_shared_ptr()), + as_type_ptr(node.get_input(1).get_node_shared_ptr()), "ArgMax/ArgMin is not supported with non-constant axis input"); std::vector axes; get_const_input(node, 1, &axes); @@ -30,12 +33,12 @@ OutputVector translate_arg_min_max(const NodeContext& node, std::string mode) { auto output_type = node.get_attribute("output_type", element::i64); // compute indices of max/min values using TopK - auto k = make_shared(element::i64, Shape{}, 1); + auto k = make_shared(element::i64, Shape{}, 1); // TODO: define sort attribute for TensorFlow case - auto top_k = std::make_shared(input, k, axis, mode, "none", output_type); + auto top_k = std::make_shared(input, k, axis, mode, "none", output_type); - auto axis_to_remove = make_shared(element::i64, Shape{1}, std::vector({axis})); - auto res = make_shared(top_k->output(1), axis_to_remove); + auto axis_to_remove = make_shared(element::i64, Shape{1}, vector({axis})); + auto res = make_shared(top_k->output(1), axis_to_remove); set_node_name(node.get_name(), res); return 
{res}; } diff --git a/src/frontends/tensorflow_common/src/op/binary_op.cpp b/src/frontends/tensorflow_common/src/op/binary_op.cpp index a1a056f048d066..0ea26fb3cb9946 100644 --- a/src/frontends/tensorflow_common/src/op/binary_op.cpp +++ b/src/frontends/tensorflow_common/src/op/binary_op.cpp @@ -15,11 +15,12 @@ namespace op { OutputVector translate_binary_op(const NodeContext& node, const std::function(Output&, Output&)>& create_binary_op) { - auto ng_lhs = node.get_input(0); - auto ng_rhs = node.get_input(1); - auto ng_node = create_binary_op(ng_lhs, ng_rhs); - set_node_name(node.get_name(), ng_node.get_node_shared_ptr()); - return {ng_node}; + default_op_checks(node, 2, {}); + auto lhs = node.get_input(0); + auto rhs = node.get_input(1); + auto result = create_binary_op(lhs, rhs); + set_node_name(node.get_name(), result.get_node_shared_ptr()); + return {result}; } OutputVector translate_floor_div_op(const NodeContext& node) { diff --git a/src/frontends/tensorflow_common/src/op/cast.cpp b/src/frontends/tensorflow_common/src/op/cast.cpp index 7e0b268dc17927..da21bc7396d679 100644 --- a/src/frontends/tensorflow_common/src/op/cast.cpp +++ b/src/frontends/tensorflow_common/src/op/cast.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/convert.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,10 +14,12 @@ namespace tensorflow { namespace op { OutputVector translate_cast_op(const NodeContext& node) { - auto ng_input = node.get_input(0); + default_op_checks(node, 1, {"Cast", "CAST"}); + auto x = node.get_input(0); + + auto dst_type = node.get_attribute("DstT"); + auto res = make_shared(x, dst_type); - auto ng_et = node.get_attribute("DstT"); - auto res = make_shared(ng_input, ng_et); set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/const.cpp 
b/src/frontends/tensorflow_common/src/op/const.cpp index 1228d42b486264..96d21e721365fc 100644 --- a/src/frontends/tensorflow_common/src/op/const.cpp +++ b/src/frontends/tensorflow_common/src/op/const.cpp @@ -5,10 +5,10 @@ #include "common_op_table.hpp" #include "helper_ops/string_constant.hpp" #include "helper_ops/unsupported_constant.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/constant.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; using namespace ov; namespace ov { @@ -17,6 +17,8 @@ namespace tensorflow { namespace op { OutputVector translate_const_op(const NodeContext& node) { + default_op_checks(node, 0, {"Const"}); + auto ov_type = node.get_attribute_as_any("dtype"); std::shared_ptr const_node; if (!ov_type.is() || ov_type.as() == ov::element::dynamic || @@ -28,7 +30,7 @@ OutputVector translate_const_op(const NodeContext& node) { } } else { auto tensor = node.get_attribute("value"); - const_node = std::make_shared(tensor); + const_node = std::make_shared(tensor); } set_node_name(node.get_name(), const_node); return {const_node}; diff --git a/src/frontends/tensorflow_common/src/op/einsum.cpp b/src/frontends/tensorflow_common/src/op/einsum.cpp index 591ca5dc5e2fed..431f1c4f51ba41 100644 --- a/src/frontends/tensorflow_common/src/op/einsum.cpp +++ b/src/frontends/tensorflow_common/src/op/einsum.cpp @@ -2,20 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/einsum.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { namespace tensorflow { namespace op { OutputVector translate_einsum_op(const NodeContext& node) { + default_op_checks(node, 1, {"Einsum"}); + auto op_type = node.get_op_type(); TENSORFLOW_OP_VALIDATION(node, op_type == "Einsum", "Internal error: incorrect usage of translate_einsum_op."); - auto equation = node.get_attribute("equation"); + 
auto equation = node.get_attribute("equation"); int input_size = static_cast(node.get_input_size()); OutputVector inputs; @@ -23,7 +26,7 @@ OutputVector translate_einsum_op(const NodeContext& node) { inputs.push_back(node.get_input(input_ind)); } - auto einsum = make_shared(inputs, equation); + auto einsum = make_shared(inputs, equation); set_node_name(node.get_name(), einsum); return {einsum}; } diff --git a/src/frontends/tensorflow_common/src/op/elu.cpp b/src/frontends/tensorflow_common/src/op/elu.cpp index a2833000141856..b2a35a99402ee3 100644 --- a/src/frontends/tensorflow_common/src/op/elu.cpp +++ b/src/frontends/tensorflow_common/src/op/elu.cpp @@ -2,12 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/elu.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" using namespace std; using namespace ov; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -15,9 +16,11 @@ namespace tensorflow { namespace op { OutputVector translate_elu_op(const NodeContext& node) { + default_op_checks(node, 1, {"Elu", "ELU"}); auto input = node.get_input(0); auto alpha = node.get_attribute("alpha", 1.0); - auto res = make_shared(input, alpha); + auto res = make_shared(input, alpha); + set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/fill.cpp b/src/frontends/tensorflow_common/src/op/fill.cpp index bce098eb5bd35d..3edfbcf382cade 100644 --- a/src/frontends/tensorflow_common/src/op/fill.cpp +++ b/src/frontends/tensorflow_common/src/op/fill.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/broadcast.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,9 +14,11 @@ namespace tensorflow { namespace op { OutputVector translate_fill_op(const NodeContext& node) { - auto ng_dims = node.get_input(0); - auto 
ng_value = node.get_input(1); - auto res = make_shared(ng_value, ng_dims); + default_op_checks(node, 2, {"Fill", "FILL"}); + auto dims = node.get_input(0); + auto value = node.get_input(1); + + auto res = make_shared(value, dims); set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/matmul.cpp b/src/frontends/tensorflow_common/src/op/matmul.cpp index 21a0591d109b69..5b3f57f6f8506a 100644 --- a/src/frontends/tensorflow_common/src/op/matmul.cpp +++ b/src/frontends/tensorflow_common/src/op/matmul.cpp @@ -2,11 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/matmul.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,29 +15,35 @@ namespace tensorflow { namespace op { OutputVector translate_mat_mul_op(const NodeContext& node) { + default_op_checks(node, 2, {"MatMul"}); + auto a = node.get_input(0); auto b = node.get_input(1); auto transpose_a = node.get_attribute("transpose_a", false); auto transpose_b = node.get_attribute("transpose_b", false); - auto res = make_shared(a, b, transpose_a, transpose_b); + auto res = make_shared(a, b, transpose_a, transpose_b); set_node_name(node.get_name(), res); return res->outputs(); } OutputVector translate_batch_mat_mul_op(const NodeContext& node) { + default_op_checks(node, 2, {"BatchMatMul", "BatchMatMulV2", "BATCH_MATMUL"}); + auto x = node.get_input(0); auto y = node.get_input(1); auto adj_x = node.get_attribute("adj_x", false); auto adj_y = node.get_attribute("adj_y", false); - auto result = make_shared(x, y, adj_x, adj_y); + auto result = make_shared(x, y, adj_x, adj_y); set_node_name(node.get_name(), result); return result->outputs(); } OutputVector translate_batch_mat_mul_with_type_op(const NodeContext& node) { + default_op_checks(node, 2, {"BatchMatMulV3"}); + auto x = node.get_input(0); auto y = 
node.get_input(1); @@ -46,10 +53,10 @@ OutputVector translate_batch_mat_mul_with_type_op(const NodeContext& node) { auto adj_y = node.get_attribute("adj_y", false); auto t_out = node.get_attribute("Tout", input_type); - auto result = make_shared(x, y, adj_x, adj_y)->output(0); + auto result = make_shared(x, y, adj_x, adj_y)->output(0); if (t_out != input_type) { - result = make_shared(result, t_out); + result = make_shared(result, t_out); } set_node_name(node.get_name(), result.get_node_shared_ptr()); diff --git a/src/frontends/tensorflow_common/src/op/placeholder.cpp b/src/frontends/tensorflow_common/src/op/placeholder.cpp index 4b5b6aa00d45fd..574e92eff5cf63 100644 --- a/src/frontends/tensorflow_common/src/op/placeholder.cpp +++ b/src/frontends/tensorflow_common/src/op/placeholder.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/parameter.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,24 +14,28 @@ namespace tensorflow { namespace op { OutputVector translate_placeholder_op(const NodeContext& node) { - auto dtype = node.get_attribute("dtype"); - auto shape = node.get_attribute("shape", ov::PartialShape::dynamic()); + default_op_checks(node, 0, {}); + + auto dtype = node.get_attribute("dtype"); + auto shape = node.get_attribute("shape", PartialShape::dynamic()); if (shape.rank().is_static() && shape.rank().get_length() == 0 && node.has_attribute("_output_shapes")) { // we know some cases when Placeholder operation has empty scalar `shape` attribute value // and non-empty `_output_shapes` attribute value. 
// `_output_shapes` attribute value turns to be correct in this case - auto output_shapes = node.get_attribute>("_output_shapes"); + auto output_shapes = node.get_attribute>("_output_shapes"); if (output_shapes.size() == 1 && output_shapes[0].rank().is_static()) { shape = output_shapes[0]; } } - auto res = std::make_shared(dtype, shape); + auto res = make_shared(dtype, shape); set_node_name(node.get_name(), res); return res->outputs(); } OutputVector translate_placeholder_with_default_op(const NodeContext& node) { + default_op_checks(node, 0, {}); + // For parity with legacy frontend, it creates a constant node with the default value // As a rule, PlaceholderWithDefault is mainly used for is_training variables in the model TENSORFLOW_OP_VALIDATION(node, diff --git a/src/frontends/tensorflow_common/src/op/relu_6.cpp b/src/frontends/tensorflow_common/src/op/relu_6.cpp index d30339ca4c7ea9..4fd7da65b01932 100644 --- a/src/frontends/tensorflow_common/src/op/relu_6.cpp +++ b/src/frontends/tensorflow_common/src/op/relu_6.cpp @@ -3,18 +3,20 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/clamp.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { namespace tensorflow { namespace op { -ov::OutputVector translate_relu_6_op(const NodeContext& node) { +OutputVector translate_relu_6_op(const NodeContext& node) { + default_op_checks(node, 1, {}); + auto data = node.get_input(0); - auto res = std::make_shared(data, 0.0, 6.0f); + auto res = make_shared(data, 0.0, 6.0f); set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/round.cpp b/src/frontends/tensorflow_common/src/op/round.cpp index 1e911e68af40c0..d888860bc9900f 100644 --- a/src/frontends/tensorflow_common/src/op/round.cpp +++ b/src/frontends/tensorflow_common/src/op/round.cpp @@ -2,11 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#include 
"openvino/op/round.hpp" + #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,11 +15,13 @@ namespace tensorflow { namespace op { OutputVector translate_round_op(const NodeContext& node) { + default_op_checks(node, 1, {"Round", "ROUND"}); + auto input = node.get_input(0); // using default round mode "half_to_even" in openvino, // as TF has only that mode - auto round_mode = Round::RoundMode::HALF_TO_EVEN; - auto res = make_shared(input, round_mode); + auto round_mode = v5::Round::RoundMode::HALF_TO_EVEN; + auto res = make_shared(input, round_mode); set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/square.cpp b/src/frontends/tensorflow_common/src/op/square.cpp index 94d9c4a623d1c7..1aacdf74926455 100644 --- a/src/frontends/tensorflow_common/src/op/square.cpp +++ b/src/frontends/tensorflow_common/src/op/square.cpp @@ -3,10 +3,10 @@ // #include "common_op_table.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/multiply.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -14,8 +14,10 @@ namespace tensorflow { namespace op { OutputVector translate_square_op(const NodeContext& node) { + default_op_checks(node, 1, {"Square", "SQUARE"}); + auto n = node.get_input(0); - auto res = make_shared(n, n); + auto res = make_shared(n, n); set_node_name(node.get_name(), res); return res->outputs(); } diff --git a/src/frontends/tensorflow_common/src/op/unary_op.cpp b/src/frontends/tensorflow_common/src/op/unary_op.cpp index 16c279bd63e9d4..588d1863e3dda7 100644 --- a/src/frontends/tensorflow_common/src/op/unary_op.cpp +++ b/src/frontends/tensorflow_common/src/op/unary_op.cpp @@ -50,6 +50,8 @@ namespace op { OutputVector translate_unary_op(const NodeContext& op, const function(Output)>& create_unary_op) { + 
default_op_checks(op, 1, {}); + auto input = op.get_input(0); auto res = create_unary_op(input); set_node_name(op.get_name(), res); diff --git a/src/frontends/tensorflow_common/src/utils.cpp b/src/frontends/tensorflow_common/src/utils.cpp index adf736d3b2cf84..2d5293a095ea70 100644 --- a/src/frontends/tensorflow_common/src/utils.cpp +++ b/src/frontends/tensorflow_common/src/utils.cpp @@ -248,12 +248,17 @@ void default_op_checks(const frontend::NodeContext& node, const vector& supported_ops, bool supported_complex) { auto op_type = node.get_op_type(); - TENSORFLOW_OP_VALIDATION(node, - find(supported_ops.begin(), supported_ops.end(), op_type) != supported_ops.end(), - op_type + " is not supported for conversion."); - TENSORFLOW_OP_VALIDATION(node, - node.get_input_size() >= min_input_size, - op_type + " must have at least " + to_string(min_input_size) + " inputs."); + + // we can skip these checks if translator wrapper can be used for multiple operations + // check only if supported_ops is defined + if (supported_ops.size() > 0) { + TENSORFLOW_OP_VALIDATION(node, + find(supported_ops.begin(), supported_ops.end(), op_type) != supported_ops.end(), + op_type + " is not supported for conversion."); + TENSORFLOW_OP_VALIDATION(node, + node.get_input_size() >= min_input_size, + op_type + " must have at least " + to_string(min_input_size) + " inputs."); + } // check if it supports complex type in case complex type input bool has_input_complex_type = false;