From 4edb040869da4db92d414d78b87bfe693884db53 Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Fri, 1 Mar 2024 11:59:32 +0100
Subject: [PATCH 01/28] Fix timm filter and run timm models in parallel
 (#23116)

### Details:
 - *Update list of timm models to comply with new version of timm*
 - *Run timm and torchvision models in trace and export modes in parallel*

### Tickets:
 - *ticket-id*

---
 .../workflows/job_pytorch_models_tests.yml    | 16 +++++++++++--
 .../pytorch/hf_transformers_models            |  2 +-
 tests/model_hub_tests/pytorch/test_aliked.py  |  2 +-
 .../pytorch/test_detectron2.py                |  2 +-
 tests/model_hub_tests/pytorch/test_edsr.py    |  2 +-
 tests/model_hub_tests/pytorch/test_gfpgan.py  |  2 +-
 .../pytorch/test_hf_transformers.py           |  2 +-
 .../pytorch/test_speech-transformer.py        |  2 +-
 tests/model_hub_tests/pytorch/test_timm.py    | 24 ++++++++-----------
 .../pytorch/test_torchbench.py                |  2 +-
 .../pytorch/test_torchvision_models.py        | 15 ++----------
 tests/model_hub_tests/pytorch/test_tpsmm.py   |  2 +-
 tests/model_hub_tests/pytorch/timm_models     | 23 ++++++++++--------
 13 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml
index d2f263df95f796..16aa00873bc53b 100644
--- a/.github/workflows/job_pytorch_models_tests.yml
+++ b/.github/workflows/job_pytorch_models_tests.yml
@@ -106,6 +106,8 @@ jobs:

       - name: Install OpenVINO Python wheels
         run: |
+          # To enable pytest parallel features
+          python3 -m pip install pytest-xdist[psutil]
           python3 -m pip install ${INSTALL_DIR}/tools/openvino-*
           python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*

@@ -118,10 +120,20 @@ jobs:
         env:
           CPLUS_INCLUDE_PATH: ${{ env.Python_ROOT_DIR }}/include/python${{ env.PYTHON_VERSION }}

-      - name: PyTorch Models Tests
+      - name: PyTorch Models Tests Timm and Torchvision
         run: |
           export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
-          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v
+          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_tests.html --self-contained-html -v -n 4 -k "TestTimmConvertModel or TestTorchHubConvertModel"
+        env:
+          TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}}
+          TEST_DEVICE: CPU
+          OP_REPORT_FILE: ${{ env.INSTALL_TEST_DIR }}/TEST-torch_unsupported_ops.log
+
+      - name: PyTorch Models Tests Not Timm or Torchvision
+        if: always()
+        run: |
+          export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
+          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)"
         env:
           TYPE: ${{ inputs.event == 'schedule' && 'nightly' || 'precommit'}}
           TEST_DEVICE: CPU
diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models
index fddf1afdcfc172..f88ea4430520b4 100644
--- a/tests/model_hub_tests/pytorch/hf_transformers_models
+++ b/tests/model_hub_tests/pytorch/hf_transformers_models
@@ -128,7 +128,7 @@ hf-internal-testing/tiny-random-DonutSwinModel,donut-swin
 hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer
 hf-internal-testing/tiny-random-flaubert,flaubert
 hf-internal-testing/tiny-random-FocalNetModel,focalnet
-hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode,xfail,Conversion is failed for: 
aten::mul +hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode hf-internal-testing/tiny-random-GPTJModel,gptj hf-internal-testing/tiny-random-groupvit,groupvit hf-internal-testing/tiny-random-IBertModel,ibert diff --git a/tests/model_hub_tests/pytorch/test_aliked.py b/tests/model_hub_tests/pytorch/test_aliked.py index a2e93c7ce3c0b2..ad29187de068ce 100644 --- a/tests/model_hub_tests/pytorch/test_aliked.py +++ b/tests/model_hub_tests/pytorch/test_aliked.py @@ -84,7 +84,7 @@ def setup_class(self): subprocess.check_call(["sh", "build.sh"], cwd=os.path.join( self.repo_dir.name, "custom_ops")) - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): sys.path.append(self.repo_dir.name) from nets.aliked import ALIKED diff --git a/tests/model_hub_tests/pytorch/test_detectron2.py b/tests/model_hub_tests/pytorch/test_detectron2.py index 0c6c9eddb0f8ea..5c2ea9a1d9e153 100644 --- a/tests/model_hub_tests/pytorch/test_detectron2.py +++ b/tests/model_hub_tests/pytorch/test_detectron2.py @@ -23,7 +23,7 @@ def setup_class(self): subprocess.run([sys.executable, "-m", "pip", "install", "git+https://github.com/facebookresearch/detectron2.git@017abbfa5f2c2a2afa045200c2af9ccf2fc6227f"]) - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): from detectron2 import model_zoo, export from detectron2.modeling import build_model, PanopticFPN from detectron2.checkpoint import DetectionCheckpointer diff --git a/tests/model_hub_tests/pytorch/test_edsr.py b/tests/model_hub_tests/pytorch/test_edsr.py index 960af9fd5586cf..9d4a9d183e85e6 100644 --- a/tests/model_hub_tests/pytorch/test_edsr.py +++ b/tests/model_hub_tests/pytorch/test_edsr.py @@ -38,7 +38,7 @@ class TestEdsrConvertModel(TestTorchConvertModel): - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): # image link from https://github.com/eugenesiow/super-image url = 'https://paperswithcode.com/media/datasets/Set5-0000002728-07a9793f_zA3bDjj.jpg' image = Image.open(requests.get(url, stream=True).raw) diff --git a/tests/model_hub_tests/pytorch/test_gfpgan.py b/tests/model_hub_tests/pytorch/test_gfpgan.py index ca22789023b648..90784926183906 100644 --- a/tests/model_hub_tests/pytorch/test_gfpgan.py +++ b/tests/model_hub_tests/pytorch/test_gfpgan.py @@ -28,7 +28,7 @@ def setup_class(self): subprocess.check_call( ["wget", "-nv", checkpoint_url], cwd=self.repo_dir.name) - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): sys.path.append(self.repo_dir.name) from gfpgan import GFPGANer diff --git a/tests/model_hub_tests/pytorch/test_hf_transformers.py b/tests/model_hub_tests/pytorch/test_hf_transformers.py index 954cd3bda0bdb7..4adbcf536c398d 100644 --- a/tests/model_hub_tests/pytorch/test_hf_transformers.py +++ b/tests/model_hub_tests/pytorch/test_hf_transformers.py @@ -98,7 +98,7 @@ def setup_class(self): self.image = Image.open(requests.get(url, stream=True).raw) self.cuda_available, self.gptq_postinit = None, None - def load_model_impl(self, name, type): + def load_model(self, name, type): import torch name_suffix = '' from transformers import AutoConfig diff --git a/tests/model_hub_tests/pytorch/test_speech-transformer.py b/tests/model_hub_tests/pytorch/test_speech-transformer.py index a9e7013bdd2cb4..e08526d7a7b553 100644 --- a/tests/model_hub_tests/pytorch/test_speech-transformer.py +++ b/tests/model_hub_tests/pytorch/test_speech-transformer.py @@ -24,7 +24,7 @@ def 
setup_class(self):
         checkpoint_url = "https://github.com/foamliu/Speech-Transformer/releases/download/v1.0/speech-transformer-cn.pt"
         subprocess.check_call(["wget", "-nv", checkpoint_url], cwd=self.repo_dir.name)

-    def load_model_impl(self, model_name, model_link):
+    def load_model(self, model_name, model_link):
         sys.path.append(self.repo_dir.name)
         from transformer.transformer import Transformer

diff --git a/tests/model_hub_tests/pytorch/test_timm.py b/tests/model_hub_tests/pytorch/test_timm.py
index 4803ff964184ec..dad964e7f3e41d 100644
--- a/tests/model_hub_tests/pytorch/test_timm.py
+++ b/tests/model_hub_tests/pytorch/test_timm.py
@@ -6,22 +6,23 @@
 import pytest
 import timm
 import torch
-from models_hub_common.constants import hf_hub_cache_dir
-from models_hub_common.utils import cleanup_dir, get_models_list
+from models_hub_common.utils import get_models_list
 from torch_utils import TestTorchConvertModel, process_pytest_marks
-from openvino import convert_model
-from torch.export import export
-from packaging import version


 def filter_timm(timm_list: list) -> list:
     unique_models = dict()
     filtered_list = []
-    ignore_list = ["base", "xxtiny", "xxs", "pico", "xtiny", "xs", "nano", "tiny", "s", "mini", "small", "lite",
-                   "medium", "m", "big", "large", "l", "xlarge", "xl", "huge", "xxlarge", "gigantic", "giant", "enormous"]
+    ignore_list = ["base", "atto", "femto", "xxtiny", "xxsmall", "xxs", "pico",
+                   "xtiny", "xsmall", "xs", "nano", "tiny", "s", "mini", "small",
+                   "lite", "medium", "m", "big", "large", "l", "xlarge", "xl",
+                   "huge", "xxlarge", "gigantic", "giant", "enormous"]
     ignore_set = set(ignore_list)
     for name in sorted(timm_list):
+        if "x_" in name:
+            # x_small or xx_small should be merged to xsmall and xxsmall
+            name = name.replace("x_", "x")
         # first: remove datasets
         name_parts = name.split(".")
         _name = "_".join(name.split(".")[:-1]) if len(name_parts) > 1 else name
@@ -50,7 +51,7 @@ def get_all_models() -> list:


 class TestTimmConvertModel(TestTorchConvertModel):
-    def load_model_impl(self, model_name, model_link):
+    def load_model(self, model_name, model_link):
         m = timm.create_model(model_name, pretrained=True)
         cfg = timm.get_pretrained_cfg(model_name)
         shape = [1] + list(cfg.input_size)
@@ -69,11 +70,6 @@ def infer_fw_model(self, model_obj, inputs):
             fw_outputs = [fw_outputs.numpy(force=True)]
         return fw_outputs

-    def teardown_method(self):
-        # remove all downloaded files from cache
-        cleanup_dir(hf_hub_cache_dir)
-        super().teardown_method()
-
     @pytest.mark.parametrize("name", ["mobilevitv2_050.cvnets_in1k",
                                       "poolformerv2_s12.sail_in1k",
                                       "vit_base_patch8_224.augreg_in21k",
@@ -86,8 +82,8 @@ def test_convert_model_precommit(self, name, ie_device):
         self.run(name, None, ie_device)

     @pytest.mark.nightly
-    @pytest.mark.parametrize("mode", ["trace"])  # disable "export" for now
     @pytest.mark.parametrize("name", get_all_models())
+    @pytest.mark.parametrize("mode", ["trace", "export"])
     def test_convert_model_all_models(self, mode, name, ie_device):
         self.mode = mode
         self.run(name, None, ie_device)
diff --git a/tests/model_hub_tests/pytorch/test_torchbench.py b/tests/model_hub_tests/pytorch/test_torchbench.py
index c2801c04e50e2d..c53da3e547a9a0 100644
--- a/tests/model_hub_tests/pytorch/test_torchbench.py
+++ b/tests/model_hub_tests/pytorch/test_torchbench.py
@@ -32,7 +32,7 @@ def setup_class(self):
         subprocess.check_call(
             ["git", "checkout", "dbc109791dbb0dfb58775a5dc284fc2c3996cb30"], cwd=self.repo_dir.name)

-    def load_model_impl(self, model_name, model_link):
+    def load_model(self, model_name, 
model_link): subprocess.check_call([sys.executable, "install.py"] + [model_name], cwd=self.repo_dir.name) sys.path.append(self.repo_dir.name) from torchbenchmark import load_model_by_name diff --git a/tests/model_hub_tests/pytorch/test_torchvision_models.py b/tests/model_hub_tests/pytorch/test_torchvision_models.py index 0e8e923a7e2843..9a99af0bf196a6 100644 --- a/tests/model_hub_tests/pytorch/test_torchvision_models.py +++ b/tests/model_hub_tests/pytorch/test_torchvision_models.py @@ -53,13 +53,7 @@ def prepare_frames_for_raft(name, frames1, frames2): class TestTorchHubConvertModel(TestTorchConvertModel): - def setup_class(self): - self.cache_dir = tempfile.TemporaryDirectory() - # set temp dir for torch cache - if os.environ.get('USE_SYSTEM_CACHE', 'True') == 'False': - torch.hub.set_dir(str(self.cache_dir.name)) - - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): m = torch.hub.load("pytorch/vision", model_name, weights='DEFAULT', skip_validation=True) m.eval() @@ -97,11 +91,6 @@ def infer_fw_model(self, model_obj, inputs): fw_outputs = [fw_outputs.numpy(force=True)] return fw_outputs - def teardown_method(self): - # cleanup tmpdir - self.cache_dir.cleanup() - super().teardown_method() - @pytest.mark.parametrize("model_name", ["efficientnet_b7", "raft_small", "swin_v2_s"]) @pytest.mark.precommit def test_convert_model_precommit(self, model_name, ie_device): @@ -114,9 +103,9 @@ def test_convert_model_precommit_export(self, model_name, ie_device): self.mode = "export" self.run(model_name, None, ie_device) - @pytest.mark.parametrize("mode", ["trace"]) # disable "export" for now @pytest.mark.parametrize("name", process_pytest_marks(os.path.join(os.path.dirname(__file__), "torchvision_models"))) + @pytest.mark.parametrize("mode", ["trace", "export"]) @pytest.mark.nightly def test_convert_model_all_models(self, mode, name, ie_device): self.mode = mode diff --git a/tests/model_hub_tests/pytorch/test_tpsmm.py b/tests/model_hub_tests/pytorch/test_tpsmm.py index a3a2de39d4aa3a..7aeac2ca659510 100644 --- a/tests/model_hub_tests/pytorch/test_tpsmm.py +++ b/tests/model_hub_tests/pytorch/test_tpsmm.py @@ -25,7 +25,7 @@ def setup_class(self): ["git", "checkout", "c616878812c9870ed81ac72561be2676fd7180e2"], cwd=self.repo_dir.name) # verify model on random weights - def load_model_impl(self, model_name, model_link): + def load_model(self, model_name, model_link): sys.path.append(self.repo_dir.name) from modules.inpainting_network import InpaintingNetwork from modules.keypoint_detector import KPDetector diff --git a/tests/model_hub_tests/pytorch/timm_models b/tests/model_hub_tests/pytorch/timm_models index 99a0adacf4a2de..6efc426f5e62fc 100644 --- a/tests/model_hub_tests/pytorch/timm_models +++ b/tests/model_hub_tests/pytorch/timm_models @@ -26,17 +26,11 @@ convformer_s36.sail_in1k,None convit_base.fb_in1k,None,xfail,Trace failed convmixer_1024_20_ks9_p14.in1k,None convmixer_1536_20.in1k,None -convnext_atto.d2_in1k,None convnext_atto_ols.a2_in1k,None convnext_base.clip_laion2b,None -convnext_femto.d1_in1k,None -convnext_femto_ols.d1_in1k,None convnext_large_mlp.clip_laion2b_augreg,None -convnext_pico_ols.d1_in1k,None convnext_tiny_hnf.a2h_in1k,None -convnextv2_atto.fcmae,None convnextv2_base.fcmae,None -convnextv2_femto.fcmae,None crossvit_15_dagger_240.in1k,None crossvit_base_240.in1k,None cs3darknet_focus_m.c2ns_in1k,None @@ -52,10 +46,10 @@ cspresnext50.ra_in1k,None darknet53.c2ns_in1k,None darknetaa53.c2ns_in1k,None davit_base.msft_in1k,None 
-deit3_base_patch16_224.fb_in1k,None
-deit3_huge_patch14_224.fb_in1k,None
 deit_base_distilled_patch16_224.fb_in1k,None
 deit_base_patch16_224.fb_in1k,None
+deit3_base_patch16_224.fb_in1k,None
+deit3_huge_patch14_224.fb_in1k,None
 densenet121.ra_in1k,None
 densenet161.tv_in1k,None
 densenet169.tv_in1k,None
@@ -144,11 +138,11 @@ efficientvit_m4.r224_in1k,None
 efficientvit_m5.r224_in1k,None
 ese_vovnet19b_dw.ra_in1k,None
 ese_vovnet39b.ra_in1k,None
+eva_giant_patch14_clip_224.laion400m,None
+eva_large_patch14_196.in22k_ft_in1k,None
 eva02_base_patch14_224.mim_in22k,None
 eva02_base_patch16_clip_224.merged2b,None
 eva02_large_patch14_clip_224.merged2b,None
-eva_giant_patch14_clip_224.laion400m,None
-eva_large_patch14_196.in22k_ft_in1k,None
 fastvit_ma36.apple_dist_in1k,None
 fastvit_s12.apple_dist_in1k,None
 fastvit_sa12.apple_dist_in1k,None
@@ -186,6 +180,14 @@ hardcorenas_c.miil_green_in1k,None
 hardcorenas_d.miil_green_in1k,None
 hardcorenas_e.miil_green_in1k,None
 hardcorenas_f.miil_green_in1k,None
+hgnet_base.ssld_in1k,None
+hgnetv2_b0.ssld_stage1_in22k_in1k,None
+hgnetv2_b1.ssld_stage1_in22k_in1k,None
+hgnetv2_b2.ssld_stage1_in22k_in1k,None
+hgnetv2_b3.ssld_stage1_in22k_in1k,None
+hgnetv2_b4.ssld_stage1_in22k_in1k,None
+hgnetv2_b5.ssld_stage1_in22k_in1k,None
+hgnetv2_b6.ssld_stage1_in22k_in1k,None
 hrnet_w18_small.gluon_in1k,None
 hrnet_w18_small_v2.gluon_in1k,None
 hrnet_w18_ssld.paddle_in1k,None
@@ -245,6 +247,7 @@ mvitv2_base.fb_in1k,None
 mvitv2_base_cls.fb_inw21k,None
 nasnetalarge.tf_in1k,None
 nest_base_jx.goog_in1k,None
+nextvit_base.bd_in1k,None
 nf_regnet_b1.ra2_in1k,None
 nf_resnet50.ra2_in1k,None
 nfnet_l0.ra2_in1k,None

From 7d35d2a3730da8462871fa178bf1f3cfb6ba98af Mon Sep 17 00:00:00 2001
From: Rinne
Date: Fri, 1 Mar 2024 19:08:52 +0800
Subject: [PATCH 02/28] [TF FE] Add complex type support for rank op (#23118)

### Details:
 - Add complex type support for rank op
 - Add corresponding test of it

### Tickets:
 - I think I don't have a ticket id

### Related issues
#22956

---------

Co-authored-by: Roman Kazantsev

---
 .../tensorflow_common/src/op/rank.cpp         | 18 +++++++-
 .../tensorflow_tests/test_tf_Rank.py          | 42 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/frontends/tensorflow_common/src/op/rank.cpp b/src/frontends/tensorflow_common/src/op/rank.cpp
index 1c37dfa5a0e878..1adbcd43d2fc04 100644
--- a/src/frontends/tensorflow_common/src/op/rank.cpp
+++ b/src/frontends/tensorflow_common/src/op/rank.cpp
@@ -3,8 +3,11 @@
 //

 #include "common_op_table.hpp"
+#include "helper_ops/complex_type_mark.hpp"
+#include "openvino/op/constant.hpp"
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/squeeze.hpp"
+#include "openvino/op/subtract.hpp"

 using namespace std;
 using namespace ov::op;
@@ -15,8 +18,21 @@ namespace tensorflow {
 namespace op {

 ov::OutputVector translate_rank_op(const NodeContext& node) {
-    default_op_checks(node, 1, {"Rank"});
+    default_op_checks(node, 1, {"Rank"}, true);
     auto input = node.get_input(0);
+    auto complex_type_mark = as_type_ptr<ComplexTypeMark>(input.get_node_shared_ptr());
+    if (complex_type_mark) {
+        input = complex_type_mark->input_value(0);
+        auto input_shape = make_shared<v3::ShapeOf>(input, ov::element::i32);
+
+        auto unsqueeze_input_rank = make_shared<v3::ShapeOf>(input_shape, ov::element::i32);
+        auto input_rank_with_complex = make_shared<v0::Squeeze>(unsqueeze_input_rank);
+        // eliminate the extra dimension
+        auto input_rank =
+            make_shared<v1::Subtract>(input_rank_with_complex, make_shared<v0::Constant>(ov::element::i32, Shape{}, 1));
+        set_node_name(node.get_name(), input_rank);
+        return {input_rank->output(0)};
+    }
     auto input_shape = make_shared<v3::ShapeOf>(input, ov::element::i32);
     auto unsqueeze_input_rank = make_shared<v3::ShapeOf>(input_shape, ov::element::i32);
     auto input_rank = make_shared<v0::Squeeze>(unsqueeze_input_rank);
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Rank.py b/tests/layer_tests/tensorflow_tests/test_tf_Rank.py
index e047b8ecb4ed8b..0a6ef2b08f6f57 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_Rank.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_Rank.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0

 import pytest
+import numpy as np
 import tensorflow as tf
 from common.tf_layer_test_class import CommonTFLayerTest

@@ -33,3 +34,44 @@ def test_rank_basic(self, params, ie_device, precision, ir_version, temp_dir,
         self._test(*self.create_rank_net(**params),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
                    use_legacy_frontend=use_legacy_frontend)
+
+
+class TestComplexRank(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        rng = np.random.default_rng()
+        assert 'param_real:0' in inputs_info
+        assert 'param_imag:0' in inputs_info
+        param_real_shape_1 = inputs_info['param_real:0']
+        param_imag_shape_1 = inputs_info['param_imag:0']
+        inputs_data = {}
+        inputs_data['param_real:0'] = 4 * rng.random(param_real_shape_1).astype(np.float32) - 2
+        inputs_data['param_imag:0'] = 4 * rng.random(param_imag_shape_1).astype(np.float32) - 2
+        return inputs_data
+
+    def create_rank_net(self, input_shape):
+        tf.compat.v1.reset_default_graph()
+        # Create the graph and model
+        with tf.compat.v1.Session() as sess:
+            input_real = tf.compat.v1.placeholder(tf.float32, input_shape, 'param_real')
+            input_imag = tf.compat.v1.placeholder(tf.float32, input_shape, 'param_imag')
+            input = tf.raw_ops.Complex(real=input_real, imag=input_imag)
+            tf.raw_ops.Rank(input=input)
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        return tf_net, None
+
+    test_data_basic = [
+        dict(input_shape=[]),
+        dict(input_shape=[1]),
+        dict(input_shape=[2, 6]),
+        dict(input_shape=[3, 4, 5, 6])
+    ]
+
+    @pytest.mark.parametrize("params", test_data_basic)
+    @pytest.mark.precommit_tf_fe
+    @pytest.mark.nightly
+    def test_complex_rank(self, params, ie_device, precision, ir_version, temp_dir,
+                          use_legacy_frontend):
+        self._test(*self.create_rank_net(**params),
+                   ie_device, precision, ir_version, temp_dir=temp_dir,
+                   use_legacy_frontend=use_legacy_frontend)

From 06a669e6612839e5073584e2e2d739a3594465dc Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Fri, 1 Mar 2024 11:46:23 +0000
Subject: [PATCH 03/28] Add information about ONNX backend to the docs
 (#23096)

### Details:
- Quantize with accuracy control supports the ONNX backend. I have added information on how to use quantize with accuracy control for `onnx.ModelProto`. 
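  In short, the flow for `onnx.ModelProto` mirrors the OpenVINO one; the snippet below is a condensed sketch of the example file added in this PR, where `calibration_dataset`, `validation_dataset` and the `validate` callable stand in for the user-provided pieces defined in the full example:

  ```python
  # Condensed sketch; assumes calibration_dataset/validation_dataset are nncf.Dataset
  # objects and validate is a callable returning an accuracy score (see ptq_aa_onnx.py).
  import nncf
  import onnx

  model = onnx.load("model.onnx")  # any onnx.ModelProto

  quantized_model = nncf.quantize_with_accuracy_control(
      model,
      calibration_dataset=calibration_dataset,
      validation_dataset=validation_dataset,
      validation_fn=validate,
      max_drop=0.01,                     # tolerated accuracy drop
      drop_type=nncf.DropType.ABSOLUTE,  # interpret max_drop as an absolute value
  )
  ```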
### Tickets: - *133388* --------- Co-authored-by: Tatiana Savina --- .../quantizing-with-accuracy-control.rst | 36 ++++++++- .../nncf/ptq/code/ptq_aa_onnx.py | 75 +++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst index eddde03eb6bb4d..ce792da5684e2a 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst @@ -14,7 +14,7 @@ This is the advanced quantization flow that allows to apply 8-bit quantization t * Since accuracy validation is run several times during the quantization process, quantization with accuracy control can take more time than the :doc:`Basic 8-bit quantization ` flow. * The resulted model can provide smaller performance improvement than the :doc:`Basic 8-bit quantization ` flow because some of the operations are kept in the original precision. -.. note:: Currently, 8-bit quantization with accuracy control is available only for models in OpenVINO representation. +.. note:: Currently, 8-bit quantization with accuracy control is available only for models in OpenVINO and onnx.ModelProto representation. The steps for the quantization with accuracy control are described below. @@ -38,10 +38,18 @@ This step is similar to the :doc:`Basic 8-bit quantization `__ * `Post-Training Quantization of YOLOv8 OpenVINO Model with control of accuracy metric `__ +* `Post-Training Quantization of YOLOv8 ONNX Model with control of accuracy metric `__ See also #################### diff --git a/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py b/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py new file mode 100644 index 00000000000000..dd32cbdbf1d6bd --- /dev/null +++ b/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py @@ -0,0 +1,75 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +#! [dataset] +import nncf +import torch + +calibration_loader = torch.utils.data.DataLoader(...) + +def transform_fn(data_item): + images, _ = data_item + return {input_name: images.numpy()} # input_name should be taken from the model, + # e.g. model.graph.input[0].name + +calibration_dataset = nncf.Dataset(calibration_loader, transform_fn) +validation_dataset = nncf.Dataset(calibration_loader, transform_fn) +#! [dataset] + +#! 
[validation]
+import numpy as np
+import torch
+from sklearn.metrics import accuracy_score
+
+import onnx
+import onnxruntime
+
+
+def validate(model: onnx.ModelProto,
+             validation_loader: torch.utils.data.DataLoader) -> float:
+    predictions = []
+    references = []
+
+    input_name = model.graph.input[0].name
+    serialized_model = model.SerializeToString()
+    session = onnxruntime.InferenceSession(serialized_model, providers=["CPUExecutionProvider"])
+    output_names = [output.name for output in session.get_outputs()]
+
+    for images, target in validation_loader:
+        pred = session.run(output_names, input_feed={input_name: images.numpy()})[0]
+        predictions.append(np.argmax(pred, axis=1))
+        references.append(target)
+
+    predictions = np.concatenate(predictions, axis=0)
+    references = np.concatenate(references, axis=0)
+    return accuracy_score(predictions, references)
+#! [validation]
+
+#! [quantization]
+import onnx
+
+model = onnx.load("model_path")
+
+quantized_model = nncf.quantize_with_accuracy_control(
+    model,
+    calibration_dataset=calibration_dataset,
+    validation_dataset=validation_dataset,
+    validation_fn=validate,
+    max_drop=0.01,
+    drop_type=nncf.DropType.ABSOLUTE,
+)
+#! [quantization]
+
+#! [inference]
+import openvino as ov
+
+# convert ONNX model to OpenVINO model
+ov_quantized_model = ov.convert_model(quantized_model)
+
+# compile the model to transform quantized operations to int8
+model_int8 = ov.compile_model(ov_quantized_model)
+
+input_fp32 = ...  # FP32 model input
+res = model_int8(input_fp32)
+
+#! [inference]

From dfff4e89c41cc1bd562d3954f71a7e5c53fc95ce Mon Sep 17 00:00:00 2001
From: Sebastian Golebiewski
Date: Fri, 1 Mar 2024 12:57:07 +0100
Subject: [PATCH 04/28] [DOCS] Adding npm distribution to install guides
 (#23164)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding npm distribution of OpenVINO™ Toolkit to install guides.

---------

Co-authored-by: Vishniakov Nikolai

---
 .../install-openvino-linux-header.rst         | 10 ++-
 .../install-openvino-macos-header.rst         |  9 ++-
 .../install-openvino-npm.rst                  | 61 +++++++++++++++++++
 .../install-openvino-windows-header.rst       |  7 ++-
 .../api/nodejs_api/nodejs_api.rst             | 30 ---------
 5 files changed, 73 insertions(+), 44 deletions(-)
 create mode 100644 docs/articles_en/get-started/install-openvino-overview/install-openvino-npm.rst

diff --git a/docs/articles_en/get-started/install-openvino-overview/install-openvino-linux-header.rst b/docs/articles_en/get-started/install-openvino-overview/install-openvino-linux-header.rst
index a1e6bba87433aa..6ec4177cdb6290 100644
--- a/docs/articles_en/get-started/install-openvino-overview/install-openvino-linux-header.rst
+++ b/docs/articles_en/get-started/install-openvino-overview/install-openvino-linux-header.rst
@@ -5,8 +5,8 @@ Install OpenVINO™ Runtime on Linux

 .. meta::
-   :description: Learn how to install OpenVINO™ Runtime on Linux operating system.
-                 You can use an archive, a PyPi package, APT, YUM, Conda Forge,
+   :description: Learn how to install OpenVINO™ Runtime on Linux operating system.
+                 You can use an archive, a PyPi package, npm package, APT, YUM, Conda Forge,
                  Homebrew or a Docker image. 
@@ -23,6 +23,7 @@ Install OpenVINO™ Runtime on Linux Use Homebrew Use Conan Use Docker + Use npm If you want to install OpenVINO™ Runtime on Linux, you have the following options: @@ -36,8 +37,5 @@ If you want to install OpenVINO™ Runtime on Linux, you have the following opti * :doc:`Install OpenVINO using Homebrew ` * :doc:`Install OpenVINO using Docker ` * :doc:`Install OpenVINO using Conan Package Manager ` - - - - +* :doc:`Install OpenVINO using npm ` diff --git a/docs/articles_en/get-started/install-openvino-overview/install-openvino-macos-header.rst b/docs/articles_en/get-started/install-openvino-overview/install-openvino-macos-header.rst index 157a3b19829c84..3eeab19b23e618 100644 --- a/docs/articles_en/get-started/install-openvino-overview/install-openvino-macos-header.rst +++ b/docs/articles_en/get-started/install-openvino-overview/install-openvino-macos-header.rst @@ -5,8 +5,8 @@ Install OpenVINO™ Runtime for macOS .. meta:: - :description: Learn how to install OpenVINO™ Runtime on macOS operating - system. You can use an archive, a PyPi package, Conda Forge + :description: Learn how to install OpenVINO™ Runtime on macOS operating + system. You can use an archive, a PyPi package, npm package, Conda Forge or Homebrew. @@ -20,7 +20,7 @@ Install OpenVINO™ Runtime for macOS Use Conda Forge Use vcpkg Use Conan - + Use npm If you want to install OpenVINO™ Runtime on macOS, you have the following options: @@ -31,6 +31,5 @@ If you want to install OpenVINO™ Runtime on macOS, you have the following opti * :doc:`Install OpenVINO using Homebrew ` * :doc:`Install OpenVINO using vcpkg ` * :doc:`Install OpenVINO using Conan Package Manager ` - - +* :doc:`Install OpenVINO using npm ` diff --git a/docs/articles_en/get-started/install-openvino-overview/install-openvino-npm.rst b/docs/articles_en/get-started/install-openvino-overview/install-openvino-npm.rst new file mode 100644 index 00000000000000..db8f8e6d2860d2 --- /dev/null +++ b/docs/articles_en/get-started/install-openvino-overview/install-openvino-npm.rst @@ -0,0 +1,61 @@ +.. {#openvino_docs_install_guides_installing_openvino_npm} + +Install Intel® Distribution of OpenVINO™ Toolkit from npm Registry +================================================================== + +.. meta:: + :description: Learn how to install OpenVINO™ Runtime on Windows, Linux, and + macOS operating systems, using the npm registry. + + +.. note:: + + Note that the npm distribution: + + * offers the JavaScript API only + * is dedicated to users of all major OSes: Windows, Linux, and macOS + (all x86_64 / arm64 architectures) + * macOS offers support only for CPU inference + +.. tab-set:: + + .. tab-item:: System Requirements + :sync: system-requirements + + - Windows, Linux, macOS + - x86, ARM (Windows ARM not supported) + + .. tab-item:: Software Requirements + :sync: software-requirements + + `Node.js version 20.5.1 and higher `__ + + +Installing OpenVINO Node.js +########################### + +1. Make sure that you have installed `Node.js and npm `__ + on your system. +2. Navigate to your project directory and run the following command in the terminal: + + .. code-block:: sh + + npm install openvino-node + +.. note:: + + The *openvino-node* npm package runs in Node.js environment only and provides + a subset of :doc:`OpenVINO Runtime C++ API <../../api/c_cpp_api/group__ov__cpp__api>`. + +What's Next? +#################### + +Now that you’ve installed OpenVINO npm package, you’re ready to run your own machine +learning applications! 
Explore :doc:`OpenVINO Node.js API <../../api/nodejs_api/nodejs_api>` +to learn more about how to integrate a model in Node.js applications. + +Additional Resources +#################### + +- Intel® Distribution of OpenVINO™ toolkit home page: https://software.intel.com/en-us/openvino-toolkit +- For IoT Libraries & Code Samples, see `Intel® IoT Developer Kit `__. diff --git a/docs/articles_en/get-started/install-openvino-overview/install-openvino-windows-header.rst b/docs/articles_en/get-started/install-openvino-overview/install-openvino-windows-header.rst index 468cde8b0b3557..a03084819d9800 100644 --- a/docs/articles_en/get-started/install-openvino-overview/install-openvino-windows-header.rst +++ b/docs/articles_en/get-started/install-openvino-overview/install-openvino-windows-header.rst @@ -5,8 +5,8 @@ Install OpenVINO™ Runtime on Windows .. meta:: - :description: Learn how to install OpenVINO™ Runtime on Windows operating - system. You can use an archive, a PyPi package, Conda Forge, + :description: Learn how to install OpenVINO™ Runtime on Windows operating + system. You can use an archive, a PyPi package, npm package, Conda Forge, or a Docker image. @@ -20,6 +20,7 @@ Install OpenVINO™ Runtime on Windows Use vcpkg Use Docker Use Conan + Use npm @@ -31,5 +32,5 @@ If you want to install OpenVINO™ Runtime on Windows, you have the following op * :doc:`Install OpenVINO using vcpkg ` * :doc:`Install OpenVINO using Docker ` * :doc:`Install OpenVINO using Conan Package Manager ` - +* :doc:`Install OpenVINO using npm ` diff --git a/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst b/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst index adeab58d514d6f..b42a4adf748545 100644 --- a/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst +++ b/docs/sphinx_setup/api/nodejs_api/nodejs_api.rst @@ -10,36 +10,6 @@ OpenVINO Node.js API is distributed as an *openvino-node* npm package that conta wrappers with TypeScript types descriptions and a script that downloads the OpenVINO Node.js bindings for current OS.⠀ -System requirements -################### - -.. list-table:: - :header-rows: 1 - - * - Operating System - - Architecture - - Software - * - Windows, Linux, macOS - - x86, ARM (Windows ARM not supported) - - `Node.js version 20.5.1 and higher `__ - - -Install openvino-node package -############################# - -To install the package, use the following command: - -.. code-block:: sh - - npm install openvino-node - - -.. note:: - - The *openvino-node* npm package runs in Node.js environment only and provides - a subset of :doc:`OpenVINO Runtime C++ API <../c_cpp_api/group__ov__cpp__api>`. 
- - Use openvino-node package ######################### From 17bf1f6d841da760708378a46f86612b1fb19456 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Fri, 1 Mar 2024 14:09:05 +0100 Subject: [PATCH 05/28] [PyAPI][Opset14] Python API ConvertPromoteTypes-14 (#22988) ### Details: - *Python API for ConvertPromoteTypes-14* - *...* ### Tickets: - *129205* --------- Co-authored-by: Anastasia Kuporosova --- .../src/openvino/runtime/opset14/__init__.py | 1 + .../src/openvino/runtime/opset14/ops.py | 45 +++++++--- .../test_graph/test_convert_promote_types.py | 89 +++++++++++++++++++ 3 files changed, 122 insertions(+), 13 deletions(-) create mode 100644 src/bindings/python/tests/test_graph/test_convert_promote_types.py diff --git a/src/bindings/python/src/openvino/runtime/opset14/__init__.py b/src/bindings/python/src/openvino/runtime/opset14/__init__.py index d419d92321b63f..c43adc3c50a77c 100644 --- a/src/bindings/python/src/openvino/runtime/opset14/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset14/__init__.py @@ -31,6 +31,7 @@ from openvino.runtime.opset13.ops import constant from openvino.runtime.opset1.ops import convert from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset14.ops import convert_promote_types from openvino.runtime.opset1.ops import convolution from openvino.runtime.opset1.ops import convolution_backprop_data from openvino.runtime.opset1.ops import cos diff --git a/src/bindings/python/src/openvino/runtime/opset14/ops.py b/src/bindings/python/src/openvino/runtime/opset14/ops.py index ea53ef605e14e5..482b2a0d7c2c9b 100644 --- a/src/bindings/python/src/openvino/runtime/opset14/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset14/ops.py @@ -4,30 +4,49 @@ """Factory functions for ops added to openvino opset14.""" from functools import partial -from typing import Literal, Optional, Union -import logging +from typing import Union -import numpy as np - -log = logging.getLogger(__name__) - -from openvino.runtime import Node +from openvino.runtime import Node, Type from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import nameable_op -from openvino.runtime.utils.types import ( - NodeInput, - as_node, - as_nodes -) +from openvino.runtime.utils.types import NodeInput, as_nodes _get_node_factory_opset14 = partial(_get_node_factory, "opset14") # -------------------------------------------- ops ------------------------------------------------ +@nameable_op +def convert_promote_types( + left_node: NodeInput, + right_node: NodeInput, + promote_unsafe: bool = False, + pytorch_scalar_promotion: bool = False, + u64_integer_promotion_target: Union[str, Type] = "f32", +) -> Node: + """Return a node performing conversion to common type based on promotion rules. + + :param left_node: input node with type to be promoted to common one. + :param right_node: input node with type to be promoted to common one. + :param promote_unsafe: Bool attribute whether to allow promotions that might result in bit-widening, precision loss and undefined behaviors. + :param pytorch_scalar_promotion: Bool attribute whether to promote scalar input to type provided by non-scalar input when number format is matching. + :param u64_integer_promotion_target: Element type attribute to select promotion result when inputs are u64 and signed integer. + + :return: The new node performing ConvertPromoteTypes operation. 
+ """ + inputs = as_nodes(left_node, right_node) + + attributes = { + "promote_unsafe": promote_unsafe, + "pytorch_scalar_promotion": pytorch_scalar_promotion, + "u64_integer_promotion_target": u64_integer_promotion_target, + } + return _get_node_factory_opset14().create("ConvertPromoteTypes", inputs, attributes) + + @nameable_op def inverse( data: NodeInput, - adjoint: bool = False + adjoint: bool = False, ) -> Node: """Return a node with inverse matrices of the input. diff --git a/src/bindings/python/tests/test_graph/test_convert_promote_types.py b/src/bindings/python/tests/test_graph/test_convert_promote_types.py new file mode 100644 index 00000000000000..177a00bb51a24b --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_convert_promote_types.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +import openvino.runtime.opset14 as ops +from openvino import Type + + +@pytest.mark.parametrize( + ("lhs", "rhs", "promote_unsafe", "pytorch_scalar_promotion", "u64_integer_promotion_target", "expected_output_type"), + [ + (([], np.float32), ([2], np.float16), False, False, "f32", Type.f32), + (([], np.float32), ([2], np.float16), True, True, Type.f32, Type.f16), + (([], np.float32), ([2], np.int8), False, True, "f32", Type.f32), + (([], np.uint64), ([2], np.int8), True, False, "f64", Type.f64), + ], +) +def test_convert_promote_types_param_inputs(lhs, rhs, promote_unsafe, pytorch_scalar_promotion, u64_integer_promotion_target, expected_output_type): + lhs_param = ops.parameter(*lhs) + rhs_param = ops.parameter(*rhs) + + op = ops.convert_promote_types(lhs_param, rhs_param, promote_unsafe, pytorch_scalar_promotion, u64_integer_promotion_target) + attrs = op.get_attributes() + assert attrs.get("promote_unsafe") == promote_unsafe + assert attrs.get("pytorch_scalar_promotion") == pytorch_scalar_promotion + if isinstance(u64_integer_promotion_target, Type): + u64_integer_promotion_target = u64_integer_promotion_target.to_string() + assert attrs.get("u64_integer_promotion_target") == u64_integer_promotion_target + assert op.get_output_size() == 2 + assert op.get_type_name() == "ConvertPromoteTypes" + assert op.get_output_element_type(0) == expected_output_type + assert op.get_output_element_type(1) == expected_output_type + assert op.get_output_partial_shape(0) == lhs_param.get_output_partial_shape(0) + assert op.get_output_partial_shape(1) == rhs_param.get_output_partial_shape(0) + + +@pytest.mark.parametrize( + ("lhs", "rhs", "promote_unsafe", "pytorch_scalar_promotion", "u64_integer_promotion_target", "expected_output_type"), + [ + ((1, np.float32), ([2], np.float16), False, False, "f32", Type.f32), + ((1, np.float32), ([2], np.float16), True, True, "f32", Type.f16), + ((1, np.float32), ([2], np.int8), False, True, Type.f32, Type.f32), + ((1, np.uint64), ([2], np.int8), True, False, Type.f64, Type.f64), + ], +) +def test_convert_promote_types_const_inputs(lhs, rhs, promote_unsafe, pytorch_scalar_promotion, u64_integer_promotion_target, expected_output_type): + lhs_param = ops.constant(*lhs) + rhs_param = ops.constant(*rhs) + + op = ops.convert_promote_types(lhs_param, rhs_param, promote_unsafe, pytorch_scalar_promotion, u64_integer_promotion_target) + attrs = op.get_attributes() + assert attrs.get("promote_unsafe") == promote_unsafe + assert attrs.get("pytorch_scalar_promotion") == pytorch_scalar_promotion + if isinstance(u64_integer_promotion_target, Type): + 
u64_integer_promotion_target = u64_integer_promotion_target.to_string() + assert attrs.get("u64_integer_promotion_target") == u64_integer_promotion_target + assert op.get_output_size() == 2 + assert op.get_type_name() == "ConvertPromoteTypes" + assert op.get_output_element_type(0) == expected_output_type + assert op.get_output_element_type(1) == expected_output_type + assert op.get_output_partial_shape(0) == lhs_param.get_output_partial_shape(0) + assert op.get_output_partial_shape(1) == rhs_param.get_output_partial_shape(0) + + +@pytest.mark.parametrize( + ("lhs", "rhs", "expected_output_type"), + [ + (([4, 4], np.float32), ([2], np.float16), Type.f32), + (([], np.uint32), ([4, 4], np.int64), Type.i64), + (([], np.uint8), ([4, 4], np.float16), Type.f16), + ], +) +def test_convert_promote_types_default_attrs(lhs, rhs, expected_output_type): + lhs_param = ops.parameter(*lhs) + rhs_param = ops.parameter(*rhs) + op = ops.convert_promote_types(lhs_param, rhs_param) + attrs = op.get_attributes() + assert not attrs.get("promote_unsafe") + assert not attrs.get("pytorch_scalar_promotion") + assert attrs.get("u64_integer_promotion_target") == "f32" + assert op.get_output_size() == 2 + assert op.get_type_name() == "ConvertPromoteTypes" + assert op.get_output_element_type(0) == expected_output_type + assert op.get_output_element_type(1) == expected_output_type + assert op.get_output_partial_shape(0) == lhs_param.get_output_partial_shape(0) + assert op.get_output_partial_shape(1) == rhs_param.get_output_partial_shape(0) From 7b46de73866698ab1f15476d05e39348640f626b Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Sun, 3 Mar 2024 00:08:25 +0400 Subject: [PATCH 06/28] [TF FE] Switch on layer test for LookupTableFindV2 with string key (#23197) **Details:** Switch on layer test for LookupTableFindV2 with string key. 
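In a nutshell, the graphs that the re-enabled test produces reduce to a string-keyed table lookup. A minimal standalone sketch (using the public `tf.lookup` wrappers, which lower to `LookupTableFindV2`, rather than the raw ops the layer test builds) looks like this, with keys/values mirroring the test parameters below:

```python
import tensorflow as tf

# Keys/values mirror the parameters of the enabled layer test.
keys = tf.constant(["PyTorch", "TensorFlow", "JAX", "Lightning", "MindSpore", "OpenVINO"])
values = tf.constant([200, 100, 0, -3, 10, 1], dtype=tf.int64)
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values),
    default_value=tf.constant(0, dtype=tf.int64))

# Unknown keys fall back to default_value.
print(table.lookup(tf.constant(["OpenVINO", "AbraCadabra"])).numpy())  # [1 0]
```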
Merge after https://github.com/openvinotoolkit/openvino_tokenizers/pull/50 **Ticket:** 132669 --------- Signed-off-by: Kazantsev, Roman --- .../test_tf_LookupTableFind.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py index 6ff6daeda99045..3c585ff65cec88 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import platform + import numpy as np import pytest import tensorflow as tf @@ -35,6 +37,8 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v self.keys_type = keys_type self.all_keys = all_keys self.invalid_key = invalid_key + if keys_type == str: + keys_type = tf.string tf.compat.v1.reset_default_graph() # Create the graph and model with tf.compat.v1.Session() as sess: @@ -67,11 +71,10 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v dict(keys_type=np.int32, values_type=tf.string, all_keys=[20, 10, 33, -22, 44, 11], all_values=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], default_value='UNKNOWN', invalid_key=1000), - pytest.param(dict(keys_type=str, values_type=np.int64, - all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], - all_values=[200, 100, 0, -3, 10, 1], - default_value=0, invalid_key='AbraCadabra'), - marks=pytest.mark.xfail(reason="132669 - Support LookupTableFindV2 with string key")), + dict(keys_type=str, values_type=np.int64, + all_keys=['PyTorch', 'TensorFlow', 'JAX', 'Lightning', 'MindSpore', 'OpenVINO'], + all_values=[200, 100, 0, -3, 10, 1], + default_value=0, invalid_key='AbraCadabra'), ] @pytest.mark.parametrize("hash_table_type", [0, 1]) @@ -81,6 +84,12 @@ def create_lookup_table_find_net(self, hash_table_type, keys_shape, keys_type, v @pytest.mark.nightly def test_lookup_table_find(self, hash_table_type, keys_shape, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if params['keys_type'] == str and params['values_type'] == np.int64: + if platform.system() in ('Darwin') or platform.machine() in ['arm', 'armv7l', + 'aarch64', + 'arm64', + 'ARM64']: + pytest.xfail(reason='126314, 132699: Build tokenizers for ARM and MacOS') self._test(*self.create_lookup_table_find_net(hash_table_type=hash_table_type, keys_shape=keys_shape, **params), ie_device, precision, ir_version, temp_dir=temp_dir, From 54ac9947aca3697513daf8318f8da8fac9b8b60d Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Sun, 3 Mar 2024 18:57:29 +0100 Subject: [PATCH 07/28] Fix includes in dev api for conan integration (#23175) ### Details: - Fix includes in dev api for conan integration ### Tickets: - *ticket-id* --- src/inference/src/dev/make_tensor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index f28c90ccf4856a..e457b81fc0e850 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -9,6 +9,7 @@ #include "openvino/runtime/iremote_tensor.hpp" #include "openvino/runtime/properties.hpp" +#include "openvino/runtime/tensor.hpp" #ifdef PROXY_PLUGIN_ENABLED # include "openvino/proxy/plugin.hpp" #endif From 608a9981c52b4714453ad5ae5683ecf17442c6e9 Mon Sep 17 00:00:00 2001 From: Wilson Seok 
Date: Mon, 4 Mar 2024 09:29:07 +0900 Subject: [PATCH 08/28] [GPU] update layout.compatible() to check false conditions first (#23101) ### Details: - Update layout.compatible() to check properly for the case where o(or b) axis is blocked - Add unit test case ### Tickets: - 132367 --- src/plugins/intel_gpu/src/runtime/layout.cpp | 3 ++- src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index 70e760bca41557..19e75275997ee6 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -585,7 +585,8 @@ ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format // Check a reorder is 1d along feature axis. Or feature size fits to inner block size of feature axis static inline bool check_redundant_1d_along_feature(layout const& l1, layout const& l2) { // No padding, double blocked format and different data_type - if (!l1.data_padding && !l2.data_padding && !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) && + if ((l1.get_linear_size() == l2.get_linear_size()) && !l1.data_padding && !l2.data_padding && + !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) && l2.data_type == l1.data_type && l2.count() == l1.count()) { auto l1_inner_blk = format::is_single_blocked(l1.format) ? l1.format.traits().block_sizes.at(0).second : 1; auto l2_inner_blk = format::is_single_blocked(l2.format) ? l2.format.traits().block_sizes.at(0).second : 1; diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp index 1eeb065d3d2b7b..6a2c8c5a80e991 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/layout_test.cpp @@ -223,6 +223,8 @@ INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test, layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)}, true, true}, {layout{ov::PartialShape{10, 20}, data_types::f16, format::bfyx}, layout{ov::PartialShape{10, 20}, data_types::f16, format::os_iyx_osv16}, false, false}, + {layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::bfyx}, + layout{ov::PartialShape{1, 16, 1, 1}, data_types::f16, format::os_iyx_osv16}, false, false}, {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx}, layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::oiyx}, false, true}, {layout{ov::PartialShape{128, 10}, data_types::f16, format::bfyx}, From 9e2accb57282142716510f09d1a27767d72ff6c1 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 4 Mar 2024 09:20:45 +0400 Subject: [PATCH 09/28] Enable skipped tests (#23168) ### Tickets: - [CVS-55937](https://jira.devtools.intel.com/browse/CVS-55937) - [CVS-123427](https://jira.devtools.intel.com/browse/CVS-123427) --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 -- src/plugins/template/tests/functional/skip_tests_config.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 8d5e432d7b87de..1e8852e31e0562 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -184,8 +184,6 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
         // Issue: 123321
         R"(.*smoke_RNNSequenceCommonZeroClip/RNNSequenceTest.Inference.*hidden_size=10.*relu.*)",
-        // Issue: 123427
-        R"(.*RDFTLayerTest.*SignalSize=().*)",
         // Issue: 123815 (Tests are sensintive to available thread count on testing machines)
         R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)",
         // Issue: 122356
diff --git a/src/plugins/template/tests/functional/skip_tests_config.cpp b/src/plugins/template/tests/functional/skip_tests_config.cpp
index eac640ebfe40f3..297cba2660d11d 100644
--- a/src/plugins/template/tests/functional/skip_tests_config.cpp
+++ b/src/plugins/template/tests/functional/skip_tests_config.cpp
@@ -21,9 +21,6 @@ std::vector<std::string> disabledTestPatterns() {
         // unsupported metrics
         R"(.*smoke_OVGetMetricPropsTest.*OVGetMetricPropsTest.*(RANGE_FOR_STREAMS|MAX_BATCH_SIZE).*)",

-        // CVS-55937
-        R"(.*SplitLayerTest.*numSplits=30.*)",
-
         // CVS-64094
         R"(.*ReferenceLogSoftmaxLayerTest.*4.*iType=f16.*axis=.*1.*)",
         // CVS-64012
From 5d74236bcaa21246823dadcac87c7f8cc3f14a63 Mon Sep 17 00:00:00 2001
From: linzs148 <56420840+linzs148@users.noreply.github.com>
Date: Mon, 4 Mar 2024 13:58:18 +0800
Subject: [PATCH 10/28] [TF FE] Support complex type for Inv (#23085)

**Ticket:** https://github.com/openvinotoolkit/openvino/issues/22952

---------

Co-authored-by: Roman Kazantsev

---
 .../tensorflow_common/src/op/inv.cpp          | 40 +++++++++++++++-
 .../tensorflow_tests/test_tf_Inv.py           | 46 ++++++++++++++++++-
 2 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/src/frontends/tensorflow_common/src/op/inv.cpp b/src/frontends/tensorflow_common/src/op/inv.cpp
index ec2196219f5033..5af62e2a1764e4 100644
--- a/src/frontends/tensorflow_common/src/op/inv.cpp
+++ b/src/frontends/tensorflow_common/src/op/inv.cpp
@@ -1,10 +1,17 @@
-// Copyright (C) 2018-2023 Intel Corporation
+// Copyright (C) 2018-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

 #include "common_op_table.hpp"
+#include "helper_ops/complex_type_mark.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/concat.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/divide.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/negative.hpp"
+#include "openvino/op/unsqueeze.hpp"

 using namespace std;
 using namespace ov::op;
@@ -14,9 +21,38 @@ namespace frontend {
 namespace tensorflow {
 namespace op {
 OutputVector translate_inv_op(const NodeContext& node) {
-    default_op_checks(node, 1, {"Inv"});
+    default_op_checks(node, 1, {"Inv"}, true);
     auto x = node.get_input(0);

+    auto complex_type_mark = as_type_ptr<ComplexTypeMark>(x.get_node_shared_ptr());
+    if (complex_type_mark) {
+        x = complex_type_mark->input_value(0);
+        element::Type complex_part_type = complex_type_mark->get_complex_part_type();
+
+        auto gather_index_real = make_shared<v0::Constant>(element::i32, Shape{}, 0);
+        auto gather_index_imag = make_shared<v0::Constant>(element::i32, Shape{}, 1);
+
+        auto minus_one = make_shared<v0::Constant>(element::i32, Shape{1}, -1);
+
+        auto x_real = make_shared<v8::Gather>(x, gather_index_real, minus_one)->output(0);
+        auto x_imag = make_shared<v8::Gather>(x, gather_index_imag, minus_one)->output(0);
+
+        auto scale =
+            make_shared<v1::Add>(make_shared<v1::Multiply>(x_real, x_real), make_shared<v1::Multiply>(x_imag, x_imag));
+
+        auto y_real = make_shared<v1::Divide>(x_real, scale);
+        auto y_imag = make_shared<v1::Divide>(make_shared<v0::Negative>(x_imag), scale);
+
+        auto real_unsqueeze = make_shared<v0::Unsqueeze>(y_real, minus_one);
+        auto 
imag_unsqueeze = make_shared<v0::Unsqueeze>(y_imag, minus_one);
+
+        auto concat_result = make_shared<v0::Concat>(OutputVector{real_unsqueeze, imag_unsqueeze}, -1);
+        set_node_name(node.get_name(), concat_result);
+
+        auto complex_result = make_shared<ComplexTypeMark>(concat_result->output(0), complex_part_type);
+        return {complex_result};
+    }
+
     // prepare auxiliary one constants of the same type as the inputs
     auto one = create_same_type_const_scalar<int32_t>(x, 1);

diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
index d2350f9841fb6c..875ab763ab81ce 100644
--- a/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
+++ b/tests/layer_tests/tensorflow_tests/test_tf_Inv.py
@@ -41,4 +41,48 @@ def test_inv_basic(self, params, ie_device, precision, ir_version, temp_dir,
                        use_legacy_frontend):
         self._test(*self.create_inv_net(**params),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
-                   use_legacy_frontend=use_legacy_frontend)
\ No newline at end of file
+                   use_legacy_frontend=use_legacy_frontend)
+
+
+class TestComplexInv(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        rng = np.random.default_rng()
+        assert 'param_real:0' in inputs_info
+        assert 'param_imag:0' in inputs_info
+        param_real_shape_1 = inputs_info['param_real:0']
+        param_imag_shape_1 = inputs_info['param_imag:0']
+        inputs_data = {}
+        inputs_data['param_real:0'] = 4 * rng.random(param_real_shape_1).astype(np.float32) - 2
+        inputs_data['param_imag:0'] = 4 * rng.random(param_imag_shape_1).astype(np.float32) - 2
+        return inputs_data
+
+    def create_complex_inv_net(self, input_shape):
+        tf.compat.v1.reset_default_graph()
+        # Create the graph and model
+        with tf.compat.v1.Session() as sess:
+            param_real = tf.compat.v1.placeholder(np.float32, input_shape, 'param_real')
+            param_imag = tf.compat.v1.placeholder(np.float32, input_shape, 'param_imag')
+            complex = tf.raw_ops.Complex(real=param_real, imag=param_imag)
+            inv = tf.raw_ops.Inv(x=complex, name="complex_inv")
+            real = tf.raw_ops.Real(input=inv)
+            img = tf.raw_ops.Imag(input=inv)
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        return tf_net, None
+
+    test_data_basic = [
+        dict(input_shape=[]),
+        dict(input_shape=[2]),
+        dict(input_shape=[1, 3]),
+        dict(input_shape=[2, 3, 4]),
+        dict(input_shape=[3, 4, 5, 6]),
+    ]
+
+    @pytest.mark.parametrize("params", test_data_basic)
+    @pytest.mark.precommit_tf_fe
+    @pytest.mark.nightly
+    def test_complex_inv(self, params, ie_device, precision, ir_version, temp_dir,
+                         use_legacy_frontend):
+        self._test(
+            *self.create_complex_inv_net(**params),
+            ie_device, precision, ir_version, temp_dir=temp_dir,
+            use_legacy_frontend=use_legacy_frontend)

From 5c7a4bc51f3a2897b72642fd534416e97241d3cb Mon Sep 17 00:00:00 2001
From: Aleksandr Voron
Date: Mon, 4 Mar 2024 07:31:33 +0100
Subject: [PATCH 11/28] [CPU][ARM] Enable f16 eltwise fusing in ACL (#23117)

oneDNN PR: https://github.com/openvinotoolkit/oneDNN/pull/234
---
 .../intel_cpu/src/dnnl_extension_utils.cpp    |   3 +-
 src/plugins/intel_cpu/src/graph_optimizer.cpp |  28 ++--
 src/plugins/intel_cpu/src/nodes/conv.cpp      |   4 +
 .../classes/convolution.cpp                   |  13 ++
 .../classes/convolution.hpp                   |   1 +
 .../instances/common/convolution.cpp          | 149 +-----------------
 .../instances/x64/convolution.cpp             | 133 ++++++++++++++++
 src/plugins/intel_cpu/thirdparty/onednn       |   2 +-
 8 files changed, 167 insertions(+), 166 deletions(-)

diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index f09b9c5ab2f101..6eced71f2b83fd 100644
--- 
a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -274,7 +274,8 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) { Algorithm::EltwiseAbs, Algorithm::EltwiseSqrt, Algorithm::EltwiseSoftRelu, - Algorithm::EltwiseSigmoid); + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp); #elif defined(OPENVINO_ARCH_X86_64) return one_of(alg, Algorithm::EltwiseRelu, Algorithm::EltwiseGeluErf, diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index d85c7fcead4001..5d52cfdfb0155f 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -1496,19 +1496,19 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &grap parent++; continue; } -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) { + + auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); + if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { parent++; continue; } -#endif - auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); - if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { +#if defined(OV_CPU_WITH_ACL) + if (!parentNode->getFusedWith().empty()) { parent++; continue; } +#endif if (!DnnlExtensionUtils::isUnarySupportedAsPostOp(fuseCandidate->getAlgorithm())) { parent++; @@ -1552,13 +1552,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) { parent++; continue; } -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::f16) { - parent++; - continue; - } -#endif childNode->fuseInto(parentNode); @@ -1686,6 +1679,10 @@ static bool is_data_dependency(const std::shared_ptr &parent, */ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) { +#if !defined(OPENVINO_ARCH_X86) && !defined(OPENVINO_ARCH_X86_64) + return; +#endif + auto &graphNodes = graph.GetNodes(); auto isFusingSupported = [&](NodePtr conv, NodePtr child) { @@ -1817,11 +1814,6 @@ void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) if (mergedConv->isConstant() && !sum->isConstant()) continue; -//Disable ACL post-ops in fp16 to avoid performance degradation -#if defined(OPENVINO_ARCH_ARM64) - if (mergedConv->getOriginalInputPrecisionAtPort(0) == ov::element::f16) - continue; -#endif // Disable fusing for Add with broadcasing in case of known data ranges. Add with brodcasting triggers // non-optimal code path inside Convolution node, so better to avoid fusing at all. 
const auto& shape1 = sum->getInputShapeAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 59721c5df76c4d..b89d3a1e21d61a 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1110,6 +1110,10 @@ std::shared_ptr Convolution::getSrcMemDesc(const dnnl::primitive_des } bool Convolution::canFuse(const NodePtr& node) const { +#if defined(OV_CPU_WITH_ACL) + if (!fusedWith.empty()) + return false; +#endif return canFuseSimpleOperation(node); } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp index 71d8f0b3e2fa14..856fa9cd151f26 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.cpp @@ -352,6 +352,19 @@ const std::vector& CPUParams_2D() { return CPUParams_2D; } +const std::vector& CPUParams_3D() { + static const std::vector CPUParams_3D = { + //conv_sse42_3D, // not supported jit_sse42 for 3d + conv_avx2_3D, + conv_avx512_3D, + conv_avx2_3D_nspc, + conv_avx2_3D_nspc_brgconv, + conv_avx512_3D_nspc, + conv_avx512_3D_nspc_brgconv + }; + return CPUParams_3D; +} + const std::vector& CPUParams_GEMM_1D() { static const std::vector CPUParams_GEMM_1D = { conv_gemm_1D, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp index a6e5faee3e909d..db8b6ca8f943b1 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/convolution.hpp @@ -72,6 +72,7 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface& CPUParams_1x1_1D(); const std::vector& CPUParams_1x1_2D(); const std::vector& CPUParams_2D(); + const std::vector& CPUParams_3D(); const std::vector& CPUParams_GEMM_1D(); const std::vector& CPUParams_GEMM_2D(); const std::vector& CPUParams_GEMM_3D(); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp index b6518f8e8f48f1..09f8dc14660392 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/convolution.cpp @@ -62,20 +62,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_FP32, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_GEMM_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inShapesGemm2D()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( 
@@ -90,20 +76,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_empty_fusing, ConvolutionLaye ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_GEMM_2D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inShapesGemm2D()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Convolution (2D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( @@ -119,41 +91,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_empty_fusing, ConvolutionLayerCPUTes ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -const std::vector fusingParamsSet_dynBatch{ - emptyFusingSpec, - fusingSum, - fusingAddPerChannel, - fusingReluScaleShift -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d_dynBatch()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::ValuesIn(fusingParamsSet_dynBatch), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -168,20 +105,6 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_empty_fusing, ConvolutionLayerCPUT ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_2D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - const std::vector CPUParams_2D_plain_to_blocked = { conv_sse42_plain_to_blocked_2D, conv_avx2_plain_to_blocked_2D, @@ -262,16 +185,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_reorder_Conv_2D, ConvolutionLayerCPUTest, ConvolutionLayerCPUTest::getTestCaseName); /* ============= Convolution (3D) ============= */ -const std::vector CPUParams_3D = { - //conv_sse42_3D, // 
not supported jit_sse42 for 3d - conv_avx2_3D, - conv_avx512_3D, - conv_avx2_3D_nspc, - conv_avx2_3D_nspc_brgconv, - conv_avx512_3D_nspc, - conv_avx512_3D_nspc_brgconv -}; - INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -281,7 +194,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest, ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(emptyFusingSpec), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); @@ -295,25 +208,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32_fusingScaleShiftAndFakeQuantizePerCh ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(fusingScaleShiftAndFakeQuantizePerChannel), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_3D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes3d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -323,25 +222,11 @@ INSTANTIATE_TEST_SUITE_P(Conv_3D_FP32_dilated, ConvolutionLayerCPUTest, ::testing::Values(ElementType::undefined), ::testing::ValuesIn(inputShapes3d()), ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), ::testing::Values(emptyFusingSpec), ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_3D_dilated(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes3d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_3D)), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - const std::vector CPUParams_3D_plain_to_blocked = { conv_avx2_plain_to_blocked_3D, conv_avx512_plain_to_blocked_3D, @@ -419,20 +304,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_empty_fusing, ConvolutionLayerCP ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_1D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes1d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - 
::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Kernel_1x1 (2D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCPUTest, @@ -449,20 +320,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_empty_fusing, ConvolutionLayerCP ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest, - ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D(), - ::testing::Values(ElementType::f32), - ::testing::Values(ElementType::i8), - ::testing::Values(ElementType::undefined), - ::testing::ValuesIn(inputShapes2d()), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())), - ::testing::Values(fusingSum), - ::testing::Values(empty_plugin_config)), - ConvolutionLayerCPUTest::getTestCaseName); - /* ============= Convolution auto padding tests ============= */ const auto convParams_AutoPadding_2D = ::testing::Combine( diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp index 8073b67b726eb8..2d1638856386f3 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/convolution.cpp @@ -113,6 +113,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_I8, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_GEMM_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inShapesGemm2D()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_GEMM_2D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inShapesGemm2D()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_GEMM_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_BF16, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -169,6 +197,27 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_GEMM_FP32_dilated_fusing, ConvolutionLayerCPUTe ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +const std::vector fusingParamsSet_dynBatch{ + emptyFusingSpec, + fusingSum, + fusingAddPerChannel, + fusingReluScaleShift +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_dynBatch, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D(), + ::testing::Values(ElementType::f32), + 
::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d_dynBatch()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::ValuesIn(fusingParamsSet_dynBatch), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( @@ -197,6 +246,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_FP32_dilated_fusing, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_2D_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_2D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + /* ============= Kernel_1x1 (1D) ============= */ INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Combine( @@ -226,6 +303,34 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_FP32_fusing, ConvolutionLayerCPUTest, ::testing::Values(empty_plugin_config)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_1D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes1d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_1D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_1x1_2D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_1x1_2D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + /* ============= Convolution (1D) ============= */ const auto convParams_ExplicitPadding_1D = ::testing::Combine( ::testing::ValuesIn(kernels1d()), @@ -503,6 +608,34 @@ INSTANTIATE_TEST_SUITE_P(Conv_2D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTes ConvolutionLayerCPUTest::getTestCaseName); /* ============= Convolution (GEMM 3D) ============= */ 
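+// Note: the I8/fusingSum instantiations below were moved here from the common instances file +// (see the matching removals in instances/common/convolution.cpp), since convolution+sum +// fusing is now restricted to x86/x64 in FuseConvolutionSumAndConvolutionSumActivation.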
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_3D(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(Conv_3D_I8_dilated, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_3D_dilated(), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::i8), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3d()), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfo(CPUParams_3D())), + ::testing::Values(fusingSum), + ::testing::Values(empty_plugin_config)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_GEMM_FP32, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index b2cdc2cfdec616..0f94c0e7b94f64 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit b2cdc2cfdec61638f941ccdfb0b9dbcc27a7c333 +Subproject commit 0f94c0e7b94f64df2b94929279bbeb4f576a6a36 From 74570b748e803c04319c258a26534236c8cb02a5 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 4 Mar 2024 07:47:35 +0100 Subject: [PATCH 12/28] [Coverity] In IStreamsExecutor::Config use move instead copy (#23106) ### Details: - Fix Coverity reported issues `COPY_INSTEAD_OF_MOVE` in `ov::threading::IStreamsExecutor::Config` constructor. ### Tickets: - CID 1518201 - CID 1518243 --- .../dev_api/openvino/runtime/threading/istreams_executor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp index 7f5693f24ce4fb..b3b8be765e630a 100644 --- a/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp +++ b/src/inference/dev_api/openvino/runtime/threading/istreams_executor.hpp @@ -125,7 +125,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY, std::vector> streamsInfoTable = {}, bool cpuReservation = false) - : _name{name}, + : _name{std::move(name)}, _streams{streams}, _threads_per_stream{threadsPerStream}, _threadBindingType{threadBindingType}, @@ -133,7 +133,7 @@ class OPENVINO_RUNTIME_API IStreamsExecutor : virtual public ITaskExecutor { _threadBindingOffset{threadBindingOffset}, _threads{threads}, _thread_preferred_core_type(threadPreferredCoreType), - _streams_info_table{streamsInfoTable}, + _streams_info_table{std::move(streamsInfoTable)}, _cpu_reservation{cpuReservation} { update_executor_config(); } From a605edbd776ca4c14f4efb120403a74911ba9c27 Mon Sep 17 00:00:00 2001 From: Sun Xiaoxia Date: Mon, 4 Mar 2024 15:44:36 +0800 Subject: [PATCH 13/28] Fix performance regression of conformance tests (#22938) ### Details: fix performance regression of conformance tests Root cause is `make_default_multi_threaded()` API which was refactored in PR22414. 
This API is used to calculate the number of threads. **Master version**: When the test machine has both Pcores and Ecores, it recognizes the machine type and selects the proper cores (Pcores are selected on a general machine where Pcores > Ecores / 2) to create the executor. **Old version**: the input parameter `_threadBindingType` of the config was used to judge the type of the current machine, and `_threadBindingType` is always set to the default value `None` in the template plugin. So a hybrid-core machine was regarded as a regular-core machine and all physical cores (including Pcores and Ecores) were used. `_threadBindingType` is deprecated in master now. For example: ADL i9-12900K, 8 Pcores, 8 Ecores. Old version: threads=16 on any cores. Master: threads=8 on Pcores. Actually, the thread count calculated by `make_default_multi_threaded` in the master version is the best option in the normal situation, where the application does not create extra threads on its side. But in the conformance tests, 24 threads are created to seize CPU resources. Of course, all cores must be used to achieve the best performance. ### Tickets: - *CVS-131820* --------- Co-authored-by: Wanglei Shen --- .github/workflows/linux.yml | 2 +- .../src/dev/threading/istreams_executor.cpp | 24 +++------- .../unit/make_default_multi_threaded_test.cpp | 26 ++++++++++++++----- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 46493c55bc3ab3..0e2ee84264d6a6 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -318,7 +318,7 @@ jobs: Conformance: needs: [ Build, Smart_CI ] - timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 30 }} + timeout-minutes: ${{ matrix.TEST_TYPE == 'API' && 5 || 20 }} defaults: run: shell: bash diff --git a/src/inference/src/dev/threading/istreams_executor.cpp b/src/inference/src/dev/threading/istreams_executor.cpp index b7151e15b74e5e..fd75cf7d8a8a5d 100644 --- a/src/inference/src/dev/threading/istreams_executor.cpp +++ b/src/inference/src/dev/threading/istreams_executor.cpp @@ -32,7 +32,7 @@ void IStreamsExecutor::Config::set_property(const ov::AnyMap& property) { if (key == ov::num_streams) { auto streams = value.as(); if (streams == ov::streams::NUMA) { - _streams = 1; + _streams = get_num_numa_nodes(); } else if (streams == ov::streams::AUTO) { // bare minimum of streams (that evenly divides available number of cores) _streams = get_default_num_streams(); @@ -114,29 +114,11 @@ IStreamsExecutor::Config IStreamsExecutor::Config::make_default_multi_threaded( return streamConfig; } - const auto numa_nodes = proc_type_table.size() > 1 ? proc_type_table.size() - 1 : proc_type_table.size(); - const bool latency_case = static_cast(streamConfig._streams) <= numa_nodes; + int num_cores = proc_type_table[0][ALL_PROC]; - // by default, do not use the hyper-threading (to minimize threads synch overheads) - int num_cores = !latency_case && numa_nodes == 1 - ? 
proc_type_table[0][ALL_PROC] - : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC]; - - // additional latency-case logic for hybrid processors: if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) { if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::ANY) { - // by default the latency case uses (faster) Big cores only, depending on the compute ratio - const bool big_only = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / 2); - // selecting the preferred core type - if (big_only) { - streamConfig._thread_preferred_core_type = IStreamsExecutor::Config::PreferredCoreType::BIG; - const int hyper_threading_threshold = - 2; // min #cores, for which the hyper-threading becomes useful for the latency case - // additionally selecting the #cores to use in the "Big-only" case - num_cores = (proc_type_table[0][MAIN_CORE_PROC] <= hyper_threading_threshold) - ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC] - : proc_type_table[0][MAIN_CORE_PROC]; - } + num_cores = proc_type_table[0][ALL_PROC]; } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::BIG) { num_cores = proc_type_table[0][MAIN_CORE_PROC]; } else if (streamConfig._thread_preferred_core_type == IStreamsExecutor::Config::LITTLE) { diff --git a/src/inference/tests/unit/make_default_multi_threaded_test.cpp b/src/inference/tests/unit/make_default_multi_threaded_test.cpp index e1917394507045..40dafe9911b69b 100644 --- a/src/inference/tests/unit/make_default_multi_threaded_test.cpp +++ b/src/inference/tests/unit/make_default_multi_threaded_test.cpp @@ -51,7 +51,9 @@ MakeDefaultMultiThreadsTestCase _1sockets_streams_1 = { 1, // param[in]: the number of streams // param[out]: streams info table { - {1, 1, 6, 0, 0}, + {1, 0, 12, 0, 0}, + {0, 1, 6, 0, 0}, + {0, 3, 6, 0, 0}, }, }; @@ -74,9 +76,11 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_1 = { }, 1, { - {1, 0, 36, -1, -1}, + {1, 0, 72, -1, -1}, {0, 1, 18, 0, 0}, {0, 1, 18, 1, 1}, + {0, 3, 18, 0, 0}, + {0, 3, 18, 1, 1}, }, }; @@ -88,8 +92,10 @@ MakeDefaultMultiThreadsTestCase _2sockets_streams_4 = { }, 4, { - {2, 1, 9, 0, 0}, - {2, 1, 9, 1, 1}, + {1, 1, 18, 0, 0}, + {1, 1, 18, 1, 1}, + {1, 3, 18, 0, 0}, + {1, 3, 18, 1, 1}, }, }; @@ -99,7 +105,10 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_1 = { }, 1, { - {1, 1, 8, 0, 0}, + {1, 0, 24, 0, 0}, + {0, 1, 8, 0, 0}, + {0, 2, 8, 0, 0}, + {0, 3, 8, 0, 0}, }, }; @@ -109,7 +118,9 @@ MakeDefaultMultiThreadsTestCase _pecore24_streams_3 = { }, 3, { - {3, 1, 2, 0, 0}, + {1, 1, 8, 0, 0}, + {1, 2, 8, 0, 0}, + {1, 3, 8, 0, 0}, }, }; @@ -119,9 +130,10 @@ MakeDefaultMultiThreadsTestCase _pecore32_streams_1 = { }, 1, { - {1, 0, 24, 0, 0}, + {1, 0, 32, 0, 0}, {0, 1, 8, 0, 0}, {0, 2, 16, 0, 0}, + {0, 3, 8, 0, 0}, }, }; From b1a19fc62c3a485c32dd0e52a2983ca022e59da9 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Mon, 4 Mar 2024 15:44:58 +0800 Subject: [PATCH 14/28] add get_streams_info_table() test cases for 4 cores and 8 cores CPUs (#23156) ### Details: - *add get_streams_info_table() test cases for 4 cores and 8 cores CPUs* - *...* ### Tickets: - *ticket-id* --- .../streams_info/streams_info_table_test.cpp | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp index 1fce6e02e96f8f..204501b6046ec0 100644 
--- a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp @@ -1821,6 +1821,32 @@ StreamsCalculationTestCase _1sockets_6cores_tput_4 = { {{6, MAIN_CORE_PROC, 1, 0, 0}, {6, HYPER_THREADING_PROC, 1, 0, 0}}, }; +StreamsCalculationTestCase _1sockets_4cores_latency_1 = { + 1, + false, + 0, + 0, + 0, + 0, + "LATENCY", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{4, 4, 0, 0, 0, 0}}, + {{1, MAIN_CORE_PROC, 4, 0, 0}}, +}; + +StreamsCalculationTestCase _1sockets_4cores_tput_1 = { + 1, + false, + 0, + 0, + 0, + 0, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{4, 4, 0, 0, 0, 0}}, + {{2, MAIN_CORE_PROC, 2, 0, 0}}, +}; + StreamsCalculationTestCase _1sockets_ecores_latency_1 = { 1, false, @@ -1976,6 +2002,20 @@ StreamsCalculationTestCase _1sockets_mock_tput_3 = { {{19, 19, 0, 0, -1, -1}, {11, 11, 0, 0, 0, 0}, {8, 8, 0, 0, 1, 1}}, {{5, MAIN_CORE_PROC, 2, 0, 0}, {4, MAIN_CORE_PROC, 2, 1, 1}}, }; + +StreamsCalculationTestCase _1sockets_mock_tput_4 = { + 1, + false, + 0, + 0, + 0, + 0, + "THROUGHPUT", + ov::intel_cpu::Config::LatencyThreadingMode::PER_PLATFORM, + {{8, 8, 0, 0, 0, 0}}, + {{4, MAIN_CORE_PROC, 2, 0, 0}}, +}; + StreamsCalculationTestCase _2sockets_mock_latency_1 = { 1, false, @@ -2286,6 +2326,30 @@ StreamsCalculationTestCase _2sockets_mock_latency_21 = { {0, HYPER_THREADING_PROC, 20, 6, 6}, {0, MAIN_CORE_PROC, 10, 0, 0}}, }; +StreamsCalculationTestCase _2sockets_mock_latency_22 = { + 1, + false, + 200, + 0, + 0, + 3, + "LATENCY", + ov::intel_cpu::Config::LatencyThreadingMode::PER_SOCKET, + {{200, 100, 0, 100, -1, -1}, + {80, 40, 0, 40, 0, 0}, + {60, 30, 0, 30, 1, 1}, + {40, 20, 0, 20, 2, 2}, + {20, 10, 0, 10, 3, 3}}, + {{1, ALL_PROC, 200, -1, -1}, + {0, MAIN_CORE_PROC, 10, 3, 3}, + {0, HYPER_THREADING_PROC, 10, 3, 3}, + {0, MAIN_CORE_PROC, 40, 0, 0}, + {0, MAIN_CORE_PROC, 30, 1, 1}, + {0, MAIN_CORE_PROC, 20, 2, 2}, + {0, HYPER_THREADING_PROC, 40, 0, 0}, + {0, HYPER_THREADING_PROC, 30, 1, 1}, + {0, HYPER_THREADING_PROC, 20, 2, 2}}, +}; TEST_P(StreamsCalculationTests, StreamsCalculation) {} @@ -2419,6 +2483,8 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_6cores_tput_2, _1sockets_6cores_tput_3, _1sockets_6cores_tput_4, + _1sockets_4cores_latency_1, + _1sockets_4cores_tput_1, _1sockets_ecores_latency_1, _1sockets_ecores_latency_2, _1sockets_ecores_latency_3, @@ -2431,6 +2497,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_mock_tput_1, _1sockets_mock_tput_2, _1sockets_mock_tput_3, + _1sockets_mock_tput_4, _2sockets_mock_latency_1, _2sockets_mock_latency_2, _2sockets_mock_latency_3, @@ -2451,6 +2518,7 @@ INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _2sockets_mock_latency_18, _2sockets_mock_latency_19, _2sockets_mock_latency_20, - _2sockets_mock_latency_21)); + _2sockets_mock_latency_21, + _2sockets_mock_latency_22)); } // namespace \ No newline at end of file From df32562523349c325d35afcb056574c8e368dd3e Mon Sep 17 00:00:00 2001 From: Piotr Kowalczyk Date: Mon, 4 Mar 2024 08:58:11 +0100 Subject: [PATCH 15/28] [GPU]: Slice op supports dynamic shapes (#22935) ### Details: - Slice op on GPU supports dynamic shapes ### Tickets: - *CVS-129985* - *CVS-100235* --- .../intel_gpu/src/graph/impls/ocl/slice.cpp | 205 +++++++++++------ .../intel_gpu/src/graph/include/slice_inst.h | 60 +++++ src/plugins/intel_gpu/src/graph/slice.cpp | 74 +++++- .../kernel_selector/cl_kernels/slice_ref.cl | 103 +++++++-- .../kernels/slice/slice_kernel_ref.cpp | 
102 +++++++-- .../kernels/slice/slice_kernel_ref.h | 10 +- .../single_layer_tests/slice.cpp | 15 ++ .../tests/unit/test_cases/slice_gpu_test.cpp | 216 +++++++++++++++--- 8 files changed, 623 insertions(+), 162 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp index 7573acd46d3153..cd778650b12a3c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/slice.cpp @@ -2,33 +2,32 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "primitive_base.hpp" +#include +#include -#include "slice_inst.h" #include "data_inst.h" -#include "slice/slice_kernel_selector.h" +#include "primitive_base.hpp" #include "slice/slice_kernel_ref.h" - -#include -#include +#include "slice/slice_kernel_selector.h" +#include "slice_inst.h" namespace cldnn { namespace ocl { namespace { -template::value>::type> -std::vector extractIntegerData(const data_node& node, const stream& stream) { +template ::value>::type> +std::vector extractIntegerData(const data_node& node, const stream& stream) { mem_lock lock{node.get_attached_memory_ptr(), stream}; T* data = lock.data(); - std::vector integer_data; + std::vector integer_data; integer_data.reserve(node.get_output_layout().count()); for (size_t i = 0; i < node.get_output_layout().count(); i++) { - integer_data.emplace_back(static_cast(data[i])); + integer_data.emplace_back(static_cast(data[i])); } return integer_data; } -std::vector extractIntegerData(const data_node& node, const stream& stream) { +std::vector extractIntegerData(const data_node& node, const stream& stream) { auto dt = node.get_output_layout().data_type; switch (dt) { case data_types::u8: @@ -40,22 +39,16 @@ std::vector extractIntegerData(const data_node& node, const stream case data_types::i64: return extractIntegerData(node, stream); default: - OPENVINO_ASSERT(false, "[GPU] Slice parameters should be of integral type for node ", node.id(), " while got ", dt); + OPENVINO_ASSERT(false, + "[GPU] Slice parameters should be of integral type for node ", + node.id(), + " while got ", + dt); } return {}; } -std::vector extractShape(kernel_selector::Tensor::DataTensor& tensor) { - auto logical_dims = tensor.LogicalDims(); - // LogicalDims method returns dims in reversed order - std::vector reverse_logical_dims; - for (auto it = logical_dims.rbegin(); it != logical_dims.rend(); ++it) { - reverse_logical_dims.push_back(static_cast(*it)); - } - return reverse_logical_dims; -} - -} // namespace +} // namespace struct slice_impl : typed_primitive_impl_ocl { using parent = typed_primitive_impl_ocl; @@ -63,81 +56,141 @@ struct slice_impl : typed_primitive_impl_ocl { using kernel_selector_t = kernel_selector::slice_kernel_selector; using kernel_params_t = kernel_selector::slice_params; - enum InputIndices { - kData, - kStart, - kEnd, - kStep, - kAxes, - kInputsNum - }; - DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::ocl::slice_impl) std::unique_ptr clone() const override { return make_unique(*this); } + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + if (is_dynamic()) { + auto& kernel_selector = kernel_selector_t::Instance(); + auto kernel_impl = kernel_selector.GetImplementation(_kernel_data.kernelName); + kernel_impl->GetUpdateDispatchDataFunc(_kernel_data); + } + } + + kernel_arguments_data get_arguments(const slice_inst& instance) const override { + kernel_arguments_data args; + + const SliceKernelRefNeededInputs inputs = 
SliceKernelRefNeededInputs::Create(*instance.node); + + for (auto idx : inputs.GetNeededInputIndexes()) { + args.inputs.push_back(instance.input_memory_ptr(idx)); + } + + for (size_t i = 0; i < instance.outputs_memory_count(); i++) { + args.outputs.push_back(instance.output_memory_ptr(i)); + } + + args.shape_info = instance.shape_info_memory_ptr(); + return args; + } + static std::unique_ptr create(const slice_node& arg, const kernel_impl_params& impl_param) { - auto params = get_default_params(impl_param); - const auto& inputs = arg.get_dependencies(); - const stream& stream = arg.get_program().get_stream(); - auto start_elts = extractIntegerData(inputs[InputIndices::kStart].first->as(), stream); - auto end_elts = extractIntegerData(inputs[InputIndices::kEnd].first->as(), stream); - auto step_elts = extractIntegerData(inputs[InputIndices::kStep].first->as(), stream); - auto data_shape = extractShape(params.inputs[0]); - std::vector axes(data_shape.size()); - if (inputs.size() == InputIndices::kInputsNum) - axes = extractIntegerData(inputs[InputIndices::kAxes].first->as(), stream); - else - std::iota(axes.begin(), axes.end(), 0); - std::vector selected_start(data_shape.size(), 0); - std::vector selected_step(data_shape.size(), 1); - std::vector selected_end(data_shape); - for (size_t axis = 0; axis < axes.size(); axis++) { - auto transformed_axe = axes[axis] < 0 ? data_shape.size() + axes[axis] : axes[axis]; - auto start = start_elts[axis]; - auto end = end_elts[axis]; - auto dim_size = data_shape[transformed_axe]; - selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0); - selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0); - selected_step[transformed_axe] = step_elts[axis]; + auto params = get_default_params(impl_param, impl_param.is_dynamic()); + const auto input_rank = params.inputs[0].Dimentions(); + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kStart, + params.compile_time_start, + params.start_data_type, + params.inputs)) { + // No kStart input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_start = std::vector(input_rank, 0); + } + + // NOTE: Stop input is not used by the slice kernel, as this information + // is implicitly passed with output shape. + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kStep, + params.compile_time_step, + params.step_data_type, + params.inputs)) { + // No kStep input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_step = std::vector(input_rank, 1); + } + + if (!PrepareInput(arg, + SliceKernelRefNeededInputs::kAxes, + params.compile_time_axes, + params.axes_data_type, + params.inputs)) { + // No kAxes input - set it to default: + params.axes_data_type = kernel_selector::Datatype::INT64; + params.compile_time_axes.resize(input_rank); + std::iota(params.compile_time_axes.begin(), params.compile_time_axes.end(), 0); } - params.start = std::move(selected_start); - params.end = std::move(selected_end); - params.step = std::move(selected_step); + + // Transform compile time axes: + for (size_t axis = 0; axis < params.compile_time_axes.size(); ++axis) { + const int64_t transformed_axe = params.compile_time_axes[axis] < 0 + ? 
input_rank + params.compile_time_axes[axis] : params.compile_time_axes[axis]; + params.compile_time_axes[axis] = transformed_axe; + } + params.set_dynamic_shape_offsets(); - auto &kernel_selector = - kernel_selector::slice_kernel_selector::Instance(); + auto& kernel_selector = kernel_selector::slice_kernel_selector::Instance(); auto best_kernel = kernel_selector.get_best_kernel(params); return make_unique(best_kernel); } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_default_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params, _kernel_data); + } + +private: + // Returns true if the input was prepared (was available in node def), false otherwise. + static bool PrepareInput(const slice_node& arg, + SliceKernelRefNeededInputs::InputIndices idx, + std::vector& out_compile_time_buff, + kernel_selector::Datatype& out_buff_data_type, + kernel_selector::MultiDataTensor& out_runtime_inputs) { + const stream& stream = arg.get_program().get_stream(); + const auto& inputs = arg.get_dependencies(); + + if (inputs.size() <= idx) + return false; + + const SliceKernelRefNeededInputs kernel_needed_inputs = SliceKernelRefNeededInputs::Create(arg); + if (kernel_needed_inputs.IsInputNeededInRuntime(idx)) { + const auto layout = inputs[idx].first->get_output_layout(0); + out_buff_data_type = to_data_type(layout.data_type); + out_compile_time_buff.clear(); + out_runtime_inputs.push_back(convert_data_tensor(layout)); + } else { + out_buff_data_type = kernel_selector::Datatype::INT64; + out_compile_time_buff = extractIntegerData(inputs[idx].first->as(), stream); + } + + return true; + } }; namespace detail { attach_slice_impl::attach_slice_impl() { - implementation_map::add(impl_types::ocl, slice_impl::create, { - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::f32, format::bfyx), - std::make_tuple(data_types::u8, format::bfyx), - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::i32, format::bfyx), - std::make_tuple(data_types::i64, format::bfyx), - std::make_tuple(data_types::f16, format::bfzyx), - std::make_tuple(data_types::f32, format::bfzyx), - std::make_tuple(data_types::u8, format::bfyx), - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::i32, format::bfzyx), - std::make_tuple(data_types::i64, format::bfzyx), - }); + auto types = {data_types::f32, data_types::f16, data_types::i8, data_types::u8, data_types::i32, data_types::i64}; + + auto formats = { + format::bfyx, + format::bfzyx, + }; + + implementation_map::add(impl_types::ocl, shape_types::any, slice_impl::create, types, formats); } } // namespace detail -} // namespace ocl -} // namespace cldnn +} // namespace ocl +} // namespace cldnn BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::slice_impl) BIND_BINARY_BUFFER_WITH_TYPE(cldnn::slice) diff --git a/src/plugins/intel_gpu/src/graph/include/slice_inst.h b/src/plugins/intel_gpu/src/graph/include/slice_inst.h index 189a2a6096fea6..09425f20f5dd47 100644 --- a/src/plugins/intel_gpu/src/graph/include/slice_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/slice_inst.h @@ -10,6 +10,45 @@ namespace cldnn { using slice_node = typed_program_node; +// This class is needed to have one place where the decision +// is made which Slice inputs are used by the kernel on GPU. 
+// Unfortunately, the same decision needs to be made +// in multiple places, including: +// - slice_inst::update_shape_info_tensor +// - slice_impl::get_arguments +// - slice_impl::create +// This class was created to encapsulate that logic in a single place. +// NOTE: the placement of this class is the 'lesser evil'. Normally such logic +// should be a part of codegen/jitter, which should output some struct with information +// about which data is needed by the kernel, how it should be provided, bindings, etc. +// Currently it is scattered in multiple places, where basically similar logic has to be applied. +// NOTE: This class implicitly depends on logic inside SliceKernelRef and the kernel +// itself. If you make any changes to how params are provided to the kernel, +// you will likely need to update this one too. +class SliceKernelRefNeededInputs { +public: + enum InputIndices { + kData, + kStart, + kEnd, + kStep, + kAxes, + kInputsNum + }; + + // Creates instance of SliceKernelRefNeededInputs. + static SliceKernelRefNeededInputs Create(const slice_node& node); + + // Returns needed indexes at runtime. + const std::vector& GetNeededInputIndexes() const; + + // Returns true if given input is needed at runtime. + bool IsInputNeededInRuntime(InputIndices type) const; + +private: + std::vector neededIndexes; +}; + template <> class typed_primitive_inst : public typed_primitive_inst_base { using parent = typed_primitive_inst_base; @@ -22,8 +61,29 @@ class typed_primitive_inst : public typed_primitive_inst_base { static std::string to_string(slice_node const& node); typed_primitive_inst(network& network, slice_node const& desc); + void update_shape_info_tensor(const kernel_impl_params& params) override; }; using slice_inst = typed_primitive_inst; +/////////////////////////////////////////////////////////////////// +// +// INLINES: +// +/////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////// +inline const std::vector& SliceKernelRefNeededInputs::GetNeededInputIndexes() const { + return neededIndexes; +} + +/////////////////////////////////////////////////////////////////// +inline bool SliceKernelRefNeededInputs::IsInputNeededInRuntime(InputIndices type) const { + for (auto idx : neededIndexes) { + if (idx == type) + return true; + } + return false; +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/slice.cpp b/src/plugins/intel_gpu/src/graph/slice.cpp index 4ac47b63c2e66f..84e57ea10ca0d0 100644 --- a/src/plugins/intel_gpu/src/graph/slice.cpp +++ b/src/plugins/intel_gpu/src/graph/slice.cpp @@ -10,6 +10,30 @@ #include namespace cldnn { + +SliceKernelRefNeededInputs SliceKernelRefNeededInputs::Create(const slice_node& node) { + SliceKernelRefNeededInputs inputs; + + const auto& node_inputs = node.get_dependencies(); + + const bool axes_in_runtime = + ((node_inputs.size() == InputIndices::kInputsNum) && !node_inputs[InputIndices::kAxes].first->is_constant()); + const bool start_in_runtime = !node_inputs[InputIndices::kStart].first->is_constant(); + const bool step_in_runtime = !node_inputs[InputIndices::kStep].first->is_constant(); + + inputs.neededIndexes.push_back(InputIndices::kData); + if (start_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kStart); + if (step_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kStep); + if (axes_in_runtime) + inputs.neededIndexes.push_back(InputIndices::kAxes); + + // NOTE: stop is never needed as it is passed implicitly via output shape. 
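+ // Example (hypothetical configuration, for illustration): if the start and step inputs are + // constant data nodes and only axes comes from a runtime tensor, Create() yields + // {kData, kAxes} - exactly the inputs that slice_impl binds and that + // update_shape_info_tensor describes in the shape_info buffer.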
+ + return inputs; +} + GPU_DEFINE_PRIMITIVE_TYPE_ID(slice) slice_inst::typed_primitive_inst(network& network, slice_node const& node) @@ -19,18 +43,33 @@ layout slice_inst::calc_output_layout(slice_node const& node, kernel_impl_params return calc_output_layouts(node, impl_param)[0]; } -template +template inline std::vector slice_inst::calc_output_layouts(const slice_node&, const kernel_impl_params& impl_param) { std::vector input_shapes{impl_param.input_layouts[0].get()}; std::unordered_map const_data; for (std::size_t i = 1; i < impl_param.input_layouts.size(); i++) { - const auto shape_len = shape_size(impl_param.input_layouts[i].get().to_shape()); - const ov::PartialShape input_shape{static_cast(shape_len)}; + // NOTE: This code effectively makes a reshape operation on tensors start, + // stop, step and axes. The specification of the Slice operator clearly says + // that those tensors are 1D tensors - and this is what is expected + // in shape_infer(). However, people in tests and other places + // put 4D tensors instead of 1D (e.g. [4,1,1,1] instead of [4]). + // At the time of writing this comment - the hack for such a situation + // was already there, so adding an ASSERT will effectively make + // some tests and graph transformations fail. + // There should be some kind of warning to the user about it, but AFAIK + // we don't have warning logs that could be enabled/disabled without + // affecting performance... + ov::PartialShape input_shape = ov::PartialShape::dynamic(1); + if (impl_param.memory_deps.find(i) != impl_param.memory_deps.end()) { + auto gpu_mem = impl_param.memory_deps.at(i); + input_shape = {static_cast(gpu_mem->count())}; + cldnn::mem_lock gpu_mem_lock(gpu_mem, impl_param.get_stream()); + const_data.emplace( + i, + make_tensor(layout{input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format}, + gpu_mem_lock.data())); + } input_shapes.push_back(input_shape); - auto gpu_mem = impl_param.memory_deps.at(i); - cldnn::mem_lock gpu_mem_lock(gpu_mem, impl_param.get_stream()); - const_data.emplace(i, make_tensor(layout {input_shape, gpu_mem->get_layout().data_type, gpu_mem->get_layout().format }, - gpu_mem_lock.data())); } ov::op::v8::Slice op; auto output_shapes = shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data)); @@ -42,7 +81,6 @@ inline std::vector slice_inst::calc_output_layouts(const slice_node&, co return output_layouts; } - std::string slice_inst::to_string(slice_node const& node) { auto node_info = node.desc_to_json(); json_composite slice_info; @@ -57,4 +95,24 @@ std::string slice_inst::to_string(slice_node const& node) { return primitive_description.str(); } +void slice_inst::update_shape_info_tensor(const kernel_impl_params& params) { + mem_lock lock(_shape_info_memory, _network.get_stream()); + auto shape_info_ptr = lock.data(); + size_t offset = 0; + const SliceKernelRefNeededInputs inputs = SliceKernelRefNeededInputs::Create(*_node); + + for (auto idx : inputs.GetNeededInputIndexes()) { + GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for input[" << idx << "]" << std::endl; + const auto& node_in_lay = _node->get_input_layout(idx); + const auto& runtime_in_lay = params.input_layouts[idx]; + fill_shape_info_data(runtime_in_lay, node_in_lay, shape_info_ptr, offset); + } + for (size_t i = 0; i < _node->get_output_layouts().size(); i++) { + GPU_DEBUG_TRACE_DETAIL << id() << " : update shape_info for output[" << i << "]" << std::endl; + const auto& node_out_lay = _node->get_output_layout(i); + const auto& runtime_out_lay = 
params.output_layouts[i]; + fill_shape_info_data(runtime_out_lay, node_out_lay, shape_info_ptr, offset); + } +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl index f90254ce89f0e4..a67aa925060c73 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl @@ -4,33 +4,88 @@ #include "include/batch_headers/fetch_data.cl" -KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) +#define BRING_INTO_RANGE(VAL, MAX) \ + clamp((long)VAL < 0l ? (long)VAL + (long)MAX : (long)VAL, 0l, (long)MAX-1l); + +#if INPUT0_DIMS < 5 +#define LOAD_BUFFER(in_prefix, out_name) \ + long out_name[INPUT0_DIMS]; \ + out_name[0] = in_prefix##_VAL0; \ + out_name[1] = in_prefix##_VAL1; \ + out_name[2] = in_prefix##_VAL2; \ + out_name[3] = in_prefix##_VAL3; +#else +#define LOAD_BUFFER(in_prefix, out_name) \ + long out_name[INPUT0_DIMS]; \ + out_name[0] = in_prefix##_VAL0; \ + out_name[1] = in_prefix##_VAL1; \ + out_name[2] = in_prefix##_VAL2; \ + out_name[3] = in_prefix##_VAL3; \ + out_name[4] = in_prefix##_VAL4; +#endif + +KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* restrict input, + START_BUFFER + STEP_BUFFER + AXES_BUFFER + __global OUTPUT_TYPE* restrict output) { - const uint batch = get_global_id(0); - const uint feature = get_global_id(1); + LOAD_BUFFER(START, start_buff); + LOAD_BUFFER(STEP, step_buff); + LOAD_BUFFER(AXES, axes_buff); + + long slice_step[INPUT0_DIMS]; + long slice_start[INPUT0_DIMS]; + + unroll_for(int i = 0; i < INPUT0_DIMS; ++i) { + slice_step[i] = 1; + slice_start[i] = 0; + } + + unroll_for(int i = 0; i < AXES_BUFFER_SIZE; ++i) { + const long axis = axes_buff[i]; + slice_step[axis] = step_buff[i]; + slice_start[axis] = start_buff[i]; + } + + const long output_dim0 = get_global_id(0); + const long output_dim1 = get_global_id(1); + const long slice_begin_dim0 = BRING_INTO_RANGE(slice_start[0], INPUT0_BATCH_NUM); + const long slice_begin_dim1 = BRING_INTO_RANGE(slice_start[1], INPUT0_FEATURE_NUM); + #if INPUT0_DIMS <= 4 - const uint xy = get_global_id(2); - const uint y = xy / OUTPUT_SIZE_X; - const uint x = xy % OUTPUT_SIZE_X; - const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x); - const uint input_index = INPUT0_GET_INDEX( - SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH, - SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE, - SLICE_BEGIN_Y + y * SLICE_STEP_Y, - SLICE_BEGIN_X + x * SLICE_STEP_X); + const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Y); + const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_X); + const long output_dim23 = get_global_id(2); + const long output_dim2 = output_dim23 / OUTPUT_SIZE_X; + const long output_dim3 = output_dim23 % OUTPUT_SIZE_X; + const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3); + const long input_index = INPUT0_GET_INDEX( + slice_begin_dim0 + output_dim0 * slice_step[0], + slice_begin_dim1 + output_dim1 * slice_step[1], + slice_begin_dim2 + output_dim2 * slice_step[2], + slice_begin_dim3 + output_dim3 * slice_step[3]); #elif INPUT0_DIMS == 5 - const uint xyz = get_global_id(2); - const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - const uint y = yx / OUTPUT_SIZE_X; - const uint x = yx % OUTPUT_SIZE_X; - const uint output_index = 
OUTPUT_GET_INDEX(batch, feature, z, y, x); - const uint input_index = INPUT0_GET_INDEX( - SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH, - SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE, - SLICE_BEGIN_Z + z * SLICE_STEP_Z, - SLICE_BEGIN_Y + y * SLICE_STEP_Y, - SLICE_BEGIN_X + x * SLICE_STEP_X); + const long slice_begin_dim2 = BRING_INTO_RANGE(slice_start[2], INPUT0_SIZE_Z); + const long slice_begin_dim3 = BRING_INTO_RANGE(slice_start[3], INPUT0_SIZE_Y); + const long slice_begin_dim4 = BRING_INTO_RANGE(slice_start[4], INPUT0_SIZE_X); + const long output_dim234 = get_global_id(2); + const long output_dim34 = output_dim234 % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const long output_dim2 = output_dim234 / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const long output_dim3 = output_dim34 / OUTPUT_SIZE_X; + const long output_dim4 = output_dim34 % OUTPUT_SIZE_X; + const long output_index = OUTPUT_GET_INDEX(output_dim0, output_dim1, output_dim2, output_dim3, output_dim4); + const long input_index = INPUT0_GET_INDEX( + slice_begin_dim0 + output_dim0 * slice_step[0], + slice_begin_dim1 + output_dim1 * slice_step[1], + slice_begin_dim2 + output_dim2 * slice_step[2], + slice_begin_dim3 + output_dim3 * slice_step[3], + slice_begin_dim4 + output_dim4 * slice_step[4]); #endif + output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS); } + +#undef LOAD_BUFFER; +#undef BRING_INTO_RANGE; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp index 1952afa5378bb3..312cd0fa2f7c2c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp @@ -7,23 +7,49 @@ #include namespace { - -void addJitConstantsForAttribute(kernel_selector::JitConstants &jit, - const std::string &name, const std::vector &attribute) { +static constexpr size_t MAX_SUPPORTED_DIM = 5; +static constexpr char JIT_AXES_BUFF_SIZE_NAME[] = "AXES_BUFFER_SIZE"; + +// Generates macros: +// - name_BUFFER +// - name_VAL0, name_VAL1 ... 
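+// For instance (values are illustrative): a compile-time param START = {1, 0} on a 4D input +// expands to START_BUFFER defined as empty text plus START_VAL0=1, START_VAL1=0 and +// START_VAL2..START_VAL4 set to -1; a dynamic param instead defines START_BUFFER as an extra +// kernel argument of the form "__global const long* restrict START_buffer_ptr," and each +// START_VALi as a guarded read that yields -1 once i reaches AXES_BUFFER_SIZE.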
+void addJitConstantsForParam(kernel_selector::JitConstants& jit,
+                             const std::string& name,
+                             const std::vector<std::int64_t>& compile_time_param,
+                             kernel_selector::Datatype type,
+                             const std::function<std::string(std::string, size_t)>& dynamic_access_decorator) {
     using namespace kernel_selector;
-    jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0]));
-    jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1]));
-    if (attribute.size() == 5) {  // BFZYX
-        jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2]));
-        jit.AddConstant(MakeJitConstant(name + "_Y", attribute[3]));
-        jit.AddConstant(MakeJitConstant(name + "_X", attribute[4]));
-    } else {  // BFYX
-        jit.AddConstant(MakeJitConstant(name + "_Y", attribute[2]));
-        jit.AddConstant(MakeJitConstant(name + "_X", attribute[3]));
+    const std::string BUFF_CONST_NAME = name + "_BUFFER";
+    const std::string BUFF_PTR_NAME = name + "_buffer_ptr";
+    const auto jit_name_decorator = [](std::string name, size_t i) {
+        return name + "_VAL" + std::to_string(i);
+    };
+
+    if (compile_time_param.empty()) {
+        // Dynamic param:
+        const std::string type_str = toCLType(type);
+        jit.AddConstant(
+            MakeJitConstant(BUFF_CONST_NAME, "__global const " + type_str + "* restrict " + BUFF_PTR_NAME + ","));
+
+        for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) {
+            const std::string i_str = std::to_string(i);
+            const std::string jit_name = jit_name_decorator(name, i);
+            const std::string access_str = dynamic_access_decorator(BUFF_PTR_NAME, i);
+            jit.AddConstant(
+                MakeJitConstant(jit_name, i_str + " < " + JIT_AXES_BUFF_SIZE_NAME + " ? (" + access_str + ") : -1"));
+        }
+    } else {
+        // Static param:
+        jit.AddConstant(MakeJitConstant(BUFF_CONST_NAME, ""));
+        for (size_t i = 0; i < MAX_SUPPORTED_DIM; ++i) {
+            const std::string jit_name = jit_name_decorator(name, i);
+            const int64_t val = i < compile_time_param.size() ? compile_time_param[i] : -1;
+            jit.AddConstant(MakeJitConstant(jit_name, val));
+        }
     }
 }
 
-}  // anonymous namespace
+}  // anonymous namespace
 
 namespace kernel_selector {
 
@@ -39,8 +65,11 @@ KernelsData SliceKernelRef::GetKernelsData(const Params &params) const {
     auto slice_specific_jit = GetJitConstants(new_params);
     auto jit = CreateJit(kernelName, slice_specific_jit, entry_point);
 
-    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo,
-                     kernelName, jit, entry_point);
+    GetUpdateDispatchDataFunc(kernel_data);
+
+    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, kernelName, jit, entry_point,
+                     "", false, false, static_cast<int>(new_params.inputs.size()),
+                     0, 1, new_params.has_dynamic_tensors());
 
     return {kernel_data};
 }
@@ -68,6 +97,8 @@ ParamsKey SliceKernelRef::GetSupportedKey() const {
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
+    k.EnableDynamicShapesSupport();
+    k.EnableDifferentTypes();
 
     return k;
 }
@@ -80,17 +111,37 @@ bool SliceKernelRef::Validate(const Params &p) const {
     if (params.inputs.empty())
         return false;
 
-    if (params.outputs[0].Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
+    if (params.outputs[0].Dimentions() > MAX_SUPPORTED_DIM || params.inputs[0].Dimentions() > MAX_SUPPORTED_DIM)
         return false;
 
     return true;
 }
 
-JitConstants SliceKernelRef::GetJitConstants(const slice_params &params) const {
+JitConstants SliceKernelRef::GetJitConstants(const slice_params& params) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
-    addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start);
-    addJitConstantsForAttribute(jit, "SLICE_END", params.end);
-    addJitConstantsForAttribute(jit, "SLICE_STEP", params.step);
+
+    // Define axes size as constant:
+    if (params.compile_time_axes.empty()) {
+        kernel_selector::DimensionAccessHelper dims(params.inputs.back());
+        jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME,
+                                        toVectorMulString({dims.b(), dims.f(), dims.x(), dims.y(), dims.z()})));
+    } else {
+        jit.AddConstant(MakeJitConstant(JIT_AXES_BUFF_SIZE_NAME, params.compile_time_axes.size()));
+    }
+
+    // Prepare axes, start and step params:
+    const auto axes_decorator = [](std::string name, size_t i) {
+        const std::string i_str = std::to_string(i);
+        return name + "[" + i_str + "] < 0 ? INPUT0_DIMS + " + name + "[" + i_str + "] : " + name + "[" + i_str + "]";
+    };
+    addJitConstantsForParam(jit, "AXES", params.compile_time_axes, params.axes_data_type, axes_decorator);
+
+    const auto default_decorator = [](std::string name, size_t i) {
+        return name + "[" + std::to_string(i) + "]";
+    };
+    addJitConstantsForParam(jit, "START", params.compile_time_start, params.start_data_type, default_decorator);
+    addJitConstantsForParam(jit, "STEP", params.compile_time_step, params.step_data_type, default_decorator);
+
     return jit;
 }
 
@@ -105,4 +156,15 @@ CommonDispatchData SliceKernelRef::SetDefault(const slice_params &params) const
     return dispatchData;
 }
 
+void SliceKernelRef::GetUpdateDispatchDataFunc(KernelData& kd) const {
+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const slice_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+        kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
+    };
+}
+
 }  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
index 67449aed56b22b..b1d331cc94921e 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.h
@@ -12,9 +12,12 @@ namespace kernel_selector {
 
 struct slice_params: public base_params {
     slice_params() : base_params(KernelType::SLICE) {}
 
-    std::vector<std::int64_t> start;
-    std::vector<std::int64_t> end;
-    std::vector<std::int64_t> step;
+    std::vector<std::int64_t> compile_time_start;
+    std::vector<std::int64_t> compile_time_step;
+    std::vector<std::int64_t> compile_time_axes;
+    kernel_selector::Datatype start_data_type;
+    kernel_selector::Datatype step_data_type;
+    kernel_selector::Datatype axes_data_type;
 };
 
 class SliceKernelRef: public KernelBaseOpenCL {
@@ -30,6 +33,7 @@ class SliceKernelRef: public KernelBaseOpenCL {
 private:
     JitConstants GetJitConstants(const slice_params &params) const;
     CommonDispatchData SetDefault(const slice_params &params) const;
+    void GetUpdateDispatchDataFunc(KernelData& kd) const override;
 };
 
 }  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
index 19d386e8a8351b..1f0381ef95100e 100644
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/slice.cpp
@@ -46,5 +46,20 @@ INSTANTIATE_TEST_SUITE_P(
         ::testing::Values(ov::test::utils::DEVICE_GPU)),
         Slice8LayerTest::getTestCaseName);
 
+std::vector<Slice8SpecificParams> dynamic_params = {
+        Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
+        Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
+        Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
+        Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
+        Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
+                               {{ 1, 5, 32,
32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_dynamic, Slice8LayerTest, + ::testing::Combine( + ::testing::ValuesIn(dynamic_params), + ::testing::ValuesIn(types), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + Slice8LayerTest::getTestCaseName); } // namespace \ No newline at end of file diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp index c33a287d2e4ddb..3cff74940daf57 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/slice_gpu_test.cpp @@ -46,7 +46,14 @@ struct SliceTestParams { memory::ptr start; memory::ptr stop; memory::ptr step; + memory::ptr axes; memory::ptr wanted_output; + bool is_input_dynamic = false; + bool is_start_dynamic = false; + bool is_stop_dynamic = false; + bool is_step_dynamic = false; + bool is_axes_dynamic = false; + bool is_caching_test = false; }; template @@ -54,8 +61,7 @@ class SliceTest : public ::testing::Test { public: // Runs all test cases for given params. void RunAllTestCasesForParams(const SliceTestParams& params) { - RunTestCase(params, false); - RunTestCase(params, true); + RunTestCase(params); } // Allocates tensoer with given shape and data. @@ -69,25 +75,83 @@ class SliceTest : public ::testing::Test { return tensor; } + template + void FillWithBasicBfyxPositiveStepAxesLessThanRankData(SliceTestParams& params) { + const ov::PartialShape input_shape{ 1, 2, 12, 100 }; + params.input = this->template AllocateTensor( + input_shape, format::bfyx, helpers::GenInput(input_shape)); + params.start = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 1, 0 }); + params.stop = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 2, 120, 5 }); + params.step = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 10, 1 }); + params.axes = this->template AllocateTensor( + ov::PartialShape{ 3 }, format::bfyx, { 1, 3, 2 }); + params.wanted_output = this->template AllocateTensor( + ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { + 1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291, + 1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391, + 1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491, + 1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591, + 1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691, + }); + } + private: + void SetParameterInput(const std::string& name, topology& topology, const memory::ptr& data_ptr, bool is_dynamic ) { + if(is_dynamic) { + auto dynamic_shape = data_ptr->get_layout(); + dynamic_shape.set_partial_shape(ov::PartialShape::dynamic(dynamic_shape.get_rank())); + topology.add(input_layout(name, dynamic_shape)); + } else { + topology.add(data(name, data_ptr)); + } + } + // Runs single tests case for given params. - void RunTestCase(const SliceTestParams& params, bool is_caching_test) { + void RunTestCase(const SliceTestParams& params) { + + auto dynamic_input = params.input->get_layout(); + dynamic_input.set_partial_shape(ov::PartialShape::dynamic(dynamic_input.get_rank())); topology topology; - topology.add(input_layout("input", params.input->get_layout())); - topology.add(data("start", params.start)); - topology.add(data("stop", params.stop)); - topology.add(data("step", params.step)); + topology.add(input_layout("input", params.is_input_dynamic ? 
dynamic_input : params.input->get_layout())); + + SetParameterInput("start", topology, params.start, params.is_start_dynamic); + SetParameterInput("stop", topology, params.stop, params.is_stop_dynamic); + SetParameterInput("step", topology, params.step, params.is_step_dynamic); + + if(params.axes) { + SetParameterInput("axes", topology, params.axes, params.is_axes_dynamic); + } + std::vector inputs{input_info("input"), input_info("start"), input_info("stop"), input_info("step")}; + if (params.axes) { + inputs.push_back(input_info("axes")); + } topology.add(slice("slice", inputs)); ExecutionConfig config = get_test_default_config(engine_); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + cldnn::network::ptr network = - get_network(engine_, topology, config, get_test_stream_ptr(), is_caching_test); + get_network(engine_, topology, config, get_test_stream_ptr(), params.is_caching_test); network->set_input_data("input", params.input); + + if (params.is_start_dynamic) + network->set_input_data("start", params.start); + if (params.is_stop_dynamic) + network->set_input_data("stop", params.stop); + if (params.is_step_dynamic) + network->set_input_data("step", params.step); + if(params.axes && params.is_axes_dynamic) { + network->set_input_data("axes", params.axes); + } + auto outputs = network->execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -113,24 +177,105 @@ TYPED_TEST_SUITE(SliceTest, DataTypes); TYPED_TEST(SliceTest, bfyx_positive_step) { SliceTestParams params; - const ov::PartialShape input_shape{ 1, 2, 12, 100 }; - params.input = this->template AllocateTensor( - input_shape, format::bfyx, helpers::GenInput(input_shape)); - params.start = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 0, 1 }); - params.stop = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 2, 5, 100 }); - params.step = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 1, 1, 10 }); - params.wanted_output = this->template AllocateTensor( - ov::PartialShape{ 1, 1, 5, 10 }, format::bfyx, { - 1201, 1211, 1221, 1231, 1241, 1251, 1261, 1271, 1281, 1291, - 1301, 1311, 1321, 1331, 1341, 1351, 1361, 1371, 1381, 1391, - 1401, 1411, 1421, 1431, 1441, 1451, 1461, 1471, 1481, 1491, - 1501, 1511, 1521, 1531, 1541, 1551, 1561, 1571, 1581, 1591, - 1601, 1611, 1621, 1631, 1641, 1651, 1661, 1671, 1681, 1691, - }); + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + this->RunAllTestCasesForParams(params); +} +TYPED_TEST(SliceTest, bfyx_positive_step_all_static_caching) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_caching_test = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, bfyx_positive_step_all_dynamic_caching) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + params.is_axes_dynamic = true; + params.is_caching_test = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, stop_dynamic) { + SliceTestParams params; + this->template 
FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_step_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, start_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_start_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_start_stop_step_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); +} + +TYPED_TEST(SliceTest, input_step_axes_dynamic) { + SliceTestParams params; + this->template FillWithBasicBfyxPositiveStepAxesLessThanRankData(params); + params.is_input_dynamic = true; + params.is_step_dynamic = true; + params.is_axes_dynamic = true; this->RunAllTestCasesForParams(params); } @@ -140,11 +285,11 @@ TYPED_TEST(SliceTest, bfyx_negative_step) { params.input = this->template AllocateTensor( input_shape, format::bfyx, helpers::GenInput(input_shape)); params.start = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 0, 1, 5, 90 }); + ov::PartialShape{ 4 }, format::bfyx, { 0, 1, 5, 90 }); params.stop = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, 0, 0, 10 }); + ov::PartialShape{ 4 }, format::bfyx, { 1, 0, 0, 10 }); params.step = this->template AllocateTensor( - ov::PartialShape{ 4, 1, 1, 1 }, format::bfyx, { 1, -1, -1, -10 }); + ov::PartialShape{ 4 }, format::bfyx, { 1, -1, -1, -10 }); params.wanted_output = this->template AllocateTensor( ov::PartialShape{ 1, 1, 5, 8 }, format::bfyx, { 1789, 1779, 1769, 1759, 1749, 1739, 1729, 1719, @@ -163,17 +308,26 @@ TYPED_TEST(SliceTest, bfzyx) { params.input = this->template AllocateTensor( input_shape, format::bfzyx, helpers::GenInput(input_shape)); params.start = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 0, 0, 0, 0, 0 }); + ov::PartialShape{ 5 }, format::bfzyx, { 0, 0, 0, 0, 0 }); params.stop = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 2, 2, 2, 2 }); + 
ov::PartialShape{ 5 }, format::bfzyx, { 1, 2, 2, 2, 2 }); params.step = this->template AllocateTensor( - ov::PartialShape{ 5, 1, 1, 1 }, format::bfzyx, { 1, 1, 1, 1, 1 }); + ov::PartialShape{ 5 }, format::bfzyx, { 1, 1, 1, 1, 1 }); params.wanted_output = this->template AllocateTensor( ov::PartialShape{ 1, 2, 2, 2, 2 }, format::bfzyx, { 0, 1, 5, 6, 60, 61, 65, 66, 600, 601, 605, 606, 660, 661, 665, 666 }); + params.is_caching_test = true; + + this->RunAllTestCasesForParams(params); + params.is_input_dynamic = true; + params.is_start_dynamic = true; + params.is_step_dynamic = true; + params.is_stop_dynamic = true; + params.is_axes_dynamic = true; + this->RunAllTestCasesForParams(params); } From 50e503892755fcb00d1e48b6be82dd863fa718f0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:55:51 +0000 Subject: [PATCH 16/28] Bump awalsh128/cache-apt-pkgs-action from 1.4.1 to 1.4.2 (#23225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [awalsh128/cache-apt-pkgs-action](https://github.com/awalsh128/cache-apt-pkgs-action) from 1.4.1 to 1.4.2.
Release notes: sourced from awalsh128/cache-apt-pkgs-action's releases (v1.4.2).
Full changelog: https://github.com/awalsh128/cache-apt-pkgs-action/compare/v1...v1.4.2
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=awalsh128/cache-apt-pkgs-action&package-manager=github_actions&previous-version=1.4.1&new-version=1.4.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build_doc.yml | 2 +- .github/workflows/code_snippets.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 5fc223d9746bdf..fe52646914a40b 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -21,7 +21,7 @@ jobs: lfs: 'true' - name: Install apt-get dependencies - uses: awalsh128/cache-apt-pkgs-action@v1.4.1 + uses: awalsh128/cache-apt-pkgs-action@v1.4.2 with: packages: graphviz texlive liblua5.2-0 libclang1-9 libclang-cpp9 version: 3.0 diff --git a/.github/workflows/code_snippets.yml b/.github/workflows/code_snippets.yml index 5181e26f378da0..856f85afa29961 100644 --- a/.github/workflows/code_snippets.yml +++ b/.github/workflows/code_snippets.yml @@ -30,7 +30,7 @@ jobs: submodules: 'true' - name: Install OpenCL - uses: awalsh128/cache-apt-pkgs-action@v1.4.1 + uses: awalsh128/cache-apt-pkgs-action@v1.4.2 if: runner.os == 'Linux' with: packages: ocl-icd-opencl-dev opencl-headers From caf9148bc8c3530cd8b6d4711f79f1c329644815 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 4 Mar 2024 10:50:54 +0100 Subject: [PATCH 17/28] [TEMPLATE] GridSample in template plugin supports more precisions (#23052) ### Details: - Add more precisions support for `GridSample` in Template plugin. ### Tickets: - [CVS-133057](https://jira.devtools.intel.com/browse/CVS-133057) --- .../openvino/reference/grid_sample.hpp | 4 +- .../template/backend/ops/grid_sample.cpp | 60 ++- .../functional/op_reference/grid_sample.cpp | 478 ++++++++++-------- 3 files changed, 308 insertions(+), 234 deletions(-) diff --git a/src/core/reference/include/openvino/reference/grid_sample.hpp b/src/core/reference/include/openvino/reference/grid_sample.hpp index 6c765881e536db..7daffa1011f373 100644 --- a/src/core/reference/include/openvino/reference/grid_sample.hpp +++ b/src/core/reference/include/openvino/reference/grid_sample.hpp @@ -141,8 +141,8 @@ DATA_ET bilinear(const DATA_ET* data, const auto x_d = denormalize(x_n, data_shape[3]); const auto y_topleft = std::floor(y_d); const auto x_topleft = std::floor(x_d); - const auto dy = y_d - y_topleft; - const auto dx = x_d - x_topleft; + const auto dy = static_cast(y_d - y_topleft); + const auto dx = static_cast(x_d - x_topleft); const auto v00 = get_padded(data, data_shape, n, c, static_cast(y_topleft), static_cast(x_topleft)); const auto v01 = get_padded(data, data_shape, n, c, static_cast(y_topleft), static_cast(x_topleft + 1)); const auto v10 = get_padded(data, data_shape, n, c, static_cast(y_topleft + 1), static_cast(x_topleft)); diff --git a/src/plugins/template/backend/ops/grid_sample.cpp b/src/plugins/template/backend/ops/grid_sample.cpp index f47dba333f3c4e..1ea86f9c0ec03b 100644 --- a/src/plugins/template/backend/ops/grid_sample.cpp +++ b/src/plugins/template/backend/ops/grid_sample.cpp @@ -6,18 +6,48 @@ #include "evaluate_node.hpp" -template +template bool evaluate(const std::shared_ptr& op, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - using ET = typename ov::element_type_traits::value_type; + using DT = ov::fundamental_type_for; const auto& attributes = op->get_attributes(); - ov::element::Type grid_et = op->get_input_element_type(1); - switch (grid_et) { + + switch (op->get_input_element_type(1)) { + case ov::element::f16: + ov::reference::grid_sample(outputs[0].data
<DT>(),
+                                   inputs[0].data<DT>(),
+                                   inputs[1].data<ov::fundamental_type_for<ov::element::f16>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
+    case ov::element::bf16:
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<DT>(),
+                                   inputs[1].data<ov::fundamental_type_for<ov::element::bf16>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
     case ov::element::f32:
-        ov::reference::grid_sample(outputs[0].data<ET>(),
-                                   inputs[0].data<ET>(),
-                                   inputs[1].data<float>(),
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<DT>(),
+                                   inputs[1].data<ov::fundamental_type_for<ov::element::f32>>(),
+                                   inputs[0].get_shape(),
+                                   inputs[1].get_shape(),
+                                   attributes.align_corners,
+                                   attributes.mode,
+                                   attributes.padding_mode);
+        break;
+    case ov::element::f64:
+        ov::reference::grid_sample(outputs[0].data<DT>(),
+                                   inputs[0].data<DT>(),
+                                   inputs[1].data<ov::fundamental_type_for<ov::element::f64>>(),
                                    inputs[0].get_shape(),
                                    inputs[1].get_shape(),
                                    attributes.align_corners,
                                    attributes.mode,
                                    attributes.padding_mode);
@@ -34,23 +64,17 @@
 template <>
 bool evaluate_node<ov::op::v9::GridSample>(std::shared_ptr<ov::Node> node,
                                            ov::TensorVector& outputs,
                                            const ov::TensorVector& inputs) {
-    auto element_type = node->get_output_element_type(0);
-    if (ov::is_type<ov::op::v1::Select>(node) || ov::is_type<ov::op::util::BinaryElementwiseComparison>(node))
-        element_type = node->get_input_element_type(1);
-
-    switch (element_type) {
+    switch (node->get_output_element_type(0)) {
     case ov::element::boolean:
         return evaluate<ov::element::boolean>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::bf16:
         return evaluate<ov::element::bf16>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::f16:
         return evaluate<ov::element::f16>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::f64:
-        return evaluate<ov::element::f64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::f32:
         return evaluate<ov::element::f32>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::i4:
-        return evaluate<ov::element::i4>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
+    case ov::element::f64:
+        return evaluate<ov::element::f64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i8:
         return evaluate<ov::element::i8>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i16:
@@ -59,10 +83,6 @@ bool evaluate_node<ov::op::v9::GridSample>(std::shared_ptr<ov::Node> node,
         return evaluate<ov::element::i32>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::i64:
         return evaluate<ov::element::i64>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::u1:
-        return evaluate<ov::element::u1>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
-    case ov::element::u4:
-        return evaluate<ov::element::u4>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::u8:
         return evaluate<ov::element::u8>(ov::as_type_ptr<ov::op::v9::GridSample>(node), outputs, inputs);
     case ov::element::u16:
diff --git a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
index 1923fe011a9f6e..df4f7e5a34845b 100644
--- a/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
+++ b/src/plugins/template/tests/functional/op_reference/grid_sample.cpp
@@ -65,24 +65,33 @@ constexpr auto GS_ZEROS{op::v9::GridSample::PaddingMode::ZEROS};
 constexpr std::array<op::v9::GridSample::PaddingMode, 3> padding_modes{GS_ZEROS, GS_BORDER, GS_REFLECTION};
 constexpr std::array<bool, 2> align_corners_modes{false, true};
 
+std::string param_types_str(const element::Type& data_et, const element::Type& grid_et) {
+    std::stringstream types;
+    types << "_data_et_" << data_et << "_grid_et_" << grid_et;
+    return types.str();
+}
+
+template <element::Type_t DATA_ET,
+          element::Type_t GRID_ET,
+          class DT = ov::fundamental_type_for<DATA_ET>,
+          class GT = ov::fundamental_type_for<GRID_ET>>
 std::vector<GridSampleParams> generateNearestParamsOddDimensionsInnerGrids() {
     std::vector<GridSampleParams> params;
 
     reference_tests::Tensor data_odd_dims{{1, 1, 3, 5},
-                                          element::f32,
-                                          std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
+                                          DATA_ET,
+                                          std::vector<DT>
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, + 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, + 0.5, -0.5, 0.5, 0.5, -1., -1., + -1., 1., 1., -1., 1., 1.}}; + reference_tests::Tensor output{{1, 1, 3, 4}, DATA_ET, std::vector
{8, 8, 8, 8, 2, 12, 4, 14, 1, 11, 5, 15}}; + for (const auto& padding : padding_modes) { for (const auto align : align_corners_modes) { std::stringstream name; name << "nearest_" << padding << (align ? "_align" : "_noalign") << "_odd_dims_inner"; + name << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{align, GS_NEAREST, padding}, @@ -93,78 +102,83 @@ std::vector generateNearestParamsOddDimensionsInnerGrids() { return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateNearestParamsOddDimensionsOuterGrids() { std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; reference_tests::Tensor grid_outer{ {1, 1, 7, 2}, - element::f32, - std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + GRID_ET, + std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + + const auto types_str = param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "nearest_zeros_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 0}}, + "nearest_zeros_noalign_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "nearest_zeros_align_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, GRID_ET, std::vector{0, 0, 0, 0, 0, 0, 0}}, + "nearest_zeros_align_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 11, 11, 14, 15, 10, 5}}, - "nearest_border_noalign_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{false, GS_NEAREST, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 11, 11, 14, 15, 10, 5}}, + "nearest_border_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 6, 11, 14, 15, 10, 5}}, - "nearest_border_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 6, 11, 14, 15, 10, 5}}, + "nearest_border_align_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{8, 14, 1, 4, 14, 6, 5}}, - "nearest_reflection_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{8, 14, 1, 4, 14, 6, 5}}, + "nearest_reflection_noalign_odd_dims_outer" + types_str); params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{8, 9, 6, 4, 14, 6, 4}}, - "nearest_reflection_align_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{8, 9, 6, 4, 14, 6, 4}}, + "nearest_reflection_align_odd_dims_outer" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateNearestParamsEvenDimensions() { std::vector params; - reference_tests::Tensor data_even_dims{ - {1, 1, 4, 6}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}; + reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector
{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}}; reference_tests::Tensor grid_inner{ {1, 1, 8, 2}, - element::f32, - std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; + GRID_ET, + std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; - reference_tests::Tensor output_align{{1, 1, 1, 8}, element::f32, std::vector{8, 14, 11, 17, 19, 6, 9, 16}}; - reference_tests::Tensor output_noalign{{1, 1, 1, 8}, element::f32, std::vector{2, 14, 5, 17, 19, 6, 9, 16}}; - reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8}, - element::f32, - std::vector{2, 14, 5, 17, 0, 0, 9, 16}}; + reference_tests::Tensor output_align{{1, 1, 1, 8}, DATA_ET, std::vector
{8, 14, 11, 17, 19, 6, 9, 16}}; + reference_tests::Tensor output_noalign{{1, 1, 1, 8}, DATA_ET, std::vector
{2, 14, 5, 17, 19, 6, 9, 16}}; + reference_tests::Tensor output_zeros_noalign{{1, 1, 1, 8}, DATA_ET, std::vector
{2, 14, 5, 17, 0, 0, 9, 16}}; for (const auto& padding : padding_modes) { std::stringstream name1, name2; name1 << "nearest_" << padding << "_noalign" - << "_even_dims_inner"; + << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_even_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_NEAREST, padding}, @@ -172,7 +186,7 @@ std::vector generateNearestParamsEvenDimensions() { name1.str()); name2 << "nearest_" << padding << "_align" - << "_even_dims_inner"; + << "_even_dims_inner" << param_types_str(DATA_ET, GRID_ET); params.emplace_back(data_even_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_NEAREST, padding}, @@ -183,199 +197,209 @@ std::vector generateNearestParamsEvenDimensions() { return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsOddDimensionsInnerGrids() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; - reference_tests::Tensor grid_inner{ - {1, 3, 4, 2}, - element::f32, - std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, - 0.5, -0.5, 0.5, 0.5, -1., -1., -1., 1., 1., -1., 1., 1.}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + reference_tests::Tensor grid_inner{{1, 3, 4, 2}, GRID_ET, std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, + 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, + 0.5, -0.5, 0.5, 0.5, -1., -1., + -1., 1., 1., -1., 1., 1.}}; reference_tests::Tensor output_align{{1, 1, 3, 4}, - element::f32, - std::vector{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}}; + DATA_ET, + std::vector
{7.3, 8.3, 7.7, 8.7, 4.5, 9.5, 6.5, 11.5, 1, 11, 5, 15}}; reference_tests::Tensor output_noalign{{1, 1, 3, 4}, - element::f32, - std::vector{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}}; + DATA_ET, + std::vector
{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 1, 11, 5, 15}}; reference_tests::Tensor output_zeros_noalign{ {1, 1, 3, 4}, - element::f32, - std::vector{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}}; + DATA_ET, + std::vector
{7, 8.5, 7.5, 9, 3, 10.5, 5.5, 13, 0.25, 2.75, 1.25, 3.75}}; params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, output_zeros_noalign, - "bilinear_zeros_noalign_odd_dims_inner"); + "bilinear_zeros_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, output_align, - "bilinear_zeros_align_odd_dims_inner"); + "bilinear_zeros_align_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, output_noalign, - "bilinear_border_noalign_odd_dims_inner"); + "bilinear_border_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, output_align, - "bilinear_border_align_odd_dims_inner"); + "bilinear_border_align_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, output_noalign, - "bilinear_reflection_noalign_odd_dims_inner"); + "bilinear_reflection_noalign_odd_dims_inner" + types_str); params.emplace_back(data_odd_dims, grid_inner, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, output_align, - "bilinear_reflection_align_odd_dims_inner"); + "bilinear_reflection_align_odd_dims_inner" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsOddDimensionsOuterGrids() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data_odd_dims{{1, 1, 3, 5}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; reference_tests::Tensor grid_outer{ {1, 1, 7, 2}, - element::f32, - std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; + GRID_ET, + std::vector{-10.1, -9.7, -7.55, 0.37, -77., 11.56, 0.5, 2.55, 1.7, 1.1, 3., -0.17, 1.301, -1.001}}; params.emplace_back(data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 0}}, - "bilinear_zeros_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 0}}, + "bilinear_zeros_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{0, 0, 0, 0, 0, 0, 1.9880099}}, - "bilinear_zeros_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{0, 0, 0, 0, 0, 0, 1.9880099}}, + "bilinear_zeros_align_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 8.775, 11, 14.25, 15, 8.725, 5}}, - "bilinear_border_noalign_odd_dims_outer"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 8.775, 11, 14.25, 15, 8.725, 5}}, + "bilinear_border_noalign_odd_dims_outer" + types_str); - params.emplace_back( - data_odd_dims, - grid_outer, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{1, 7.85, 11, 14, 15, 9.15, 5}}, - "bilinear_border_align_odd_dims_outer"); + params.emplace_back(data_odd_dims, + grid_outer, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{1, 7.85, 11, 14, 15, 9.15, 5}}, + "bilinear_border_align_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}}, - "bilinear_reflection_noalign_odd_dims_outer"); + DATA_ET, + std::vector
{5.9999995, 11.9, 2.7000031, 5.1250005, 13.75, 4.725, 4.7475}}, + "bilinear_reflection_noalign_odd_dims_outer" + types_str); params.emplace_back( data_odd_dims, grid_outer, op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}}, - "bilinear_reflection_align_odd_dims_outer"); + DATA_ET, + std::vector
{6.7, 10.75, 3.800002, 6.25, 13.099999, 5.15, 4.4030004}}, + "bilinear_reflection_align_odd_dims_outer" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBilinearParamsEvenDimensions() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; - reference_tests::Tensor data_even_dims{ - {1, 1, 4, 6}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}; + reference_tests::Tensor data_even_dims{{1, 1, 4, 6}, DATA_ET, std::vector
{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}}; reference_tests::Tensor grid_inner{ {1, 1, 8, 2}, - element::f32, - std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; + GRID_ET, + std::vector{-0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -1, 1, 1, -1, -0.1, -0.1, 0.1, 0.1}}; params.emplace_back( data_even_dims, grid_inner, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 4.75, 1.5, 11, 14}}, - "bilinear_zeros_noalign_even_dims_inner"); - - params.emplace_back(data_even_dims, - grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_zeros_align_even_dims_inner"); + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 4.75, 1.5, 11, 14}}, + "bilinear_zeros_noalign_even_dims_inner" + types_str); params.emplace_back( data_even_dims, grid_inner, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 19, 6, 11, 14}}, - "bilinear_border_noalign_even_dims_inner"); + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_zeros_align_even_dims_inner" + types_str); params.emplace_back(data_even_dims, grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_border_align_even_dims_inner"); + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 19, 6, 11, 14}}, + "bilinear_border_noalign_even_dims_inner" + types_str); params.emplace_back( data_even_dims, grid_inner, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, element::f32, std::vector{5, 17, 8, 20, 19, 6, 11, 14}}, - "bilinear_reflection_noalign_even_dims_inner"); + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_BORDER}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_border_align_even_dims_inner" + types_str); params.emplace_back(data_even_dims, grid_inner, - op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, - reference_tests::Tensor{{1, 1, 1, 7}, - element::f32, - std::vector{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, - "bilinear_reflection_align_even_dims_inner"); + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{5, 17, 8, 20, 19, 6, 11, 14}}, + "bilinear_reflection_noalign_even_dims_inner" + types_str); + + params.emplace_back( + data_even_dims, + grid_inner, + op::v9::GridSample::Attributes{true, GS_BILINEAR, GS_REFLECTION}, + reference_tests::Tensor{{1, 1, 1, 7}, DATA_ET, std::vector
{6.75, 15.75, 9.25, 18.25, 19, 6, 11.35, 13.65}}, + "bilinear_reflection_align_even_dims_inner" + types_str); return params; } +template > std::vector generateBicubicParams() { + constexpr auto GRID_ET = ov::element::Type_t::f32; + using GT = ov::fundamental_type_for; + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; // clang-format off - reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, element::f32, - std::vector{ 1, 1, 1, 1, 1, 1, 1, + reference_tests::Tensor data_even_dims{{1, 1, 4, 7}, DATA_ET, + std::vector
{1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 3, 5, 3, 2, 1, 1, 2, 5, 9, 5, 2, 1}}; reference_tests::Tensor grid{ {1, 4, 4, 2}, - element::f32, - std::vector{ -0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, + GRID_ET, + std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; @@ -384,85 +408,90 @@ std::vector generateBicubicParams() { grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{2.6663566, 3.527928, 2.6663566, 3.527928, - 1.6318359, 2.7156982, 1.6318359, 2.7156982, - 0.6378987, 0.57033366, 0.6378987, 0.57033366, - 0, -0.01507522, 0.25528803, 0 }}, - "bicubic_zeros_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.6318359, 2.7156982, 1.6318359, 2.7156982, + 0.6378987, 0.57033366, 0.6378987, 0.57033366, + 0, -0.01507522, 0.25528803, 0 }}, + "bicubic_zeros_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.8481445, 2.7364502, 1.8481445, 2.7364502, - 1.2367951, 1.3602872, 1.2367951, 1.3602872, - 0, 0.00650583, 1.1182348, 0 }}, - "bicubic_zeros_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.8481445, 2.7364502, 1.8481445, 2.7364502, + 1.2367951, 1.3602872, 1.2367951, 1.3602872, + 0, 0.00650583, 1.1182348, 0 }}, + "bicubic_zeros_align" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.6663566, 3.527928, 2.6663566, 3.527928, - 1.5380859, 2.4677734, 1.5380859, 2.4677734, - 1.0089612, 0.91871876, 1.0089612, 0.91871876, - 1, 1, 0.8902873, 1 }}, - "bicubic_border_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.5380859, 2.4677734, 1.5380859, 2.4677734, + 1.0089612, 0.91871876, 1.0089612, 0.91871876, + 1, 1, 0.8902873, 1 }}, + "bicubic_border_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.8129883, 2.623291, 1.8129883, 2.623291, - 1.0363026, 1.1486388, 1.0363026, 1.1486388, - 1, 1.0000064, 1.0641243, 1 }}, - "bicubic_border_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.8129883, 2.623291, 1.8129883, 2.623291, + 1.0363026, 1.1486388, 1.0363026, 1.1486388, + 1, 1.0000064, 1.0641243, 1 }}, + "bicubic_border_align" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.6663566, 3.527928, 2.6663566, 3.527928, - 1.5380859, 2.4677734, 1.5380859, 2.4677734, - 1.0150609, 0.904375, 1.0150609, 0.904375, - 5.48851, 0.898316, 0.8237547, 0.8125 }}, - "bicubic_reflection_noalign"); + DATA_ET, + std::vector
{2.6663566, 3.527928, 2.6663566, 3.527928, + 1.5380859, 2.4677734, 1.5380859, 2.4677734, + 1.0150609, 0.904375, 1.0150609, 0.904375, + 5.48851, 0.898316, 0.8237547, 0.8125 }}, + "bicubic_reflection_noalign" + types_str); params.emplace_back(data_even_dims, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{{1, 1, 4, 4}, - element::f32, - std::vector{ 2.7887204, 3.4506166, 2.7887204, 3.4506166, - 1.7745361, 2.6518555, 1.7745361, 2.6518555, - 1.0085088, 1.0307077, 1.0085088, 1.0307077, - 5.5649586, 1.0553409, 1.0011607, 1 }}, - "bicubic_reflection_align"); + DATA_ET, + std::vector
{2.7887204, 3.4506166, 2.7887204, 3.4506166, + 1.7745361, 2.6518555, 1.7745361, 2.6518555, + 1.0085088, 1.0307077, 1.0085088, 1.0307077, + 5.5649586, 1.0553409, 1.0011607, 1 }}, + "bicubic_reflection_align" + types_str); // clang-format on return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateBicubicBatchesParams() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; reference_tests::Tensor data{{2, 2, 4, 3}, - element::f32, - std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}}; - reference_tests::Tensor grid{{2, 2, 4, 2}, - element::f32, - std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, - 0.5, 0.5, -0.5, 0.5, 0.5, -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, - 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; + DATA_ET, + std::vector
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}}; + reference_tests::Tensor grid{ + {2, 2, 4, 2}, + GRID_ET, + std::vector{-0.1, -0.1, -0.1, 0.1, 0.1, -0.1, 0.1, 0.1, -0.5, -0.5, -0.5, 0.5, 0.5, -0.5, 0.5, 0.5, + -0.9, -0.9, -0.9, 0.9, 0.9, -0.9, 0.9, 0.9, -1.75, 0.7, 1.33, -1.11, 0.965, 1.007, 21, 37}}; params.emplace_back( data, @@ -470,12 +499,12 @@ std::vector generateBicubicBatchesParams() { op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_BORDER}, reference_tests::Tensor{ {2, 2, 2, 4}, - element::f32, - std::vector{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344, 4.5976562, 9.589844, - 18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844, - 25.415281, 33.735218, 27.26478, 35.58472, 32.884, 26.852259, 35.996872, 36., - 37.41528, 45.735218, 39.264782, 47.58472, 44.884, 38.852257, 47.996872, 48.}}, - "bicubic_border_align_batches"); + DATA_ET, + std::vector
{6.0096254, 6.7048755, 6.2951245, 6.9903746, 3.4101562, 8.402344, 4.5976562, 9.589844, + 18.009624, 18.704876, 18.295124, 18.990376, 15.410156, 20.402344, 16.597656, 21.589844, + 25.415281, 33.735218, 27.26478, 35.58472, 32.884, 26.852259, 35.996872, 36., + 37.41528, 45.735218, 39.264782, 47.58472, 44.884, 38.852257, 47.996872, 48.}}, + "bicubic_border_align_batches" + types_str); params.emplace_back( data, @@ -483,76 +512,101 @@ std::vector generateBicubicBatchesParams() { op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_REFLECTION}, reference_tests::Tensor{ {2, 2, 2, 4}, - element::f32, - std::vector{5.8170314, 6.7650313, 6.2349687, 7.182969, 2.4101562, 8.972656, 4.0273438, 10.589844, - 17.81703, 18.765032, 18.234968, 19.18297, 14.410156, 20.972656, 16.027344, 22.589844, - 24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013, 36.74749, 36.75, - 36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013, 48.74749, 48.75}}, - "bicubic_reflection_noalign_batches"); + DATA_ET, + std::vector
{5.8170314, 6.7650313, 6.2349687, 7.182969, 2.4101562, 8.972656, 4.0273438, 10.589844, + 17.81703, 18.765032, 18.234968, 19.18297, 14.410156, 20.972656, 16.027344, 22.589844, + 24.356874, 34.301876, 26.698126, 36.643124, 34.304035, 26.55013, 36.74749, 36.75, + 36.356876, 46.301876, 38.698124, 48.643124, 46.304035, 38.55013, 48.74749, 48.75}}, + "bicubic_reflection_noalign_batches" + types_str); return params; } +template , + class GT = ov::fundamental_type_for> std::vector generateCornerCaseData1x1Params() { + const auto types_str = param_types_str(DATA_ET, GRID_ET); std::vector params; - const reference_tests::Tensor data{{1, 1, 1, 1}, element::f32, std::vector{7}}; - const reference_tests::Tensor grid{{1, 1, 5, 2}, - element::f32, - std::vector{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}}; - const reference_tests::Tensor sevens{{1, 1, 1, 5}, element::f32, std::vector{7, 7, 7, 7, 7}}; + const reference_tests::Tensor data{{1, 1, 1, 1}, DATA_ET, std::vector
{7}}; + const reference_tests::Tensor grid{{1, 1, 5, 2}, GRID_ET, std::vector{1, -1, 0, 0, -1, 0, 0.5, 0.5, 2, -4}}; + const reference_tests::Tensor sevens{{1, 1, 1, 5}, DATA_ET, std::vector
{7, 7, 7, 7, 7}}; - params.emplace_back( - data, - grid, - op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{1.75, 7, 3.5, 3.9375, 0}}, - "bilinear_zeros_no_align_data1x1"); + params.emplace_back(data, + grid, + op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_ZEROS}, + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{1.75, 7, 3.5, 3.9375, 0}}, + "bilinear_zeros_no_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{false, GS_NEAREST, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{7, 7, 7, 7, 0}}, - "nearest_zeros_no_align_data1x1"); + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{7, 7, 7, 7, 0}}, + "nearest_zeros_no_align_data1x1" + types_str); params.emplace_back( data, grid, op::v9::GridSample::Attributes{false, GS_BICUBIC, GS_ZEROS}, - reference_tests::Tensor{{1, 1, 1, 5}, element::f32, std::vector{2.4677734, 7, 4.15625, 5.4073334, 0}}, - "bicubic_zeros_no_align_data1x1"); + reference_tests::Tensor{{1, 1, 1, 5}, DATA_ET, std::vector
{2.4677734, 7, 4.15625, 5.4073334, 0}}, + "bicubic_zeros_no_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{true, GS_BICUBIC, GS_ZEROS}, sevens, - "bicubic_zeros_align_data1x1"); + "bicubic_zeros_align_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{false, GS_BILINEAR, GS_REFLECTION}, sevens, - "bilinear_reflection_noalign_data1x1"); + "bilinear_reflection_noalign_data1x1" + types_str); params.emplace_back(data, grid, op::v9::GridSample::Attributes{true, GS_NEAREST, GS_BORDER}, sevens, - "nearest_border_align_data1x1"); + "nearest_border_align_data1x1" + types_str); return params; } std::vector generateGridSampleParams() { - std::vector> combo_params{generateNearestParamsOddDimensionsInnerGrids(), - generateNearestParamsOddDimensionsOuterGrids(), - generateNearestParamsEvenDimensions(), - generateBilinearParamsOddDimensionsInnerGrids(), - generateBilinearParamsOddDimensionsOuterGrids(), - generateBilinearParamsEvenDimensions(), - generateBicubicParams(), - generateBicubicBatchesParams(), - generateCornerCaseData1x1Params()}; + using namespace ov::element; + std::vector> combo_params{generateNearestParamsOddDimensionsInnerGrids(), + generateNearestParamsOddDimensionsInnerGrids(), + generateNearestParamsOddDimensionsInnerGrids(), + + generateNearestParamsEvenDimensions(), + generateNearestParamsEvenDimensions(), + generateNearestParamsEvenDimensions(), + + generateBilinearParamsOddDimensionsInnerGrids(), + generateBilinearParamsOddDimensionsInnerGrids(), + generateBilinearParamsOddDimensionsInnerGrids(), + + generateBilinearParamsOddDimensionsOuterGrids(), + generateBilinearParamsOddDimensionsOuterGrids(), + generateBilinearParamsOddDimensionsOuterGrids(), + + generateBilinearParamsEvenDimensions(), + generateBilinearParamsEvenDimensions(), + generateBilinearParamsEvenDimensions(), + + generateBicubicParams(), + generateBicubicParams(), + generateBicubicParams(), + + generateBicubicBatchesParams(), + generateBicubicBatchesParams(), + generateBicubicBatchesParams(), + + generateCornerCaseData1x1Params(), + generateCornerCaseData1x1Params(), + generateCornerCaseData1x1Params()}; std::vector test_params; for (auto& params : combo_params) std::move(params.begin(), params.end(), std::back_inserter(test_params)); From e2a74956c0d136fcfee4d07488d437dfc3184390 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Mon, 4 Mar 2024 10:53:09 +0100 Subject: [PATCH 18/28] [Ref] Concat op reference implementation code improvements (#23048) ### Details: - Concat op reference implementation code improvements, removal of code duplication - Leftovers from PR: https://github.com/openvinotoolkit/openvino/pull/22686 - Reduce Concat template code, pass element_type to distinguish copy method for string (element::Type_t::undefined as default to keep compatibility) - ~Corresponding update of places where reference::concat is used (including one gpu file)~ (reverted) (*First approach (https://github.com/openvinotoolkit/openvino/pull/23048/commits/6308f9d1cfe7e8de5848f7e8e04fb2907b129066) was to simply introduce common template to reuse the Concat reference code, but it still results in generation of string and char specializations of the whole template function* *Current approach is to use the same function and choose the copy method inside, based on the element type*) ### Tickets: -Related to 131838 --- .../include/openvino/reference/concat.hpp | 11 +--- src/core/reference/src/op/concat.cpp | 59 ++++++++----------- 
src/core/src/op/concat.cpp | 34 ++++------- 3 files changed, 41 insertions(+), 63 deletions(-) diff --git a/src/core/reference/include/openvino/reference/concat.hpp b/src/core/reference/include/openvino/reference/concat.hpp index b7a18ecef5d650..751226d83251f8 100644 --- a/src/core/reference/include/openvino/reference/concat.hpp +++ b/src/core/reference/include/openvino/reference/concat.hpp @@ -7,6 +7,7 @@ #include #include "openvino/core/shape.hpp" +#include "openvino/core/type/element_type.hpp" namespace ov { namespace reference { @@ -15,14 +16,8 @@ void concat(const std::vector& args, const std::vector& in_shapes, const Shape& out_shape, int64_t concatenation_axis, - size_t elem_size); - -void concat(const std::vector& args, - std::string* out, - const std::vector& in_shapes, - const Shape& out_shape, - int64_t concatenation_axis, - size_t); + size_t elem_size, + const ov::element::Type& elem_type = ov::element::Type_t::undefined); } // namespace reference } // namespace ov diff --git a/src/core/reference/src/op/concat.cpp b/src/core/reference/src/op/concat.cpp index 81cdbc25e5b805..815925274ea6a5 100644 --- a/src/core/reference/src/op/concat.cpp +++ b/src/core/reference/src/op/concat.cpp @@ -17,6 +17,26 @@ std::vector calculate_shape_sizes(const std::vector& in_shapes) { }); return sizes; } + +void copy_elements(const char* arg, + char* out, + size_t in_offset, + size_t out_offset, + size_t num_of_elements, + size_t elem_size) { + std::memcpy(out + (out_offset * elem_size), arg + (in_offset * elem_size), num_of_elements * elem_size); +} + +void copy_string_elements(const char* arg, + char* out, + size_t in_offset, + size_t out_offset, + size_t num_of_elements, + size_t) { + const auto src_begin = std::next(reinterpret_cast(arg), in_offset); + const auto out_ptr = std::next(reinterpret_cast(out), out_offset); + std::copy_n(src_begin, num_of_elements, out_ptr); +} } // namespace void concat(const std::vector& args, @@ -24,38 +44,12 @@ void concat(const std::vector& args, const std::vector& in_shapes, const Shape& out_shape, int64_t concatenation_axis, - size_t elem_size) { - size_t steps = 1; - for (int i = 0; i < concatenation_axis; ++i) { - steps *= out_shape[i]; - } - + size_t elem_size, + const ov::element::Type& elem_type) { + const auto steps = shape_size(out_shape.begin(), out_shape.begin() + concatenation_axis); const auto& shape_sizes = calculate_shape_sizes(in_shapes); - size_t out_offset = 0; - for (size_t step = 0; step < steps; ++step) { - for (size_t in_index = 0; in_index < args.size(); ++in_index) { - const size_t size = shape_sizes[in_index] / steps; - const size_t in_offset = step * size; - - std::memcpy(&out[out_offset * elem_size], &args[in_index][in_offset * elem_size], size * elem_size); - - out_offset += size; - } - } -} - -void concat(const std::vector& args, - std::string* out, - const std::vector& in_shapes, - const Shape& out_shape, - int64_t concatenation_axis, - size_t) { - size_t steps = 1; - for (int i = 0; i < concatenation_axis; ++i) { - steps *= out_shape[i]; - } - const auto& shape_sizes = calculate_shape_sizes(in_shapes); + const auto copy_func = elem_type == ov::element::string ? 
copy_string_elements : copy_elements; size_t out_offset = 0; for (size_t step = 0; step < steps; ++step) { @@ -63,14 +57,11 @@ void concat(const std::vector& args, const size_t size = shape_sizes[in_index] / steps; const size_t in_offset = step * size; - const auto src_begin = std::next(args[in_index], in_offset); - const auto out_ptr = std::next(out, out_offset); - std::copy_n(src_begin, size, out_ptr); + copy_func(args[in_index], out, in_offset, out_offset, size, elem_size); out_offset += size; } } } - } // namespace reference } // namespace ov diff --git a/src/core/src/op/concat.cpp b/src/core/src/op/concat.cpp index 4ec7743d64167e..b670af7d4e03ea 100644 --- a/src/core/src/op/concat.cpp +++ b/src/core/src/op/concat.cpp @@ -52,43 +52,35 @@ std::shared_ptr Concat::clone_with_new_inputs(const OutputVector& new_args return std::make_shared(new_args, m_axis); } -template -void evaluate_concat(const Concat* node, TensorVector& outputs, const TensorVector& inputs) { +bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Concat_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + const auto inputs_count = inputs.size(); std::vector arg_shapes; std::vector input_shapes; + std::vector arg_bufs; arg_shapes.reserve(inputs_count); input_shapes.reserve(inputs_count); + arg_bufs.reserve(inputs_count); - std::vector arg_bufs(inputs_count); - auto arg_buf = arg_bufs.begin(); for (auto& input : inputs) { - *arg_buf = static_cast(input.data()); - ++arg_buf; const auto& input_shape = input.get_shape(); arg_shapes.emplace_back(input_shape); input_shapes.emplace_back(input_shape); + arg_bufs.emplace_back(static_cast(input.data())); } - const auto& out_shape = shape_infer(node, input_shapes).front().to_shape(); + const auto& out_shape = shape_infer(this, input_shapes).front().to_shape(); outputs.front().set_shape(out_shape); + const auto elem_type = outputs.front().get_element_type(); reference::concat(arg_bufs, - static_cast(outputs.front().data()), + static_cast(outputs.front().data()), arg_shapes, out_shape, - ov::util::normalize(node->get_axis(), out_shape.size()), - outputs.front().get_element_type().size()); -} - -bool Concat::evaluate(TensorVector& outputs, const TensorVector& inputs) const { - OV_OP_SCOPE(v0_Concat_evaluate); - OPENVINO_ASSERT(outputs.size() == 1); - - if (outputs.front().get_element_type() == ov::element::string) { - evaluate_concat(this, outputs, inputs); - } else { - evaluate_concat(this, outputs, inputs); - } + ov::util::normalize(this->get_axis(), out_shape.size()), + elem_type.size(), + elem_type); return true; } From e6dc08651284e9798bd5bb3383136563e3c401ed Mon Sep 17 00:00:00 2001 From: Alicja Miloszewska Date: Mon, 4 Mar 2024 11:06:19 +0100 Subject: [PATCH 19/28] [OV JS] Fix typescript method definition (#23185) ### Details: - Move `getAvailableDevices` to `Core` definition --- src/bindings/js/node/lib/addon.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/js/node/lib/addon.ts b/src/bindings/js/node/lib/addon.ts index ae75f479e655e1..7f4f9fd917cd13 100644 --- a/src/bindings/js/node/lib/addon.ts +++ b/src/bindings/js/node/lib/addon.ts @@ -36,6 +36,7 @@ interface Core { modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Promise; readModelSync(modelPath: string, weightsPath?: string): Model; readModelSync(modelBuffer: Uint8Array, weightsBuffer?: Uint8Array): Model; + getAvailableDevices(): string[]; } interface CoreConstructor { new(): Core; @@ -81,7 +82,6 @@ interface InferRequest { 
inferAsync(inputData: { [inputName: string]: Tensor} | Tensor[] ): Promise<{ [outputName: string] : Tensor}>;
   getCompiledModel(): CompiledModel;
-  getAvailableDevices(): string[];
 }
 
 type Dimension = number | [number, number];

From ca22b506d401fb9a2d40316d84d73448d8c78f5c Mon Sep 17 00:00:00 2001
From: Sofya Balandina
Date: Mon, 4 Mar 2024 14:06:30 +0000
Subject: [PATCH 20/28] [api conformance] Fix "No devices" for sw plugins when
 creating new core (#23166)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 .../common_test_utils/ov_plugin_cache.hpp     |  4 ++--
 .../common_test_utils/src/ov_plugin_cache.cpp | 20 ++++++++-----------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_plugin_cache.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_plugin_cache.hpp
index 26228d7d9b1a88..0805cbcba5e883 100644
--- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_plugin_cache.hpp
+++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_plugin_cache.hpp
@@ -23,11 +23,11 @@ extern std::unordered_set available_devices;
 void register_plugin(ov::Core& ov_core) noexcept;
 void register_template_plugin(ov::Core& ov_core) noexcept;
 
-ov::Core create_core(const std::string& in_target_device = target_device);
+ov::Core create_core(const std::string& in_target_device = std::string());
 
 class PluginCache {
 public:
-    std::shared_ptr core(const std::string& in_target_device = target_device);
+    std::shared_ptr core(const std::string& in_target_device = std::string());
 
     static PluginCache& get();
     void reset();
diff --git a/src/tests/test_utils/common_test_utils/src/ov_plugin_cache.cpp b/src/tests/test_utils/common_test_utils/src/ov_plugin_cache.cpp
index b6e4e76930bca8..e64f0ce1fa7f91 100644
--- a/src/tests/test_utils/common_test_utils/src/ov_plugin_cache.cpp
+++ b/src/tests/test_utils/common_test_utils/src/ov_plugin_cache.cpp
@@ -62,19 +62,15 @@ ov::Core create_core(const std::string& in_target_device) {
     }
 
     if (!global_plugin_config.empty()) {
-        if (in_target_device.empty()) {
-            ov_core.set_property(global_plugin_config);
-        } else {
-            const auto& supported_properties = ov_core.get_property(in_target_device, ov::supported_properties);
-            for (auto& property : global_plugin_config) {
-                if (std::find(supported_properties.begin(), supported_properties.end(), property.first) ==
-                    supported_properties.end()) {
-                    OPENVINO_THROW("Property " + property.first +
-                                   ", which was tryed to set in --config file, is not supported by " +
-                                   in_target_device);
-                }
+        // apply config to main device specified by user at launch or to special device specified when creating new core
+        auto config_device = in_target_device.empty() ? target_device : in_target_device;
+        for (auto& property : global_plugin_config) {
+            try {
+                ov_core.set_property(config_device, global_plugin_config);
+            } catch (...)
{
+                OPENVINO_THROW("Property " + property.first +
+                               ", which was tried to set in --config file, is not supported by " + target_device);
             }
-            ov_core.set_property(in_target_device, global_plugin_config);
         }
     }
     return ov_core;

From e2693404a8d1e8776ae392049bfbb2d820ea4940 Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Mon, 4 Mar 2024 16:32:53 +0100
Subject: [PATCH 21/28] [PT FE] Support aten::log_sigmoid (#23200)

### Details:
 - *Support `aten::log_sigmoid` in TS*
 - *Support `aten.expm1.default` and `aten.erfc.default` in FX*
 - *Unify unary ops testing*

### Tickets:
 - *CVS-134328*

---
 src/frontends/pytorch/src/op/log.cpp          |  50 ++--
 src/frontends/pytorch/src/op/reciprocal.cpp   |   2 +-
 src/frontends/pytorch/src/op/rsqrt.cpp        |   2 +-
 src/frontends/pytorch/src/op_table.cpp        |  62 ++---
 src/frontends/pytorch/src/utils.hpp           |  11 +
 tests/layer_tests/pytorch_tests/test_erf.py   |  61 -----
 tests/layer_tests/pytorch_tests/test_erfc.py  |  57 -----
 tests/layer_tests/pytorch_tests/test_expm1.py |  64 -----
 tests/layer_tests/pytorch_tests/test_log.py   |  60 -----
 tests/layer_tests/pytorch_tests/test_relu6.py |  34 ---
 tests/layer_tests/pytorch_tests/test_selu.py  |  34 ---
 tests/layer_tests/pytorch_tests/test_silu.py  |  33 ---
 .../pytorch_tests/test_unary_ops.py           | 222 ++++++++++++------
 13 files changed, 230 insertions(+), 462 deletions(-)
 delete mode 100644 tests/layer_tests/pytorch_tests/test_erf.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_erfc.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_expm1.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_log.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_relu6.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_selu.py
 delete mode 100644 tests/layer_tests/pytorch_tests/test_silu.py

diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp
index 573033105d95ce..c4fc38a71c677b 100644
--- a/src/frontends/pytorch/src/op/log.cpp
+++ b/src/frontends/pytorch/src/op/log.cpp
@@ -8,6 +8,7 @@
 #include "openvino/op/add.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
+#include "openvino/op/convert_like.hpp"
 #include "openvino/op/divide.hpp"
 #include "openvino/op/exp.hpp"
 #include "openvino/op/reduce_sum.hpp"
@@ -21,44 +22,47 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_log(const NodeContext& context) {
-    // torch.log returns a tensor with the natural logarithm of the elements of input.
-    num_inputs_check(context, 1, 1);
-    auto x = context.get_input(0);
-    x = context.mark_node(std::make_shared(x, element::f32));
-    auto log = context.mark_node(std::make_shared(x));
-    return {log};
-};
-
 OutputVector translate_log_sigmoid(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
-    auto x = context.get_input(0);
-    x = context.mark_node(std::make_shared(x, element::f32));
-    auto sigmoid = context.mark_node(std::make_shared(x));
+    auto op_vector = op::translate_1to1_match_1_inputs_with_fp32_type_alignment(context);
+    PYTORCH_OP_CONVERSION_CHECK(op_vector.size() == 1,
+                                "Expected exactly one element in the vector. Got: ",
+                                op_vector.size());
+    auto sigmoid = op_vector[0];
     auto log = context.mark_node(std::make_shared(sigmoid));
     return {log};
 };
 
 OutputVector translate_log2(const NodeContext& context) {
     // torch.log2 returns a tensor with the logarithm to the base 2 of the elements of input.
- num_inputs_check(context, 1, 1); - auto x = context.get_input(0); + num_inputs_check(context, 1, 2); + auto op_vector = op::translate_1to1_match_1_inputs_with_fp32_type_alignment(context); + PYTORCH_OP_CONVERSION_CHECK(op_vector.size() == 1, + "Expected exactly one element in the vector. Got: ", + op_vector.size()); + auto log = op_vector[0]; + auto two = context.mark_node(v0::Constant::create(element::f32, Shape{}, {2})); - x = context.mark_node(std::make_shared(x, element::f32)); + two = context.mark_node(std::make_shared(two, log)); auto log2 = context.mark_node(std::make_shared(two)); - auto log = context.mark_node(std::make_shared(x)); + auto res = context.mark_node(std::make_shared(log, log2)); return {res}; }; OutputVector translate_log10(const NodeContext& context) { // torch.log10 returns a tensor with the logarithm to the base 10 of the elements of input. - num_inputs_check(context, 1, 1); - auto x = context.get_input(0); + num_inputs_check(context, 1, 2); + auto op_vector = op::translate_1to1_match_1_inputs_with_fp32_type_alignment(context); + PYTORCH_OP_CONVERSION_CHECK(op_vector.size() == 1, + "Expected exactly one element in the vector. Got: ", + op_vector.size()); + auto log = op_vector[0]; + auto ten = context.mark_node(v0::Constant::create(element::f32, Shape{}, {10})); - x = context.mark_node(std::make_shared(x, element::f32)); + ten = context.mark_node(std::make_shared(ten, log)); auto log10 = context.mark_node(std::make_shared(ten)); - auto log = context.mark_node(std::make_shared(x)); + auto res = context.mark_node(std::make_shared(log, log10)); return {res}; }; @@ -80,10 +84,10 @@ OutputVector translate_logsumexp(const NodeContext& context) { OutputVector translate_log1p(const NodeContext& context) { // torch.log1p returns a tensor with the natural logarithm of the elements of input + 1. 
- num_inputs_check(context, 1, 1); + num_inputs_check(context, 1, 2); auto x = context.get_input(0); - x = context.mark_node(std::make_shared(x, element::f32)); - auto one = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1})); + auto one = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}))->output(0); + align_eltwise_input_types(context, x, one); auto x_plus_one = context.mark_node(std::make_shared(x, one)); auto log = context.mark_node(std::make_shared(x_plus_one)); return {log}; diff --git a/src/frontends/pytorch/src/op/reciprocal.cpp b/src/frontends/pytorch/src/op/reciprocal.cpp index 94d14dd5fb7d1b..04697ea7e7e925 100644 --- a/src/frontends/pytorch/src/op/reciprocal.cpp +++ b/src/frontends/pytorch/src/op/reciprocal.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; OutputVector translate_reciprocal(const NodeContext& context) { - num_inputs_check(context, 1, 1); + num_inputs_check(context, 1, 2); auto x = context.get_input(0); auto const_neg_1 = context.mark_node(v0::Constant::create(element::f32, Shape{}, {-1}))->output(0); align_eltwise_input_types(context, x, const_neg_1, true); diff --git a/src/frontends/pytorch/src/op/rsqrt.cpp b/src/frontends/pytorch/src/op/rsqrt.cpp index b17f66bb9f572a..f782792e56502a 100644 --- a/src/frontends/pytorch/src/op/rsqrt.cpp +++ b/src/frontends/pytorch/src/op/rsqrt.cpp @@ -17,7 +17,7 @@ namespace op { using namespace ov::op; OutputVector translate_rsqrt(const NodeContext& context) { - num_inputs_check(context, 1, 1); + num_inputs_check(context, 1, 2); auto data = context.get_input(0); auto one_const = context.mark_node(v0::Constant::create(element::f32, Shape({}), {1})); Output fake_const_for_type = context.mark_node(v0::Constant::create(element::f32, Shape({}), {.5})); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 26280066c90777..d846badb5ca629 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -116,7 +116,6 @@ OP_CONVERTER(translate_linear); OP_CONVERTER(translate_linspace); OP_CONVERTER(translate_list_construct); OP_CONVERTER(translate_list_unpack); -OP_CONVERTER(translate_log); OP_CONVERTER(translate_log1p); OP_CONVERTER(translate_log_sigmoid); OP_CONVERTER(translate_log_softmax); @@ -305,11 +304,12 @@ const std::map get_supported_ops_ts() { {"aten::_upsample_bicubic2d_aa", op::translate_upsample_bicubic2d_aa}, {"aten::_upsample_bilinear2d_aa", op::translate_upsample_bilinear2d_aa}, {"aten::_weight_norm", op::translate_weight_norm}, - {"aten::abs", op::translate_1to1_match_1_inputs}, + {"aten::abs", op::optional_out, 1>}, {"aten::abs_", op::inplace_op>}, - {"aten::acos", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::acos", op::optional_out, 1>}, {"aten::acos_", op::inplace_op>}, - {"aten::acosh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::acosh", + op::optional_out, 1>}, {"aten::acosh_", op::inplace_op>}, {"aten::adaptive_avg_pool1d", op::quantizable_op}, {"aten::adaptive_avg_pool2d", op::quantizable_op}, @@ -333,13 +333,15 @@ const std::map get_supported_ops_ts() { {"aten::argsort", op::translate_argsort}, {"aten::as_strided", op::translate_as_strided}, {"aten::as_tensor", op::translate_as_tensor}, - {"aten::asin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::asin", op::optional_out, 1>}, {"aten::asin_", op::inplace_op>}, - {"aten::asinh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::asinh", + 
op::optional_out, 1>}, {"aten::asinh_", op::inplace_op>}, - {"aten::atan", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::atan", op::optional_out, 1>}, {"aten::atan_", op::inplace_op>}, - {"aten::atanh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::atanh", + op::optional_out, 1>}, {"aten::atanh_", op::inplace_op>}, {"aten::avg_pool1d", op::quantizable_op}, {"aten::avg_pool2d", op::quantizable_op}, @@ -356,7 +358,7 @@ const std::map get_supported_ops_ts() { {"aten::broadcast_to", op::translate_expand}, {"aten::cat", op::translate_cat}, {"aten::cdist", op::translate_cdist}, - {"aten::ceil", op::translate_1to1_match_1_inputs}, + {"aten::ceil", op::optional_out, 1>}, {"aten::ceil_", op::inplace_op>}, {"aten::channel_shuffle", op::translate_channel_shuffle}, // aten::chunk - Supported in limited set of patterns @@ -380,9 +382,9 @@ const std::map get_supported_ops_ts() { {"aten::convolution", op::translate_convolution}, {"aten::copy", op::skip_node}, {"aten::copy_", op::translate_copy_}, - {"aten::cos", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::cos", op::optional_out, 1>}, {"aten::cos_", op::inplace_op>}, - {"aten::cosh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::cosh", op::optional_out, 1>}, {"aten::cosh_", op::inplace_op>}, {"aten::cross", op::translate_cross}, {"aten::cumsum", op::translate_cumsum}, @@ -404,7 +406,7 @@ const std::map get_supported_ops_ts() { {"aten::erf_", op::inplace_op}, {"aten::erfc", op::translate_erfc}, {"aten::erfc_", op::inplace_op}, - {"aten::exp", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::exp", op::optional_out, 1>}, {"aten::exp_", op::inplace_op>}, {"aten::expand", op::translate_expand}, {"aten::expand_as", op::translate_expand_as}, @@ -421,7 +423,7 @@ const std::map get_supported_ops_ts() { {"aten::fill_diagonal_", op::inplace_op}, {"aten::flatten", op::quantizable_op}, {"aten::flip", op::translate_flip}, - {"aten::floor", op::translate_1to1_match_1_inputs}, + {"aten::floor", op::optional_out, 1>}, {"aten::floor_", op::inplace_op>}, {"aten::floor_divide", op::translate_floor_divide}, {"aten::floordiv", op::translate_floor_divide}, @@ -475,18 +477,19 @@ const std::map get_supported_ops_ts() { {"aten::linalg_vector_norm", op::translate_linalg_vector_norm}, {"aten::linear", op::translate_linear}, {"aten::linspace", op::translate_linspace}, - {"aten::log", op::translate_log}, - {"aten::log_", op::inplace_op}, + {"aten::log", op::optional_out, 1>}, + {"aten::log_", op::inplace_op>}, {"aten::logical_and", op::translate_and}, {"aten::logical_or", op::translate_or}, {"aten::logical_not", op::translate_not}, {"aten::logical_xor", op::translate_xor}, + {"aten::log_sigmoid", op::translate_log_sigmoid}, {"aten::log_softmax", op::translate_log_softmax}, - {"aten::log1p", op::translate_log1p}, + {"aten::log1p", op::optional_out}, {"aten::log1p_", op::inplace_op}, - {"aten::log2", op::translate_log2}, + {"aten::log2", op::optional_out}, {"aten::log2_", op::inplace_op}, - {"aten::log10", op::translate_log10}, + {"aten::log10", op::optional_out}, {"aten::log10_", op::inplace_op}, {"aten::lstm", op::translate_lstm}, {"aten::lt", op::translate_1to1_match_2_inputs_align_types}, @@ -548,10 +551,10 @@ const std::map get_supported_ops_ts() { {"aten::randn", op::translate_randn}, {"aten::randn_like", op::translate_randn_like}, // aten::real - Supported in limited set of patterns - {"aten::reciprocal", op::translate_reciprocal}, + {"aten::reciprocal", 
op::optional_out}, {"aten::reciprocal_", op::inplace_op}, // aten::reflection_pad2d - Supported in limited set of patterns - {"aten::relu", op::translate_1to1_match_1_inputs}, + {"aten::relu", op::optional_out, 1>}, {"aten::relu_", op::inplace_op>}, {"aten::relu6", op::translate_relu6}, {"aten::relu6_", op::inplace_op}, @@ -569,7 +572,7 @@ const std::map get_supported_ops_ts() { {"aten::rnn_tanh", op::translate_rnn}, {"aten::roll", op::translate_roll}, {"aten::round", op::translate_round}, - {"aten::rsqrt", op::translate_rsqrt}, + {"aten::rsqrt", op::optional_out}, {"aten::rsub", op::translate_rsub}, {"aten::ScalarImplicit", op::skip_node}, {"aten::scaled_dot_product_attention", op::translate_scaled_dot_product_attention}, @@ -582,14 +585,15 @@ const std::map get_supported_ops_ts() { {"aten::select", op::quantizable_op}, {"aten::selu", op::translate_selu}, {"aten::selu_", op::inplace_op}, - {"aten::sigmoid", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::sigmoid", + op::optional_out, 1>}, {"aten::sigmoid_", op::inplace_op>}, {"aten::sign", op::translate_sign}, {"aten::silu", op::translate_1to1_match_1_inputs}, {"aten::silu_", op::inplace_op>}, - {"aten::sin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::sin", op::optional_out, 1>}, {"aten::sin_", op::inplace_op>}, - {"aten::sinh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::sinh", op::optional_out, 1>}, {"aten::sinh_", op::inplace_op>}, {"aten::size", op::translate_size}, {"aten::slice", op::quantizable_op}, @@ -598,7 +602,7 @@ const std::map get_supported_ops_ts() { {"aten::sort", op::translate_sort}, // aten::split - Supported in limited set of patterns // aten::split_with_sizes - Supported in limited set of patterns - {"aten::sqrt", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::sqrt", op::optional_out, 1>}, {"aten::square", op::translate_square}, {"aten::squeeze", op::quantizable_op}, // aten::stack - Supported in limited set of patterns @@ -611,9 +615,9 @@ const std::map get_supported_ops_ts() { {"aten::t", op::translate_t}, {"aten::t_", op::inplace_op}, {"aten::take_along_dim", op::translate_take_along_dim}, - {"aten::tan", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::tan", op::optional_out, 1>}, {"aten::tan_", op::inplace_op>}, - {"aten::tanh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten::tanh", op::optional_out, 1>}, {"aten::tanh_", op::inplace_op>}, {"aten::tensor", op::translate_as_tensor}, // aten::tensor_split - Supported in limited set of patterns @@ -755,7 +759,9 @@ const std::map get_supported_ops_fx() { {"aten.eq.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.eq.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.erf.default", op::translate_erf}, + {"aten.erfc.default", op::translate_erfc}, {"aten.exp.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.expm1.default", op::translate_expm1}, {"aten.expand.default", op::translate_expand_fx}, {"aten.fake_quantize_per_channel_affine_cachemask.default", op::translate_fake_quantize_per_channel_affine_fx}, {"aten.fill.Scalar", op::translate_fill}, @@ -788,7 +794,7 @@ const std::map get_supported_ops_fx() { {"aten.leaky_relu_.default", op::inplace_op}, {"aten.lift_fresh_copy.default", op::skip_node}, {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm}, - {"aten.log.default", op::translate_log}, + {"aten.log.default", 
op::translate_1to1_match_1_inputs_with_fp32_type_alignment},
     {"aten.log_sigmoid_forward.default", op::translate_log_sigmoid},
     {"aten.log10.default", op::translate_log10},
     {"aten.log1p.default", op::translate_log1p},
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp
index f7387bd6adaa61..6493d9a3f62c2d 100644
--- a/src/frontends/pytorch/src/utils.hpp
+++ b/src/frontends/pytorch/src/utils.hpp
@@ -117,6 +117,17 @@ OutputVector inplace_op(const NodeContext& context) {
     return translation_res;
 }
 
+template
+OutputVector optional_out(const NodeContext& context) {
+    auto translation_res = T(context);
+    if (!context.input_is_none(idx)) {
+        FRONT_END_OP_CONVERSION_CHECK(translation_res.size() == 1,
+                                      "optional_out function must be used on single output translators");
+        context.mutate_input(idx, translation_res[0]);
+    }
+    return translation_res;
+}
+
 template
 OutputVector translate_1to1_match_1_inputs(const NodeContext& context) {
     FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0), "Input should not be None.");
diff --git a/tests/layer_tests/pytorch_tests/test_erf.py b/tests/layer_tests/pytorch_tests/test_erf.py
deleted file mode 100644
index 8bd9424bb3e209..00000000000000
--- a/tests/layer_tests/pytorch_tests/test_erf.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (C) 2018-2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import pytest
-
-from pytorch_layer_test_class import PytorchLayerTest, skip_if_export
-
-
-class TestErf(PytorchLayerTest):
-    def _prepare_input(self, input_dtype, out=False):
-        import numpy as np
-        x = np.linspace(-3, 3).astype(input_dtype)
-        if not out:
-            return (x, )
-        return (x, np.zeros_like(x).astype(input_dtype))
-
-    def create_model(self, mode="", input_dtype="float32"):
-        import torch
-        dtypes = {
-            "float32": torch.float32,
-            "float64": torch.float64,
-            "int32": torch.int32
-        }
-
-        dtype = dtypes[input_dtype]
-        class aten_erf(torch.nn.Module):
-            def __init__(self, mode, dtype):
-                super(aten_erf, self).__init__()
-                self.dtype = dtype
-                if mode == "out":
-                    self.forward = self.forward_out
-                elif mode == "inplace":
-                    self.forward = self.forward_inplace
-
-            def forward(self, x):
-                return torch.special.erf(x.to(self.dtype))
-
-            def forward_out(self, x, y):
-                return torch.special.erf(x.to(self.dtype), out=y), y
-
-            def forward_inplace(self, x):
-                x = x.to(self.dtype)
-                return x.erf_(), x
-
-        ref_net = None
-
-        return aten_erf(mode, dtype), ref_net, "aten::erf" if mode != "inplace" else "aten::erf_"
-
-    @pytest.mark.nightly
-    @pytest.mark.precommit
-    @pytest.mark.precommit_torch_export
-    @pytest.mark.precommit_fx_backend
-    @pytest.mark.parametrize("mode,input_dtype", [
-        ("", "float32"), ("", "float64"), ("", "int32"),
-        ("out", "float32"), ("out", "float64"),
-        ("inplace", "float32"), ("inplace", "float64")])
-    def test_erf(self, mode, input_dtype, ie_device, precision, ir_version):
-        if PytorchLayerTest.use_torch_export() and mode in ["out", "inplace"]:
-            pytest.skip(reason="export fails for inplace or out")
-        self._test(*self.create_model(mode, input_dtype), ie_device, precision, ir_version,
-                   kwargs_to_prepare_input={"input_dtype": input_dtype, "out": mode == "out"} )
\ No newline at end of file
diff --git a/tests/layer_tests/pytorch_tests/test_erfc.py b/tests/layer_tests/pytorch_tests/test_erfc.py
deleted file mode 100644
index 85da9e41b759b0..00000000000000
--- a/tests/layer_tests/pytorch_tests/test_erfc.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (C) 2018-2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
- -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - - -class TestErfc(PytorchLayerTest): - def _prepare_input(self, input_dtype, out=False): - import numpy as np - x = np.linspace(-3, 3).astype(input_dtype) - if not out: - return (x, ) - return (x, np.zeros_like(x).astype(input_dtype)) - - def create_model(self, mode="", input_dtype="float32"): - import torch - dtypes = { - "float32": torch.float32, - "float64": torch.float64, - "int32": torch.int32 - } - - dtype = dtypes[input_dtype] - class aten_erfc(torch.nn.Module): - def __init__(self, mode, dtype): - super(aten_erfc, self).__init__() - self.dtype = dtype - if mode == "out": - self.forward = self.forward_out - elif mode == "inplace": - self.forward = self.forward_inplace - - def forward(self, x): - return torch.special.erfc(x.to(self.dtype)) - - def forward_out(self, x, y): - return torch.special.erfc(x.to(self.dtype), out=y), y - - def forward_inplace(self, x): - x = x.to(self.dtype) - return x.erfc_(), x - - ref_net = None - - return aten_erfc(mode, dtype), ref_net, "aten::erfc" if mode != "inplace" else "aten::erfc_" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.parametrize("mode,input_dtype", [ - ("", "float32"), ("", "float64"), ("", "int32"), - ("out", "float32"), ("out", "float64"), - ("inplace", "float32"), ("inplace", "float64")]) - def test_erfc(self, mode, input_dtype, ie_device, precision, ir_version): - self._test(*self.create_model(mode, input_dtype), ie_device, precision, ir_version, - kwargs_to_prepare_input={"input_dtype": input_dtype, "out": mode == "out"} ) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_expm1.py b/tests/layer_tests/pytorch_tests/test_expm1.py deleted file mode 100644 index 8c00a644199b70..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_expm1.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0i - -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - -class TestExpm1(PytorchLayerTest): - def _prepare_input(self, inputs, dtype=None, out=False): - import numpy as np - x = np.array(inputs).astype(dtype) - if not out: - return (x, ) - return (x, np.zeros_like(x).astype(dtype)) - - def create_model(self, mode="", dtype=None): - import torch - dtype_map = { - "float32": torch.float32, - "float64": torch.float64, - "int32": torch.int32, - "int64": torch.int64, - } - - dtype = dtype_map.get(dtype) - - class aten_expm1(torch.nn.Module): - def __init__(self, mode, dtype): - super().__init__() - self.dtype = dtype - if mode == "out": - self.forward = self.forward_out - else: - self.forward = self.forward_default - - def forward_default(self, x): - return torch.expm1(x.to(self.dtype)).to(torch.float32) - - def forward_out(self, x, y): - y = y.to(torch.float32) - return torch.expm1(x.to(self.dtype), out=y).to(torch.float32), y - - model_class = aten_expm1(mode, dtype) - - ref_net = None - - return model_class, ref_net, "aten::expm1" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.parametrize("mode,dtype", [ - ("", "float32"), ("", "float64"), ("", "int32"), ("", "int64"), - ("out", "float32"), ("out", "float64"), ("out", "int32"), ("out", "int64")]) - @pytest.mark.parametrize("inputs", [[0, 1, 2, 3, 4, 5], [-2, -1, 0, 1, 2, 3], [1, 2, 3, 4, 5, 6]]) - def test_expm1(self, mode, dtype, inputs, ie_device, precision, ir_version): - self._test( - *self.create_model(mode, dtype), - ie_device, - precision, - ir_version, - 
trace_model=True, - freeze_model=False, - kwargs_to_prepare_input={"inputs": inputs, "dtype": dtype, "out": mode == "out"} - ) diff --git a/tests/layer_tests/pytorch_tests/test_log.py b/tests/layer_tests/pytorch_tests/test_log.py deleted file mode 100644 index 8d595e82e82166..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_log.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest -from pytorch_layer_test_class import PytorchLayerTest - - -class TestLog(PytorchLayerTest): - def _prepare_input(self, dtype): - import numpy as np - return (np.random.uniform(2, 16, (1, 10)).astype(dtype),) - - def create_model(self, op): - import torch - - ops = { - "log": torch.log, - "log_": torch.log_, - "log2": torch.log2, - "log2_": torch.log2_, - "log10": torch.log10, - "log10_": torch.log10_, - "log1p": torch.log1p, - "log1p_": torch.log1p_ - } - - op_fn = ops[op] - - class aten_log(torch.nn.Module): - def __init__(self, op): - super(aten_log, self).__init__() - self.op = op - - def forward(self, x): - return self.op(x) - - ref_net = None - - return aten_log(op_fn), ref_net, f"aten::{op}" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.parametrize(("op", "input_dtype"), - [["log", "float32"], - ["log", "int32"], - ["log_", "float32"], - ["log2", "float32"], - ["log2", "int32"], - ["log2_", "float32"], - ["log10", "float32"], - ["log10", "int32"], - ["log10_", "float32"], - ["log1p", "float32"], - ["log1p", "int32"], - ["log1p_", "float32"]]) - def test_log(self, op, input_dtype, ie_device, precision, ir_version): - if PytorchLayerTest.use_torch_export() and op[-1] == "_": - pytest.skip(reason="export fails for inplace") - self._test(*self.create_model(op), ie_device, precision, - ir_version, kwargs_to_prepare_input={"dtype": input_dtype}) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_relu6.py b/tests/layer_tests/pytorch_tests/test_relu6.py deleted file mode 100644 index e705e38c71d939..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_relu6.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - - -class TestRelu6(PytorchLayerTest): - def _prepare_input(self): - import numpy as np - return (np.random.randn(1, 3, 224, 224).astype(np.float32),) - - def create_model(self, inplace=False): - import torch - import torch.nn.functional as F - - class aten_relu6(torch.nn.Module): - def __init__(self, inplace): - super(aten_relu6, self).__init__() - self.inplace = inplace - - def forward(self, x): - return x, F.relu6(x, inplace=self.inplace) - - ref_net = None - - return aten_relu6(inplace), ref_net, "aten::relu6" if not inplace else "aten::relu6_" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.parametrize("inplace", [True, False]) - def test_relu6(self, inplace, ie_device, precision, ir_version): - self._test(*self.create_model(inplace), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_selu.py b/tests/layer_tests/pytorch_tests/test_selu.py deleted file mode 100644 index 4be0b7029e3c35..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_selu.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - - -class TestSilu(PytorchLayerTest): - def 
_prepare_input(self): - import numpy as np - return (np.random.randn(1, 3, 224, 224).astype(np.float32),) - - def create_model(self, inplace=False): - import torch - import torch.nn.functional as F - - class aten_selu(torch.nn.Module): - def __init__(self, inplace): - super(aten_selu, self).__init__() - self.inplace = inplace - - def forward(self, x): - return x, F.selu(x, inplace=self.inplace) - - ref_net = None - - return aten_selu(inplace), ref_net, "aten::selu" if not inplace else "aten::selu_" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.parametrize("inplace", [True, False]) - def test_silu(self, inplace, ie_device, precision, ir_version): - self._test(*self.create_model(inplace), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_silu.py b/tests/layer_tests/pytorch_tests/test_silu.py deleted file mode 100644 index 3c206b073de1ed..00000000000000 --- a/tests/layer_tests/pytorch_tests/test_silu.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from pytorch_layer_test_class import PytorchLayerTest - - -class TestSilu(PytorchLayerTest): - def _prepare_input(self): - import numpy as np - return (np.random.randn(1, 3, 224, 224).astype(np.float32),) - - def create_model(self): - import torch - import torch.nn.functional as F - - class aten_silu(torch.nn.Module): - def __init__(self): - super(aten_silu, self).__init__() - - def forward(self, x): - return F.silu(x) - - ref_net = None - - return aten_silu(), ref_net, "aten::silu" - - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.precommit_torch_export - def test_silu(self, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_unary_ops.py b/tests/layer_tests/pytorch_tests/test_unary_ops.py index f495e7ba3d272f..9981ba3b178bda 100644 --- a/tests/layer_tests/pytorch_tests/test_unary_ops.py +++ b/tests/layer_tests/pytorch_tests/test_unary_ops.py @@ -12,8 +12,14 @@ "aten::abs_": torch.abs_, "aten::rsqrt": torch.rsqrt, "aten::sqrt": torch.sqrt, + "aten::erf": torch.erf, + "aten::erf_": torch.erf_, + "aten::erfc": torch.erfc, + "aten::erfc_": torch.erfc_, "aten::exp": torch.exp, "aten::exp_": torch.exp_, + "aten::expm1": torch.expm1, + "aten::expm1_": torch.expm1_, "aten::relu": torch.relu, "aten::relu_": torch.relu_, "aten::ceil": torch.ceil, @@ -24,6 +30,18 @@ "aten::sigmoid_": torch.sigmoid_, "aten::reciprocal": torch.reciprocal, "aten::reciprocal_": torch.reciprocal_, + "aten::relu6": F.relu6, + "aten::selu": F.selu, + "aten::silu": F.silu, + "aten::log": torch.log, + "aten::log_": torch.log_, + "aten::log2": torch.log2, + "aten::log2_": torch.log2_, + "aten::log10": torch.log10, + "aten::log10_": torch.log10_, + "aten::log1p": torch.log1p, + "aten::log1p_": torch.log1p_, + "aten::log_sigmoid": F.logsigmoid, "aten::cos": torch.cos, "aten::cos_": torch.cos_, "aten::sin": torch.sin, @@ -50,6 +68,7 @@ "aten::atanh_": torch.atanh_ } + class unary_op_net(torch.nn.Module): def __init__(self, op, dtype): super(unary_op_net, self).__init__() @@ -62,6 +81,32 @@ def forward(self, x): return y, x1 +class unary_op_out_net(torch.nn.Module): + def __init__(self, op, dtype): + super(unary_op_out_net, self).__init__() + self.dtype = dtype + self.op = op + + def forward(self, x): + x1 = x.to(self.dtype) + y = self.op(x1) + z = torch.empty_like(y) + y1 = self.op(x1, out=z) + return y1, z + + +class 
unary_func_op_inplace_net(torch.nn.Module): + def __init__(self, op, dtype): + super(unary_func_op_inplace_net, self).__init__() + self.dtype = dtype + self.op = op + + def forward(self, x): + x1 = x.to(self.dtype) + y = self.op(x1, inplace=True) + return y, x1 + + class TestUnaryOp(PytorchLayerTest): def _prepare_input(self): # random number in range [1, 11) @@ -73,33 +118,40 @@ def _prepare_input(self): @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.int8, torch.uint8, torch.int32, torch.int64]) @pytest.mark.parametrize("op_type", - [ - "aten::abs", - "aten::rsqrt", - "aten::sqrt", - "aten::exp", - "aten::relu", - skip_if_export("aten::relu_"), - "aten::ceil", - skip_if_export("aten::ceil_"), - "aten::floor", - skip_if_export("aten::floor_"), - "aten::sigmoid", - "aten::reciprocal", - # trigonometry - "aten::cos", - "aten::sin", - "aten::tan", - "aten::cosh", - "aten::sinh", - "aten::tanh", - "aten::acos", - "aten::asin", - "aten::atan", - "aten::acosh", - "aten::asinh", - "aten::atanh" - ]) + [ + "aten::abs", + "aten::rsqrt", + "aten::sqrt", + "aten::erf", + "aten::erfc", + "aten::exp", + "aten::expm1", + "aten::relu", + skip_if_export("aten::relu_"), + "aten::ceil", + skip_if_export("aten::ceil_"), + "aten::floor", + skip_if_export("aten::floor_"), + "aten::sigmoid", + "aten::reciprocal", + "aten::log", + "aten::log2", + "aten::log10", + "aten::log1p", + # trigonometry + "aten::cos", + "aten::sin", + "aten::tan", + "aten::cosh", + "aten::sinh", + "aten::tanh", + "aten::acos", + "aten::asin", + "aten::atan", + "aten::acosh", + "aten::asinh", + "aten::atanh" + ]) def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype self._test(unary_op_net(OPS[op_type], dtype), None, op_type, @@ -109,54 +161,92 @@ def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version): @pytest.mark.precommit @pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) @pytest.mark.parametrize("op_type", - [ - # some pytorch inplace ops do not support int - "aten::abs_", - "aten::exp_", - "aten::sigmoid_", - "aten::reciprocal_", - # trigonometry - "aten::cos_", - "aten::sin_", - "aten::tan_", - "aten::cosh_", - "aten::sinh_", - "aten::tanh_", - "aten::acos_", - "aten::asin_", - "aten::atan_", - "aten::acosh_", - "aten::asinh_", - "aten::atanh_" - ]) + [ + # some pytorch inplace ops do not support int + "aten::abs_", + "aten::erf_", + "aten::erfc_", + "aten::exp_", + "aten::expm1_", + "aten::sigmoid_", + "aten::reciprocal_", + "aten::relu6", + "aten::selu", + "aten::silu", + "aten::log_sigmoid", + "aten::log_", + "aten::log2_", + "aten::log10_", + "aten::log1p_", + # trigonometry + "aten::cos_", + "aten::sin_", + "aten::tan_", + "aten::cosh_", + "aten::sinh_", + "aten::tanh_", + "aten::acos_", + "aten::asin_", + "aten::atan_", + "aten::acosh_", + "aten::asinh_", + "aten::atanh_" + ]) def test_unary_op_float(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype self._test(unary_op_net(OPS[op_type], dtype), None, op_type, ie_device, precision, ir_version) - -class glu_net(torch.nn.Module): - def __init__(self, dim, dtype): - super(glu_net, self).__init__() + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.int8, torch.uint8, torch.int32, torch.int64]) + @pytest.mark.parametrize("op_type", + [ + "aten::abs", + "aten::rsqrt", + "aten::sqrt", + "aten::erf", + "aten::erfc", + "aten::exp", + "aten::expm1", + 
"aten::relu", + "aten::ceil", + "aten::floor", + "aten::sigmoid", + "aten::reciprocal", + "aten::log", + "aten::log2", + "aten::log10", + "aten::log1p", + # trigonometry + "aten::cos", + "aten::sin", + "aten::tan", + "aten::cosh", + "aten::sinh", + "aten::tanh", + "aten::acos", + "aten::asin", + "aten::atan", + "aten::acosh", + "aten::asinh", + "aten::atanh" + ]) + def test_unary_op_out(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype - self.dim = dim - - def forward(self, x): - y = F.glu(x.to(self.dtype), dim=self.dim) - return y - - -class TestGluOp(PytorchLayerTest): - def _prepare_input(self): - # random number in range [1, 11) - x = torch.rand(2, 4, 10, 10) * 10 + 1 - return (x.to(self.dtype).numpy(),) + self._test(unary_op_out_net(OPS[op_type], dtype), None, op_type, + ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit - @pytest.mark.parametrize("dim", [0, 1, 2, 3, -1, -2]) @pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) - def test_glu(self, dim, dtype, ie_device, precision, ir_version): + @pytest.mark.parametrize("op_type", + [ + "aten::relu6", + "aten::selu", + "aten::silu", + ]) + def test_unary_func_op_inplace(self, op_type, dtype, ie_device, precision, ir_version): self.dtype = dtype - self._test(glu_net(dim, dtype), None, "aten::glu", + self._test(unary_func_op_inplace_net(OPS[op_type], dtype), None, op_type + "_", ie_device, precision, ir_version) From a97abd59acedff92c3ef7d032300d0ccb8664de7 Mon Sep 17 00:00:00 2001 From: Siddhant Chauhan Date: Tue, 5 Mar 2024 09:32:28 +0530 Subject: [PATCH 22/28] [ONNX] Extend ONNX Frontend with Function `Celu-12` (#22002) ### Details: - created implementation for `Celu-12` operator - created tests ### Tickets: - Closes #20543 --------- Co-authored-by: Andrei Kochin Co-authored-by: Georgy Krivoruchko Co-authored-by: Katarzyna Mitrus --- .../onnx/frontend/src/core/transform.hpp | 7 ++- src/frontends/onnx/frontend/src/op/celu.cpp | 35 +++++++++++++++ src/frontends/onnx/frontend/src/op/celu.hpp | 20 +++++++++ .../onnx/frontend/src/ops_bridge.cpp | 2 + .../onnx/tests/models/celu_float.prototxt | 44 +++++++++++++++++++ .../tests/models/celu_float_alpha.prototxt | 44 +++++++++++++++++++ src/frontends/onnx/tests/onnx_import.in.cpp | 20 +++++++++ 7 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 src/frontends/onnx/frontend/src/op/celu.cpp create mode 100644 src/frontends/onnx/frontend/src/op/celu.hpp create mode 100644 src/frontends/onnx/tests/models/celu_float.prototxt create mode 100644 src/frontends/onnx/tests/models/celu_float_alpha.prototxt diff --git a/src/frontends/onnx/frontend/src/core/transform.hpp b/src/frontends/onnx/frontend/src/core/transform.hpp index fd33ac5a7bcfc0..2a174cc40ae8ec 100644 --- a/src/frontends/onnx/frontend/src/core/transform.hpp +++ b/src/frontends/onnx/frontend/src/core/transform.hpp @@ -13,8 +13,11 @@ namespace transform { using ::ONNX_NAMESPACE::ModelProto; -static const std::vector onnx_functions_to_expand = - {"AffineGrid", "Bernoulli", "Celu", "CenterCropPad", "NegativeLogLikelihoodLoss", "SoftmaxCrossEntropyLoss"}; +static const std::vector onnx_functions_to_expand = {"AffineGrid", + "Bernoulli", + "CenterCropPad", + "NegativeLogLikelihoodLoss", + "SoftmaxCrossEntropyLoss"}; /// \brief Replace nodes with expanded body of ONNX functions /// diff --git a/src/frontends/onnx/frontend/src/op/celu.cpp b/src/frontends/onnx/frontend/src/op/celu.cpp new file mode 100644 index 00000000000000..c0c078e0a3d7a1 --- /dev/null +++ 
b/src/frontends/onnx/frontend/src/op/celu.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "op/celu.hpp" + +#include + +#include "exceptions.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/elu.hpp" +#include "openvino/op/multiply.hpp" +#include "utils/common.hpp" + +using namespace ov::op; + +namespace ov { +namespace frontend { +namespace onnx { +namespace op { +namespace set_1 { +ov::OutputVector celu(const ov::frontend::onnx::Node& node) { + auto alpha_node = node.get_attribute_as_constant("alpha", 1.0f); + auto x_celu = node.get_ov_inputs().at(0); + + auto divide_node = std::make_shared(x_celu, alpha_node); + auto elu_node = std::make_shared(divide_node, 1.0); + + return {std::make_shared(alpha_node, elu_node)}; +} +} // namespace set_1 +} // namespace op +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/op/celu.hpp b/src/frontends/onnx/frontend/src/op/celu.hpp new file mode 100644 index 00000000000000..0b47beb6d237a5 --- /dev/null +++ b/src/frontends/onnx/frontend/src/op/celu.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "core/node.hpp" + +namespace ov { +namespace frontend { +namespace onnx { +namespace op { +namespace set_1 { +ov::OutputVector celu(const ov::frontend::onnx::Node& node); + +} // namespace set_1 +} // namespace op +} // namespace onnx +} // namespace frontend +} // namespace ov diff --git a/src/frontends/onnx/frontend/src/ops_bridge.cpp b/src/frontends/onnx/frontend/src/ops_bridge.cpp index 951302dfdcd1de..2e4d60d87e60ab 100644 --- a/src/frontends/onnx/frontend/src/ops_bridge.cpp +++ b/src/frontends/onnx/frontend/src/ops_bridge.cpp @@ -36,6 +36,7 @@ #include "op/cast.hpp" #include "op/cast_like.hpp" #include "op/ceil.hpp" +#include "op/celu.hpp" #include "op/clip.hpp" #include "op/com.microsoft/attention.hpp" #include "op/com.microsoft/bias_gelu.hpp" @@ -368,6 +369,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Cast", 1, cast); REGISTER_OPERATOR("CastLike", 1, cast_like); REGISTER_OPERATOR("Ceil", 1, ceil); + REGISTER_OPERATOR("Celu", 1, celu); REGISTER_OPERATOR("Clip", 1, clip); REGISTER_OPERATOR("Clip", 11, clip); REGISTER_OPERATOR("Concat", 1, concat); diff --git a/src/frontends/onnx/tests/models/celu_float.prototxt b/src/frontends/onnx/tests/models/celu_float.prototxt new file mode 100644 index 00000000000000..9bb50e7e364b98 --- /dev/null +++ b/src/frontends/onnx/tests/models/celu_float.prototxt @@ -0,0 +1,44 @@ +ir_version: 7 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "X" + output: "Y" + op_type: "Celu" + attribute { + name: "alpha" + f: 1.0 + type: FLOAT + } + } + name: "test_celu_float" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 12 +} diff --git a/src/frontends/onnx/tests/models/celu_float_alpha.prototxt b/src/frontends/onnx/tests/models/celu_float_alpha.prototxt new file mode 100644 index 00000000000000..aac1333dd2b5a0 --- /dev/null +++ b/src/frontends/onnx/tests/models/celu_float_alpha.prototxt @@ -0,0 +1,44 @@ +ir_version: 7 +producer_name: "OpenVINO ONNX Frontend" +graph { + node { + input: "X" + output: "Y" + op_type: "Celu" + 
attribute {
+    name: "alpha"
+    f: 3.0
+    type: FLOAT
+    }
+  }
+  name: "test_celu_float_alpha"
+  input {
+    name: "X"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+  output {
+    name: "Y"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim {
+            dim_value: 4
+          }
+        }
+      }
+    }
+  }
+}
+opset_import {
+  version: 12
+}
diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp
index f9f69583ccb696..b02d3a1116131c 100644
--- a/src/frontends/onnx/tests/onnx_import.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import.in.cpp
@@ -6228,6 +6228,26 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_bitwise_not) {
     test_case.run();
 }
 
+OPENVINO_TEST(${BACKEND_NAME}, onnx_model_celu_float) {
+    auto model = convert_model("celu_float.onnx");
+
+    auto test_case = ov::test::TestCase(model, s_device);
+    test_case.add_input(Shape{2}, {-45.f, 22.98f});
+    test_case.add_expected_output(Shape{2}, {-1.f, 22.98f});
+
+    test_case.run();
+}
+
+OPENVINO_TEST(${BACKEND_NAME}, onnx_model_celu_float_alpha) {
+    auto model = convert_model("celu_float_alpha.onnx");
+
+    auto test_case = ov::test::TestCase(model, s_device);
+    test_case.add_input(Shape{4}, {-5.f, -4.25f, -10.f, 7.3f});
+    test_case.add_expected_output(Shape{4}, {-2.43337319f, -2.27243678f, -2.89297802f, 7.3f});
+
+    test_case.run();
+}
+
 OPENVINO_TEST(${BACKEND_NAME}, onnx_model_gelu_float) {
     auto model = convert_model("gelu_float.onnx");

From cfb27d7b6e71a90e8911e9dae1b4cce7b44c613f Mon Sep 17 00:00:00 2001
From: Zhang Yi
Date: Tue, 5 Mar 2024 15:38:08 +0800
Subject: [PATCH 23/28] [CPU] Remove unsupported clang option. (#23153)

### Details:
 - *Remove unsupported clang compiler option*
 - *Fix Clang warning: unknown warning option '-Wno-delete-non-abstract-non-virtual-dtor';*

### Tickets:
 - *CVS-120605*

---
 src/plugins/intel_cpu/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt
index d62c67ab28ad21..41794c72b5c9cc 100644
--- a/src/plugins/intel_cpu/CMakeLists.txt
+++ b/src/plugins/intel_cpu/CMakeLists.txt
@@ -22,7 +22,12 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
     # oneDNN arm64: unary minus operator applied to unsigned type, result still unsigned
     ov_add_compiler_flags(/wd4146)
 elseif(OV_COMPILER_IS_CLANG)
-    ov_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
+    # -Wno-delete-non-abstract-non-virtual-dtor is supported only by clang > 8.0
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8)
+        ov_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
+    else()
+        ov_add_compiler_flags(-Wno-delete-non-virtual-dtor)
+    endif()
 endif()
 
 set(OV_CPU_ARM_TARGET_GENERIC_ARCHS armv8a

From 1c5a736ad180027e82fe37f43456784129281c14 Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev
Date: Tue, 5 Mar 2024 12:03:44 +0400
Subject: [PATCH 24/28] [CPU] RoPE added to extension list. (#23268)

### Details:
 - *RoPE node from the CPU opset was added to the extensions list.
That fixes model deserialization from cache.*

### Tickets:
 - *Part of the task 127331*

---
 src/plugins/intel_cpu/src/extension.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp
index 7fc3f85f77a892..ed53f61f5a5187 100644
--- a/src/plugins/intel_cpu/src/extension.cpp
+++ b/src/plugins/intel_cpu/src/extension.cpp
@@ -16,6 +16,7 @@
 #include "transformations/cpu_opset/common/op/leaky_relu.hpp"
 #include "transformations/cpu_opset/common/op/ngram.hpp"
 #include "transformations/cpu_opset/common/op/power_static.hpp"
+#include "transformations/cpu_opset/common/op/rope.hpp"
 #include "transformations/cpu_opset/common/op/sdpa.hpp"
 #include "transformations/cpu_opset/common/op/swish_cpu.hpp"
 #include "transformations/cpu_opset/x64/op/interaction.hpp"
@@ -67,6 +68,7 @@ class TypeRelaxedExtension : public ov::OpExtension> {
     OP_EXTENSION(ov::intel_cpu::FullyConnectedNode)                            \
     OP_EXTENSION(ov::intel_cpu::LeakyReluNode)                                 \
     OP_EXTENSION(ov::intel_cpu::PowerStaticNode)                               \
+    OP_EXTENSION(ov::intel_cpu::RoPENode)                                      \
     OP_EXTENSION(ov::intel_cpu::SwishNode)                                     \
     OP_EXTENSION(ov::intel_cpu::NgramNode)                                     \
     OP_EXTENSION(ov::op::internal::NonMaxSuppressionIEInternal)                \

From 981df38e715a0e76b24be830282072ba50066060 Mon Sep 17 00:00:00 2001
From: Anastasiia Pnevskaia
Date: Tue, 5 Mar 2024 11:10:10 +0100
Subject: [PATCH 25/28] Removed usage of matrix param in TF models tests job.
 (#23255)

### Details:
 - Removed usage of the matrix param in the TF models tests job, as the timeout is shared between jobs launched by the matrix, which caused nightly jobs to be aborted on timeout.

### Tickets:
 -

---
 .github/workflows/linux.yml | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 0e2ee84264d6a6..c26182fd71880b 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -511,18 +511,23 @@ jobs:
       runner: 'ubuntu-20.04-8-cores'
       model_scope: 'precommit'
 
-  TensorFlow_Models_Tests_Nightly:
-    name: TensorFlow Models tests
+  TensorFlow_Models_Tests_Nightly_TF_HUB:
+    name: TensorFlow TF Hub Models tests
+    if: ${{ github.event_name == 'schedule' }}
+    needs: [ Build, Smart_CI, Openvino_tokenizers ]
+    uses: ./.github/workflows/job_tensorflow_models_tests.yml
+    with:
+      runner: 'ubuntu-20.04-16-cores'
+      model_scope: 'nightly_tf_hub'
+
+  TensorFlow_Models_Tests_Nightly_HF:
+    name: TensorFlow Hugging Face Models tests
     if: ${{ github.event_name == 'schedule' }}
     needs: [ Build, Smart_CI, Openvino_tokenizers ]
-    strategy:
-      max-parallel: 2
-      matrix:
-        MODEL_SCOPE: ['nightly_hf', 'nightly_tf_hub']
     uses: ./.github/workflows/job_tensorflow_models_tests.yml
     with:
       runner: 'ubuntu-20.04-16-cores'
-      model_scope: ${{ matrix.MODEL_SCOPE }}
+      model_scope: 'nightly_hf'
 
   # TODO: Switch back to self-hosted runners
   # container:

From 4f5c2a7a939098dae732d1f0c34efb08eab9b240 Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Tue, 5 Mar 2024 11:29:11 +0100
Subject: [PATCH 26/28] Change model since previous persimmon model was removed
 (#23201)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 tests/model_hub_tests/pytorch/hf_transformers_models  | 2 +-
 tests/model_hub_tests/pytorch/test_hf_transformers.py | 1 -
 tests/model_hub_tests/pytorch/torch_utils.py          | 9 +--------
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models
index f88ea4430520b4..63f39659234508 100644
--- a/tests/model_hub_tests/pytorch/hf_transformers_models
+++ b/tests/model_hub_tests/pytorch/hf_transformers_models
@@ -3,6 +3,7 @@ abcp4/mymodel-test,mymodel,skip,Load problem
 abeja/gpt-neox-japanese-2.7b,gpt_neox_japanese
 acl-submission-anonym/EAM-spectral,examuse,skip,Load problem
 adalbertojunior/modular-test,modular,skip,Load problem
+adept/persimmon-8b-base,persimmon
 aerner/lm-v2,open-llama,xfail,Example input problem
 afonsosamarques/ardt-vanilla-combo_train_hopper_v2-2508_1336-33,decision_transformer,xfail,Tracing problem
 aihijo/gec-zh-gector-bert-large,gector,skip,Load problem
@@ -18,7 +19,6 @@ anugunj/omnivore-swinL-in21k,omnivore,skip,Load problem
 apple/mobilevitv2-1.0-imagenet1k-256,mobilevitv2,xfail,Unsupported op aten::col2im
 ArthurZ/jukebox_prior_0,jukebox_prior,skip,Load problem
 ArthurZ/jukebox-vqvae,jukebox_vqvae,skip,Load problem
-ArthurZ/persimmon-8b-base,persimmon
 ashishpatel26/span-marker-bert-base-fewnerd-coarse-super,span-marker,skip,Load problem
 asi/albert-act-tiny,albert_act,skip,Load problem
 BAAI/AltCLIP,altclip
diff --git a/tests/model_hub_tests/pytorch/test_hf_transformers.py b/tests/model_hub_tests/pytorch/test_hf_transformers.py
index 4adbcf536c398d..ff5f0c0957f9d5 100644
--- a/tests/model_hub_tests/pytorch/test_hf_transformers.py
+++ b/tests/model_hub_tests/pytorch/test_hf_transformers.py
@@ -563,5 +563,4 @@ def test_convert_model_precommit(self, name, type, ie_device):
                              process_pytest_marks(os.path.join(os.path.dirname(__file__), "hf_transformers_models")))
     @pytest.mark.nightly
     def test_convert_model_all_models(self, name, ie_device):
-        self.mode = "export"
         self.run(model_name=name, model_link=None, ie_device=ie_device)
diff --git a/tests/model_hub_tests/pytorch/torch_utils.py b/tests/model_hub_tests/pytorch/torch_utils.py
index afb348a9341202..09a891046991cd 100644
--- a/tests/model_hub_tests/pytorch/torch_utils.py
+++ b/tests/model_hub_tests/pytorch/torch_utils.py
@@ -50,14 +50,6 @@ def setup_class(self):
         torch.set_grad_enabled(False)
 
     def load_model(self, model_name, model_link):
-        if self.cached_model is not None and self.cached_model[0] == model_name and self.cached_model[1] == model_link:
-            return self.cached_model[2]
-        else:
-            res = self.load_model_impl(model_name, model_link)
-            self.cached_model = (model_name, model_link, res)
-            return res
-
-    def load_model_impl(self, model_name, model_link):
         raise "load_model is not implemented"
 
     def get_inputs_info(self, model_obj):
@@ -81,6 +73,7 @@ def convert_model_impl(self, model_obj):
 
         input_shapes = []
         input_types = []
+        model_obj.eval()
         if isinstance(self.example, dict):
             graph = export(model_obj, tuple(), self.example)
             for input_data in self.example.values():

From d093c7465f8ebd8ab4f4d586bb9f4ffd3aaeef08 Mon Sep 17 00:00:00 2001
From: Tomasz Jankowski
Date: Tue, 5 Mar 2024 13:24:47 +0100
Subject: [PATCH 27/28] [core] Optimize ScatterElementsUpdate reference
 implementation binary size (#23146)

### Details:
 - Adds tests to `ov_template_func_tests` for ScatterElementsUpdate version 12.
 - Removes the Indices type from the template parameters of the internal template function - everything now works on `int64_t`.
 - Uses `std::memcpy` instead of type-dependent assignment.
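
A minimal standalone sketch of the idea (with hypothetical names, not the actual OpenVINO helpers): indices are widened to `int64_t` once up front, and the element copy is type-erased behind `std::memcpy` on raw bytes keyed by the runtime element size, so the scatter loop is instantiated only once regardless of the data and index types.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Widen any integral index type to int64_t once, so the scatter loop below
// needs no index-type template parameter.
template <typename IndexT>
std::vector<int64_t> to_int64(const IndexT* indices, std::size_t count) {
    return std::vector<int64_t>(indices, indices + count);
}

// Type-erased scatter over a flat buffer: data and updates are raw bytes and
// elements are moved with std::memcpy, so no per-DataType instantiation is needed.
inline void scatter_flat(char* data,
                         const char* updates,
                         const std::vector<int64_t>& indices,
                         std::size_t elem_size) {
    for (std::size_t i = 0; i < indices.size(); ++i) {
        std::memcpy(data + indices[i] * elem_size, updates + i * elem_size, elem_size);
    }
}
```

A thin templated entry point can keep the public signature unchanged while forwarding to the single type-erased body.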
### Tickets: - CVS-119213 --- .../reference/scatter_elements_update.hpp | 109 ++++---- .../op_reference/scatter_elements_update.cpp | 246 ++++++++++++++++-- 2 files changed, 284 insertions(+), 71 deletions(-) diff --git a/src/core/reference/include/openvino/reference/scatter_elements_update.hpp b/src/core/reference/include/openvino/reference/scatter_elements_update.hpp index 6e10914a5f5311..1cf8cd62a97196 100644 --- a/src/core/reference/include/openvino/reference/scatter_elements_update.hpp +++ b/src/core/reference/include/openvino/reference/scatter_elements_update.hpp @@ -11,6 +11,7 @@ #include "openvino/core/except.hpp" #include "openvino/core/shape.hpp" #include "openvino/op/scatter_elements_update.hpp" +#include "openvino/reference/utils/coordinate_index.hpp" #include "openvino/reference/utils/coordinate_transform.hpp" namespace ov { @@ -26,43 +27,16 @@ size_t normalize_index(const T idx, const size_t dim_value) { } } -template -void scatter_elem_update_with_reduction(const DataType* input_data, - const IndicesType* indices, - const DataType* updates, - const int64_t axis, - DataType* out_buf, - const Shape& data_shape, - const Shape& indices_shape, - const ov::op::v12::ScatterElementsUpdate::Reduction reduction_type, - const bool use_init_val); - -template -void scatter_elem_update(const DataType* input_data, - const IndicesType* indices, - const DataType* updates, - const int64_t axis, - DataType* out_buf, - const Shape& data_shape, - const Shape& indices_shape, - const Reduction reduction_type = Reduction::NONE, - const bool use_init_val = true) { - // Copy inputs to out - std::memcpy(out_buf, input_data, sizeof(DataType) * shape_size(data_shape)); - - if (reduction_type != Reduction::NONE) { - scatter_elem_update_with_reduction(input_data, - indices, - updates, - axis, - out_buf, - data_shape, - indices_shape, - reduction_type, - use_init_val); - return; - } - +namespace { +void scatter_elem_update_no_reduction(const size_t data_elem_size, + const int64_t* indices, + const char* updates, + const int64_t axis, + char* out_buf, + const Shape& data_shape, + const Shape& indices_shape, + const Reduction reduction_type, + const bool use_init_val) { // 3D example // output[indices[i][j][k]][j][k] = updates[i][j][k] if axis = 0, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, @@ -78,10 +52,11 @@ void scatter_elem_update(const DataType* input_data, std::inner_product(indices_cord.begin(), indices_cord.end(), indices_strides.begin(), uint64_t(0)); Coordinate out_cord(indices_cord); out_cord.at(axis) = normalize_index(indices[indices_idx], data_shape[axis]); - const auto out_idx = std::inner_product(out_cord.begin(), out_cord.end(), data_strides.begin(), uint64_t(0)); - out_buf[out_idx] = updates[indices_idx]; + const size_t out_idx = ov::coordinate_offset(out_cord, data_strides); + std::memcpy(out_buf + out_idx * data_elem_size, updates + indices_idx * data_elem_size, data_elem_size); } } +} // namespace template T reduction_neutral_value(const Reduction reduction_type) { @@ -97,7 +72,6 @@ T reduction_neutral_value(const Reduction reduction_type) { return T{0}; default: OPENVINO_THROW("Neutral value not available for this type of reduction"); - return 0; } } @@ -119,7 +93,6 @@ std::function reduction_functor_for(const Reduction reducti return std::plus{}; default: OPENVINO_THROW("No functor available for this type of reduction"); - return 0; } } @@ -144,7 +117,6 @@ std::function reduction_functor_for(const Re }; default: OPENVINO_THROW("No functor available for this type 
of reduction"); - return 0; } } @@ -180,9 +152,8 @@ struct RoundingDirectionGuard { decltype(std::fegetround()) m_original_mode; }; -template -void scatter_elem_update_with_reduction(const DataType* input_data, - const IndicesType* indices, +template +void scatter_elem_update_with_reduction(const int64_t* indices, const DataType* updates, const int64_t axis, DataType* out_buf, @@ -247,5 +218,53 @@ void scatter_elem_update_with_reduction(const DataType* input_data, } } } + +template +const OutType* convert_indices(const InType* indices, const size_t indices_count, std::vector& buffer) { + if (std::is_same::type, OutType>::value) + return reinterpret_cast(indices); + + buffer.resize(indices_count); + for (auto i = indices_count; i-- > 0;) + buffer[i] = indices[i]; + return buffer.data(); +} + +template +void scatter_elem_update(const DataType* input_data, + const IndicesType* indices, + const DataType* updates, + const int64_t axis, + DataType* out_buf, + const Shape& data_shape, + const Shape& indices_shape, + const Reduction reduction_type = Reduction::NONE, + const bool use_init_val = true) { + std::memcpy(out_buf, input_data, sizeof(DataType) * shape_size(data_shape)); + + std::vector buffer; + const auto indices_i64 = convert_indices(indices, shape_size(indices_shape), buffer); + + if (reduction_type != Reduction::NONE) { + scatter_elem_update_with_reduction(indices_i64, + updates, + axis, + out_buf, + data_shape, + indices_shape, + reduction_type, + use_init_val); + } else { + scatter_elem_update_no_reduction(sizeof(DataType), + indices_i64, + reinterpret_cast(updates), + axis, + reinterpret_cast(out_buf), + data_shape, + indices_shape, + reduction_type, + use_init_val); + } +} } // namespace reference } // namespace ov diff --git a/src/plugins/template/tests/functional/op_reference/scatter_elements_update.cpp b/src/plugins/template/tests/functional/op_reference/scatter_elements_update.cpp index 48db3024d0b45f..ca908f55f5f7a1 100644 --- a/src/plugins/template/tests/functional/op_reference/scatter_elements_update.cpp +++ b/src/plugins/template/tests/functional/op_reference/scatter_elements_update.cpp @@ -12,36 +12,44 @@ using namespace reference_tests; using namespace ov; namespace { +using Reduction = ov::op::v12::ScatterElementsUpdate::Reduction; + struct ScatterElementsUpdateParams { - ScatterElementsUpdateParams(const reference_tests::Tensor& paramData, - const reference_tests::Tensor& paramIndices, - const reference_tests::Tensor& paramUpdates, - const reference_tests::Tensor& paramAxis, - const reference_tests::Tensor& paramExpected) - : input(paramData), - indices(paramIndices), - updates(paramUpdates), - axis(paramAxis), - expected(paramExpected) {} - - reference_tests::Tensor input; - reference_tests::Tensor indices; - reference_tests::Tensor updates; - reference_tests::Tensor axis; - reference_tests::Tensor expected; + ScatterElementsUpdateParams(reference_tests::Tensor paramData, + reference_tests::Tensor paramIndices, + reference_tests::Tensor paramUpdates, + reference_tests::Tensor paramAxis, + reference_tests::Tensor paramExpected, + const Reduction paramReduction = Reduction::NONE, + const bool paramUseInitValue = true) + : input{std::move(paramData)}, + indices{std::move(paramIndices)}, + updates{std::move(paramUpdates)}, + axis{std::move(paramAxis)}, + expected{std::move(paramExpected)}, + reduction{paramReduction}, + use_init_value{paramUseInitValue} {} + + const reference_tests::Tensor input; + const reference_tests::Tensor indices; + const reference_tests::Tensor 
updates; + const reference_tests::Tensor axis; + const reference_tests::Tensor expected; + const Reduction reduction; + const bool use_init_value; }; -class ReferenceScatterElementsUpdateLayerTest : public testing::TestWithParam, - public CommonReferenceTest { +class ReferenceScatterElementsUpdateV3LayerTest : public testing::TestWithParam, + public CommonReferenceTest { public: void SetUp() override { - auto params = GetParam(); + const auto& params = GetParam(); function = CreateFunction(params); inputData = {params.input.data, params.indices.data, params.updates.data, params.axis.data}; refOutData = {params.expected.data}; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "data_sh=" << param.input.shape; result << "_data_pr=" << param.input.type; @@ -65,7 +73,54 @@ class ReferenceScatterElementsUpdateLayerTest : public testing::TestWithParam, + public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params); + inputData = {params.input.data, params.indices.data, params.updates.data, params.axis.data}; + refOutData = {params.expected.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + static std::map reduction_as_string = { + {Reduction::NONE, "none"}, + {Reduction::SUM, "sum"}, + {Reduction::PROD, "prod"}, + {Reduction::MIN, "min"}, + {Reduction::MAX, "max"}, + {Reduction::MEAN, "mean"}, + }; + const auto& param = obj.param; + std::ostringstream result; + result << ReferenceScatterElementsUpdateV3LayerTest::getTestCaseName(obj); + result << "_reduction=" << reduction_as_string[param.reduction]; + result << "_use_init_value=" << std::boolalpha << param.use_init_value; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ScatterElementsUpdateParams& params) { + const auto data = std::make_shared(params.input.type, params.input.shape); + const auto indices = std::make_shared(params.indices.type, params.indices.shape); + const auto updates = std::make_shared(params.updates.type, params.updates.shape); + const auto axis = std::make_shared(params.axis.type, params.axis.shape); + auto scatter_eu = std::make_shared(data, + indices, + updates, + axis, + params.reduction, + params.use_init_value); + return std::make_shared(NodeVector{scatter_eu}, ParameterVector{data, indices, updates, axis}); + } +}; + +TEST_P(ReferenceScatterElementsUpdateV3LayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +TEST_P(ReferenceScatterElementsUpdateV12LayerTest, CompareWithHardcodedRefs) { Exec(); } @@ -159,13 +214,152 @@ std::vector generateScatterCombinedParams() { generateScatterParams(), }; std::vector combinedParams; - for (const auto& params : scatterTypeParams) { - combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + for (const auto& param : scatterTypeParams) { + std::move(param.begin(), param.end(), std::back_inserter(combinedParams)); } return combinedParams; } -INSTANTIATE_TEST_SUITE_P(smoke_ScatterEltsUpdate_With_Hardcoded_Refs, - ReferenceScatterElementsUpdateLayerTest, + +template ::value>::type* = nullptr> +Indices_t norm(int i, int d) { + return static_cast(i); +} +template ::value>::type* = nullptr> +Indices_t norm(int i, int d) { + return static_cast(i < 0 ? 
i + d : i); +} + +template +std::vector generate_scatter_eu_v12_params() { + using Data_t = typename element_type_traits::value_type; + using Indices_t = typename element_type_traits::value_type; + return { + {{Shape{3, 2}, element::Type(DATA_ET), std::vector{11, 12, 13, 14, 15, 16}}, // data + {Shape{1, 2}, element::Type(INDICES_ET), std::vector{norm(-1, 3), 1}}, // indices + {Shape{1, 2}, element::Type(DATA_ET), std::vector{5, 24}}, // updates + {Shape{1}, element::Type(INDICES_ET), std::vector{0}}, // axis + {Shape{3, 2}, element::Type(DATA_ET), std::vector{11, 12, 13, 24, 15, 16}}, // expected + Reduction::MAX, + true}, + {{Shape{2, 3}, element::Type(DATA_ET), std::vector{11, 12, 13, 14, 15, 16}}, + {Shape{2, 2}, element::Type(INDICES_ET), std::vector{norm(-3, 3), 1, 0, 2}}, + {Shape{2, 2}, element::Type(DATA_ET), std::vector{1, 22, 24, 6}}, + {Shape{1}, element::Type(INDICES_ET), std::vector{1}}, + {Shape{2, 3}, element::Type(DATA_ET), std::vector{1, 22, 13, 24, 15, 6}}, + Reduction::MIN, + false}, + {{Shape{1, 2, 3}, element::Type(DATA_ET), std::vector{11, 12, 13, 14, 15, 16}}, + {Shape{1, 1, 4}, element::Type(INDICES_ET), std::vector{0, 1, 0, 2}}, + {Shape{1, 1, 4}, element::Type(DATA_ET), std::vector{23, 38, 32, 7}}, + {Shape{1}, element::Type(INDICES_ET), std::vector{2}}, + {Shape{1, 2, 3}, element::Type(DATA_ET), std::vector{22, 25, 10, 14, 15, 16}}, + Reduction::MEAN, + true}, + {{Shape{1, 2, 3}, element::Type(DATA_ET), std::vector{11, 12, 13, 14, 15, 16}}, + {Shape{1, 1, 4}, element::Type(INDICES_ET), std::vector{0, 1, 0, 0}}, + {Shape{1, 1, 4}, element::Type(DATA_ET), std::vector{20, 33, 26, 29}}, + {Shape{1}, element::Type(INDICES_ET), std::vector{2}}, + {Shape{1, 2, 3}, element::Type(DATA_ET), std::vector{25, 33, 13, 14, 15, 16}}, + Reduction::MEAN, + false}, + {{Shape{2, 2, 1}, element::Type(DATA_ET), std::vector{1, 2, 3, 4}}, + {Shape{1, 5, 1}, element::Type(INDICES_ET), std::vector{0, 0, 1, 1, 1}}, + {Shape{1, 5, 1}, element::Type(DATA_ET), std::vector{50, 51, 10, 20, 30}}, + {Shape{1}, element::Type(INDICES_ET), std::vector{1}}, + {Shape{2, 2, 1}, element::Type(DATA_ET), std::vector{101, 60, 3, 4}}, + Reduction::SUM, + false}, + {{Shape{3, 2}, element::Type(DATA_ET), std::vector{1, 2, 3, 4, 5, 6}}, + {Shape{4, 1}, element::Type(INDICES_ET), std::vector{0, 0, 1, 2}}, + {Shape{4, 1}, element::Type(DATA_ET), std::vector{7, 7, 10, 5}}, + {Shape{1}, element::Type(INDICES_ET), std::vector{0}}, + {Shape{3, 2}, element::Type(DATA_ET), std::vector{49, 2, 30, 4, 25, 6}}, + Reduction::PROD, + true}, + }; +} + +std::vector collect_scatter_eu_v12_params() { + const std::vector> params{ + // i16 + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + // i32 + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + // i64 + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + generate_scatter_eu_v12_params(), + // u32 + 
generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        // u64
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        // f16
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        // f32
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+        generate_scatter_eu_v12_params(),
+    };
+
+    auto combined_params = generateScatterCombinedParams();
+    for (const auto& param : params) {
+        std::move(param.begin(), param.end(), std::back_inserter(combined_params));
+    }
+    return combined_params;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_ScatterElementsUpdate,
+                         ReferenceScatterElementsUpdateV3LayerTest,
                          ::testing::ValuesIn(generateScatterCombinedParams()),
-                         ReferenceScatterElementsUpdateLayerTest::getTestCaseName);
+                         ReferenceScatterElementsUpdateV3LayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ScatterElementsUpdate,
+                         ReferenceScatterElementsUpdateV12LayerTest,
+                         ::testing::ValuesIn(collect_scatter_eu_v12_params()),
+                         ReferenceScatterElementsUpdateV12LayerTest::getTestCaseName);
 } // namespace

From b6b4bda07a71a4634d369987c177eac5fb7f1565 Mon Sep 17 00:00:00 2001
From: Ivan Tikhonov
Date: Tue, 5 Mar 2024 19:46:34 +0400
Subject: [PATCH 28/28] Delete check that ReadValue/Assign operations have to
 be in pairs in ov::Model (#23051)

### Details:
If ReadValue and Assign are not paired in the model topology, validation fails with "Model is incorrect. Assign and ReadValue operations must be in pairs on the network." The check for ReadValue-Assign pairs is useless and is an obstacle for debugging model transformations. Moreover, with more advanced state-modification techniques there can be other ops (custom ones) that modify the state in place.
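
As a rough illustration of what the removed check used to reject, here is a minimal sketch against the public C++ API (the variable id, shapes, and ops are made up for the example; this is not a test from this PR): a state that is read through `ReadValue` but never written back by an `Assign`.

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/util/variable.hpp"

int main() {
    using namespace ov;
    // A variable that is read but never assigned: no Assign op in the model.
    auto variable = std::make_shared<op::util::Variable>(
        op::util::VariableInfo{PartialShape{1, 4}, element::f32, "my_state"});
    auto param = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 4});
    auto read = std::make_shared<op::v6::ReadValue>(param, variable);
    auto add = std::make_shared<op::v1::Add>(read, param);
    auto model = std::make_shared<Model>(OutputVector{add->output(0)}, ParameterVector{param});

    // Previously this threw "Assign and ReadValue operations must be in pairs
    // on the network."; with the check removed, the model validates.
    model->validate_nodes_and_infer_types();
}
```

With the pairing requirement gone, such a topology is accepted, which is what unblocks debugging of partially transformed models and custom ops that update the state in place.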
### Tickets: - *CVS-133159* --- src/core/src/model.cpp | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index 334e9fe224146a..315c3ab870f6e0 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -220,11 +220,6 @@ void ov::Model::prerequirements(bool detect_variables, bool detect_parameters) { void ov::Model::validate_nodes_and_infer_types() const { OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Model::validate_nodes_and_infer_types"); - struct Counter { - int cnt_assign = 0; - int cnt_read_val = 0; - }; - std::map pair_checker; std::stringstream unregistered_parameters; std::stringstream unregistered_variables; std::unordered_set tensors; @@ -246,12 +241,6 @@ void ov::Model::validate_nodes_and_infer_types() const { if (variable_op && std::find(m_variables.begin(), m_variables.end(), variable_op->get_variable()) == m_variables.end()) unregistered_variables << variable_op->get_variable_id() << std::endl; - - if (const auto& assign = std::dynamic_pointer_cast(node)) { - pair_checker[assign->get_variable().get()].cnt_assign++; - } else if (const auto& read_value = std::dynamic_pointer_cast(node)) { - pair_checker[read_value->get_variable().get()].cnt_read_val++; - } } OPENVINO_ASSERT(unregistered_parameters.str().empty(), @@ -261,13 +250,7 @@ void ov::Model::validate_nodes_and_infer_types() const { OPENVINO_ASSERT(unregistered_variables.str().empty(), "Model references undeclared Variables: ", unregistered_variables.str()); - bool only_pairs = - std::all_of(pair_checker.begin(), pair_checker.end(), [](const std::pair& val) { - return val.second.cnt_assign == 1 && val.second.cnt_read_val == 1; - }); - OPENVINO_ASSERT(only_pairs, - "Model is incorrect. Assign and ReadValue operations must be in pairs on the " - "network."); + for (const auto& output : outputs()) { OPENVINO_ASSERT(ov::layout::utils::is_compatible(ov::layout::get_layout(output), output.get_partial_shape()), "Result '",