diff --git a/.github/workflows/linux_riscv.yml b/.github/workflows/linux_riscv.yml index 83b03a2fe56290..1e886d80cd2598 100644 --- a/.github/workflows/linux_riscv.yml +++ b/.github/workflows/linux_riscv.yml @@ -156,6 +156,7 @@ jobs: git submodule update --init -- ${OPENVINO_REPO}/thirdparty/telemetry git submodule update --init -- ${OPENVINO_REPO}/src/plugins/intel_cpu git submodule update --init -- ${OPENVINO_REPO}/thirdparty/flatbuffers/flatbuffers + git submodule update --init -- ${OPENVINO_REPO}/thirdparty/level_zero popd # diff --git a/.gitmodules b/.gitmodules index d9733bc0d844c1..fdd61a6915652a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -93,3 +93,6 @@ [submodule "src/plugins/intel_cpu/thirdparty/xbyak_riscv"] path = src/plugins/intel_cpu/thirdparty/xbyak_riscv url = https://github.com/herumi/xbyak_riscv.git +[submodule "src/plugins/intel_gpu/thirdparty/l0_onednn_gpu"] + path = src/plugins/intel_gpu/thirdparty/l0_onednn_gpu + url = https://github.com/jkasprza/oneDNN.git diff --git a/cmake/features.cmake b/cmake/features.cmake index c9a3e898719296..40560ea61826f8 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -41,6 +41,12 @@ else() set(ENABLE_ONEDNN_FOR_GPU_DEFAULT ON) endif() +# Set default GPU runtime to OCL +set(OV_GPU_DEFAULT_RT "OCL") +if (ENABLE_INTEL_GPU) + ov_option_enum (GPU_RT_TYPE "Type of GPU runtime. 
Supported value: OCL and L0" ${OV_GPU_DEFAULT_RT} ALLOWED_VALUES L0 OCL) +endif() + ov_dependent_option (ENABLE_ONEDNN_FOR_GPU "Enable oneDNN with GPU support" ${ENABLE_ONEDNN_FOR_GPU_DEFAULT} "ENABLE_INTEL_GPU" OFF) ov_dependent_option (ENABLE_INTEL_NPU "NPU plugin for OpenVINO runtime" ON "X86_64;WIN32 OR LINUX" OFF) diff --git a/src/inference/include/openvino/runtime/intel_gpu/remote_properties.hpp b/src/inference/include/openvino/runtime/intel_gpu/remote_properties.hpp index 53c8de921a747d..cad287b56059bb 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/remote_properties.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/remote_properties.hpp @@ -24,6 +24,7 @@ using gpu_handle_param = void*; enum class ContextType { OCL = 0, //!< Pure OpenCL context VA_SHARED = 1, //!< Context shared with a video decoding device + ZE = 2, //!< Pure Level0 context }; /** @cond INTERNAL */ @@ -33,6 +34,8 @@ inline std::ostream& operator<<(std::ostream& os, const ContextType& context_typ return os << "OCL"; case ContextType::VA_SHARED: return os << "VA_SHARED"; + case ContextType::ZE: + return os << "ZE"; default: OPENVINO_THROW("Unsupported context type"); } @@ -43,6 +46,8 @@ inline std::istream& operator>>(std::istream& is, ContextType& context_type) { is >> str; if (str == "OCL") { context_type = ContextType::OCL; + } else if (str == "ZE") { + context_type = ContextType::ZE; } else if (str == "VA_SHARED") { context_type = ContextType::VA_SHARED; } else { diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 9556ab5873c616..38e3dafbc3305f 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -8,6 +8,8 @@ endif() set (TARGET_NAME "openvino_intel_gpu_plugin") +include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake) + if(OV_COMPILER_IS_INTEL_LLVM) # For windows we need to disable warning as error option to make FindSYCL.cmake work if (WIN32) @@ -36,6 +38,10 @@ 
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") endif() +if(WIN32) + add_definitions(-DNOMINMAX) +endif() + if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) add_definitions(-DENABLE_DEBUG_CAPS=1) @@ -77,6 +83,7 @@ target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include/) ov_set_threading_interface_for(${TARGET_NAME}) +ov_gpu_set_runtime_interface_for(${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/plugins/intel_gpu/cmake/utils.cmake b/src/plugins/intel_gpu/cmake/utils.cmake new file mode 100644 index 00000000000000..f346f50f8792cf --- /dev/null +++ b/src/plugins/intel_gpu/cmake/utils.cmake @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +function(ov_gpu_set_runtime_interface_for TARGET_NAME) + if(GPU_RT_TYPE STREQUAL "L0") + target_compile_definitions(${TARGET_NAME} PRIVATE OV_GPU_WITH_ZE_RT=1) + target_link_libraries(${TARGET_NAME} PRIVATE LevelZero::LevelZero) + elseif(GPU_RT_TYPE STREQUAL "OCL") + target_compile_definitions(${TARGET_NAME} PRIVATE OV_GPU_WITH_OCL_RT=1) + target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL) + else() + message(FATAL_ERROR "Invalid GPU runtime type: `${GPU_RT_TYPE}` Only `L0` and `OCL` are supported") + endif() +endfunction() diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 7425f701f88710..e210d332cc6296 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -8,6 +8,7 @@ # define NOMINMAX #endif +#include "intel_gpu/runtime/engine_configuration.hpp" #include "openvino/runtime/intel_gpu/remote_properties.hpp" #include "openvino/runtime/iremote_context.hpp" @@ -93,7 +94,11 @@ class 
RemoteContextImpl : public ov::IRemoteContext { ov::intel_gpu::gpu_handle_param m_va_display = nullptr; ov::intel_gpu::gpu_handle_param m_external_queue = nullptr; +#ifdef OV_GPU_WITH_ZE_RT + ContextType m_type = ContextType::ZE; +#else ContextType m_type = ContextType::OCL; +#endif std::string m_device_name = ""; static const size_t cache_capacity = 100; cldnn::LruCache m_memory_cache = cldnn::LruCache(cache_capacity); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp index 66b16345984bc9..9d8bd8aa410e00 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -8,11 +8,15 @@ # define NOMINMAX #endif + +// Do not include DirectX / VA wrappers when running with L0 runtime as they depend on OCL +#ifndef OV_GPU_WITH_ZE_RT #ifdef _WIN32 # include #else # include #endif +#endif #include "openvino/runtime/iremote_tensor.hpp" #include "intel_gpu/runtime/memory_caps.hpp" diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp index 7c567e877d7552..ef885414c6f1c0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device.hpp @@ -33,4 +33,28 @@ struct device { virtual ~device() = default; }; +// The priority return by this function impacts the order of devices reported by GPU plugin and devices enumeration +// Lower priority value means lower device ID +// Current behavior is: Intel iGPU < Intel dGPU < any other GPU +// Order of Intel dGPUs is undefined and depends on the OCL impl +// Order of other vendor GPUs is undefined and depends on the OCL impl +inline size_t get_device_priority(const cldnn::device_info& info) { + if (info.vendor_id == cldnn::INTEL_VENDOR_ID && info.dev_type == cldnn::device_type::integrated_gpu) { + return 0; + } 
else if (info.vendor_id == cldnn::INTEL_VENDOR_ID) { + return 1; + } else { + return std::numeric_limits::max(); + } +} + +inline std::vector sort_devices(const std::vector& devices_list) { + std::vector sorted_list = devices_list; + std::stable_sort(sorted_list.begin(), sorted_list.end(), [](device::ptr d1, device::ptr d2) { + return get_device_priority(d1->get_info()) < get_device_priority(d2->get_info()); + }); + + return sorted_list; +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp index 416a72c1a527aa..94e6ff2605a100 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp @@ -56,13 +56,13 @@ struct gfx_version { <= std::tie(r.major, r.minor, r.revision); // same order } - bool operator==(const gfx_version& other) { + bool operator==(const gfx_version& other) const { return major == other.major && minor == other.minor && revision == other.revision; } - bool operator!=(const gfx_version& other) { + bool operator!=(const gfx_version& other) const { return !(*this == other); } }; @@ -73,14 +73,14 @@ struct pci_bus_info { uint32_t pci_device = 0; uint32_t pci_function = 0; - bool operator==(const pci_bus_info& other) { + bool operator==(const pci_bus_info& other) const { return pci_domain == other.pci_domain && pci_bus == other.pci_bus && pci_device == other.pci_device && pci_function == other.pci_function; } - bool operator!=(const pci_bus_info& other) { + bool operator!=(const pci_bus_info& other) const { return !(*this == other); } }; @@ -116,9 +116,14 @@ struct device_info { bool supports_imad; ///< Does engine support int8 mad. bool supports_immad; ///< Does engine support int8 multi mad. 
+ bool supports_mutable_command_list; ///< [L0] Does the target runtime/device support mutable command list feature + bool supports_usm; ///< Does engine support unified shared memory. bool has_separate_cache; ///< Does the target hardware has separate cache for usm_device and usm_host + bool supports_cp_offload; ///< [L0] Does the command queue support copy offload + bool supports_counter_based_events; ///< [L0] Does the target runtime support counter based events + std::vector supported_simd_sizes; ///< List of SIMD sizes supported by current device and compiler uint32_t vendor_id; ///< Vendor ID @@ -140,8 +145,45 @@ struct device_info { pci_bus_info pci_info; ///< PCI bus information for the device + uint64_t timer_resolution; ///< [L0] Resolution of device timer used for profiling in cycles/sec + uint32_t kernel_timestamp_valid_bits; ///< [L0] Number of valid bits in the kernel timestamp values + uint32_t compute_queue_group_ordinal; ///< [L0] Ordinal of the command queue group to use for compute + uint32_t device_memory_ordinal; ///< [L0] Ordinal of the selected global device memory + ov::device::UUID uuid; ///< UUID of the gpu device ov::device::LUID luid; ///< LUID of the gpu device + + inline bool is_same_device(const device_info &other) const { + // Relying solely on the UUID is not reliable in all the cases (particularly on legacy platforms), + // where the UUID may be missing or incorrectly generated + // Therefore, we also validate other attributes + if (uuid.uuid != other.uuid.uuid) + return false; + + if (pci_info != other.pci_info) + return false; + + if (sub_device_idx != other.sub_device_idx) + return false; + + if (vendor_id != other.vendor_id || + dev_name != other.dev_name || + driver_version != other.driver_version) + return false; + + if (dev_type != other.dev_type || + gfx_ver != other.gfx_ver || + arch != other.arch) + return false; + + if (ip_version != other.ip_version || device_id != other.device_id) + return false; + + if 
(execution_units_count != other.execution_units_count || max_global_mem_size != other.max_global_mem_size) + return false; + + return true; + } }; /// @} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_query.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_query.hpp index ecb82795c0d920..d5291f50486152 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_query.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_query.hpp @@ -17,6 +17,12 @@ namespace cldnn { struct device_query { public: static int device_id; + /// @brief Get default engine type + static engine_types get_default_engine_type(); + + /// @brief Get default runtime type + static runtime_types get_default_runtime_type(); + explicit device_query(engine_types engine_type, runtime_types runtime_type, void* user_context = nullptr, @@ -25,6 +31,13 @@ struct device_query { int target_tile_id = -1, bool initialize_devices = false); + /// @brief Create device query with default values for engine type and runtime type + explicit device_query(void* user_context = nullptr, + void* user_device = nullptr, + int ctx_device_id = 0, + int target_tile_id = -1, + bool initialize_devices = false); + std::map get_available_devices() const { return _available_devices; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index 357508292d28ec..fa57dd19af6f59 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -12,6 +12,7 @@ #include "layout.hpp" #include "execution_config.hpp" #include "engine_configuration.hpp" +#include "kernel_builder.hpp" #include #include @@ -83,7 +84,7 @@ class engine { /// Checks whether two memory objects represents the same physical memory virtual bool is_the_same_buffer(const memory& mem1, const memory& mem2) = 0; - virtual bool check_allocatable(const layout& layout, 
allocation_type type) = 0; + virtual bool check_allocatable(const layout& layout, allocation_type type); /// Returns basic allocation type which will be used as a fallback when allocation type is not specified or device doesn't support some features. virtual allocation_type get_default_allocation_type() const = 0; @@ -141,7 +142,9 @@ class engine { virtual stream_ptr create_stream(const ExecutionConfig& config, void *handle) const = 0; /// Returns service stream which can be used during program build and optimizations - virtual stream& get_service_stream() const = 0; + virtual stream& get_service_stream() const; + + virtual std::shared_ptr create_kernel_builder() const = 0; virtual allocation_type detect_usm_allocation_type(const void* memory) const = 0; @@ -154,13 +157,9 @@ class engine { virtual void create_onednn_engine(const ExecutionConfig& config) = 0; /// Returns onednn engine object which shares device and context with current engine - virtual dnnl::engine& get_onednn_engine() const = 0; + virtual dnnl::engine& get_onednn_engine() const; #endif - /// This method is intended to create kernel handle for current engine from handle from arbitrary engine - /// For instance, source kernel can be compiled using ocl engine, and then we can build L0 kernel object based on that - virtual kernel::ptr prepare_kernel(const kernel::ptr kernel) const = 0; - /// Factory method which creates engine object with impl configured by @p engine_type /// @param engine_type requested engine type /// @param runtime_type requested execution runtime for the engine. 
@note some runtime/engine types configurations might be unsupported @@ -178,6 +177,12 @@ class engine { engine(const device::ptr device); const device::ptr _device; bool enable_large_allocations = false; + std::unique_ptr _service_stream; + +#ifdef ENABLE_ONEDNN_FOR_GPU + std::mutex onednn_mutex; + std::shared_ptr _onednn_engine; +#endif std::array, static_cast(allocation_type::max_value)> _memory_usage_data{}; std::array, static_cast(allocation_type::max_value)> _peak_memory_usage_data{}; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp index abe01b0cc5da22..8eea9df0169ab2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine_configuration.hpp @@ -4,20 +4,22 @@ #pragma once -#include +#include namespace cldnn { /// @brief Defines available engine types enum class engine_types : int32_t { ocl, - sycl + sycl, + ze }; inline std::ostream& operator<<(std::ostream& os, const engine_types& type) { switch (type) { case engine_types::ocl: os << "ocl"; break; case engine_types::sycl: os << "sycl"; break; + case engine_types::ze: os << "ze"; break; default: os << "unknown"; break; } @@ -27,11 +29,13 @@ inline std::ostream& operator<<(std::ostream& os, const engine_types& type) { /// @brief Defines available runtime types enum class runtime_types : int32_t { ocl, + ze, }; inline std::ostream& operator<<(std::ostream& os, const runtime_types& type) { switch (type) { case runtime_types::ocl: os << "ocl"; break; + case runtime_types::ze: os << "ze"; break; default: os << "unknown"; break; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/event.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/event.hpp index dffa64173fe72c..24465c4f2d0569 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/event.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/event.hpp @@ -32,6 +32,16 @@ struct event { _profiling_captured = false; _profiling_info.clear(); } + /// @brief Set event profiling data instead of retrieving it from event object + /// @param duration_nsec duration in nanoseconds + void set_profiling_duration(uint64_t duration_nsec) { + auto stage = instrumentation::profiling_stage::executing; + auto duration = std::chrono::nanoseconds(duration_nsec); + auto period = std::make_shared(duration); + + _profiling_info.push_back({ stage, period }); + _profiling_captured = true; + } // returns true if handler has been successfully added bool add_event_handler(event_handler handler, void* data); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel.hpp index 8dcd9d07d0f246..be273cd1d50aa7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel.hpp @@ -4,11 +4,9 @@ #pragma once -#include "kernel_args.hpp" -#include "event.hpp" - #include #include +#include namespace cldnn { @@ -18,8 +16,15 @@ class kernel { public: using ptr = std::shared_ptr; virtual std::shared_ptr clone(bool reuse_kernel_handle = false) const = 0; + /// @brief Check if objects share the same handle to the kernel instance + /// @param other kernel object for comparison + /// @return true if underlying kernel handles are the same, false otherwise + virtual bool is_same(const kernel &other) const = 0; virtual ~kernel() = default; - virtual std::string get_id() const { return ""; } + + virtual std::string get_id() const = 0; + virtual std::vector get_binary() const = 0; + virtual std::string get_build_log() const = 0; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_builder.hpp new file mode 100644 index 00000000000000..90c17a77be03cf --- 
/dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_builder.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel.hpp" + +#include +#include +#include + +namespace cldnn { + +enum class KernelFormat { + SOURCE, + NATIVE_BIN, +}; + +class kernel_builder { +public: + virtual ~kernel_builder() = default; + virtual void build_kernels(const void *src, size_t src_bytes, KernelFormat src_format, const std::string &options, std::vector &out) const = 0; +}; + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp index cdb15dbace42d7..9cd00b1c3065e5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp @@ -224,7 +224,6 @@ struct surfaces_lock { surfaces_lock(const surfaces_lock& other) = delete; surfaces_lock& operator=(const surfaces_lock& other) = delete; - static std::unique_ptr create(engine_types engine_type, std::vector mem, const stream& stream); static bool is_lock_needed(const shared_mem_type& mem_type); }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp index 78d4b99b32f99a..4d04792bae0bb0 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_caps.hpp @@ -81,6 +81,19 @@ enum class shared_mem_type { shared_mem_usm }; +inline std::ostream& operator<<(std::ostream& out, const shared_mem_type& mem_type) { + switch (mem_type) { + case shared_mem_type::shared_mem_empty: out << "shared_mem_empty"; break; + case shared_mem_type::shared_mem_buffer: out << "shared_mem_buffer"; break; + case shared_mem_type::shared_mem_image: out << "shared_mem_image"; break; + case shared_mem_type::shared_mem_vasurface: out << "shared_mem_vasurface"; break; + case 
shared_mem_type::shared_mem_dxbuffer: out << "shared_mem_dxbuffer"; break; + case shared_mem_type::shared_mem_usm: out << "shared_mem_usm"; break; + default: out << "unknown"; break; + } + return out; +} + using shared_handle = void*; using shared_surface = uint32_t; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp index 1e8300f92135e7..98f6d87066ac34 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/stream.hpp @@ -17,6 +17,7 @@ #endif namespace cldnn { +struct surfaces_lock; // Possible sync methods for kernels in stream enum class SyncMethods { @@ -66,12 +67,12 @@ class stream { virtual void wait_for_events(const std::vector& events) = 0; virtual event::ptr create_user_event(bool set) = 0; virtual event::ptr create_base_event() = 0; + virtual std::unique_ptr create_surfaces_lock(const std::vector &mem) const = 0; virtual event::ptr aggregate_events(const std::vector& events, bool group = false, bool is_output = false); QueueTypes get_queue_type() const { return m_queue_type; } SyncMethods get_sync_method() const { return m_sync_method; } - static QueueTypes detect_queue_type(engine_types engine_type, void* queue_handle); static SyncMethods get_expected_sync_method(const ExecutionConfig& config); #ifdef ENABLE_ONEDNN_FOR_GPU diff --git a/src/plugins/intel_gpu/src/graph/CMakeLists.txt b/src/plugins/intel_gpu/src/graph/CMakeLists.txt index df2a3257d6ea7f..80bf20a8273274 100644 --- a/src/plugins/intel_gpu/src/graph/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/graph/CMakeLists.txt @@ -84,6 +84,10 @@ macro(ov_gpu_add_backend_target) target_include_directories(${ARG_NAME} SYSTEM BEFORE PRIVATE $) add_dependencies(openvino_intel_gpu_${IMPL_TYPE}_obj onednn_gpu_tgt) endif() + # Onednn headers use OCL/L0 headers + ov_gpu_set_runtime_interface_for(openvino_intel_gpu_${IMPL_TYPE}_obj) + # Onednn needs OCL headers 
even when running L0 + target_link_libraries(openvino_intel_gpu_${IMPL_TYPE}_obj PRIVATE OpenCL::OpenCL) endmacro() set(CODEGEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/common_utils/kernels_db_gen.py") @@ -135,6 +139,9 @@ ov_build_target_faster(${TARGET_NAME} PCH) target_compile_options(${TARGET_NAME} PRIVATE ${COMMON_COMPILE_OPTIONS}) ov_set_threading_interface_for(${TARGET_NAME}) +ov_gpu_set_runtime_interface_for(${TARGET_NAME}) +# Onednn needs OCL headers even when running L0 +target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp index 4b05604f2da89f..fa00107fd4e773 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp @@ -9,6 +9,7 @@ #include "fully_connected_inst.h" #include "assign_inst.h" #include "mvn_inst.h" +#include "reorder_inst.h" #include #include diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp index e2b53fe9eafcbc..9e06c71e124da5 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp @@ -5,6 +5,7 @@ #include "pass_manager.h" #include "program_helpers.h" #include "reshape_inst.h" +#include "reorder_inst.h" #include #include diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp index 5a28094d5d86c9..b96acc4d66a1a2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp +++ 
b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp @@ -12,7 +12,7 @@ #include "shape_of_inst.h" #include "broadcast_inst.h" #include "non_zero_inst.h" -#include "non_max_suppression_inst.h" +#include "reorder_inst.h" #include "unique_inst.hpp" #include "scatter_elements_update_inst.h" #include "scatter_update_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 1566d0ad20a7f8..e521139b62f34c 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -9,6 +9,7 @@ #include "convolution_inst.h" #include "deconvolution_inst.h" #include "fully_connected_inst.h" +#include "reorder_inst.h" #include "lstm_seq_inst.h" #include "gru_seq_inst.h" #include "intel_gpu/runtime/format.hpp" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index da21f1d2220c16..6e3537f8eccd4f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -11,6 +11,7 @@ #include "crop_inst.h" #include "eltwise_inst.h" #include "gemm_inst.h" +#include "reorder_inst.h" #include "assign_inst.h" #include "read_value_inst.h" #include "reshape_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 0b7a105c1d7c99..7ae646ec69510f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -12,6 +12,7 @@ #include "activation_inst.h" #include "batch_to_space_inst.h" #include "crop_inst.h" 
+#include "reorder_inst.h" #include "eltwise_inst.h" #include "gemm_inst.h" #include "lrn_inst.h" diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 21801247ebbbe5..7737c50e585477 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -10,6 +10,7 @@ #include "data_inst.h" #include "eltwise_inst.h" #include "mutable_data_inst.h" +#include "reorder_inst.h" #include #include diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index f873edf9a306ec..fc2bfe638e5a75 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -22,6 +22,7 @@ #include "fully_connected_inst.h" #include "group_normalization_inst.h" #include "mvn_inst.h" +#include "reorder_inst.h" #include "rms_inst.h" #include diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp index 92cfe47926fcf0..a1f5c0f1ac1b63 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp @@ -20,10 +20,6 @@ #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/file_util.hpp" -#include "ocl/ocl_kernel.hpp" -#include "ocl/ocl_common.hpp" -#include "ocl/ocl_device.hpp" - #ifdef WIN32 #include #ifdef NTDDI_WIN10_RS5 @@ -54,41 +50,18 @@ namespace { std::mutex cacheAccessMutex; -static const cldnn::device::ptr get_target_device(const cldnn::engine& engine) { - using namespace cldnn; - if (engine.runtime_type() == runtime_types::ocl) { - return 
engine.get_device(); - } else { - ocl::ocl_device_detector detector; - auto device_map = detector.get_available_devices(nullptr, nullptr); - auto original_device = engine.get_device(); - - for (auto& d : device_map) { - const auto& target_uuid = d.second->get_info().uuid; - const auto& original_uuid = original_device->get_info().uuid; - if (target_uuid.uuid == original_uuid.uuid) - return d.second; - } +std::string join_strings(const std::vector<std::string>& strings) { + size_t total_size = 0; + for (auto &str : strings) { + total_size += str.size(); } - - OPENVINO_THROW("[GPU] Couldn't find target device for kernels cache"); -} - -#ifdef ENABLE_ONEDNN_FOR_GPU -cl::Program fuse_microkernels(const cl::Context& context, const cl::Device& device, cl::Program& program, const std::string& code) { - using namespace dnnl::impl::gpu::intel; - std::vector> binaries = program.getInfo(); - OPENVINO_ASSERT(binaries.size() == 1); - std::vector binary = binaries[0]; - micro::fuseMicrokernels(binary, code.c_str()); - - cl::Program::Binaries fused_binary = { binary }; - cl::Program fused_program(context, {device}, fused_binary); - fused_program.build({device}); - - return fused_program; + std::string acc_str; + acc_str.reserve(total_size); + for (auto &str : strings) { + acc_str.append(str); + } + return acc_str; } -#endif // ENABLE_ONEDNN_FOR_GPU std::string reorder_options(const std::string& org_options) { std::stringstream ss(org_options); @@ -307,42 +280,21 @@ kernels_cache::kernels_cache(engine& engine, uint32_t prog_id, std::shared_ptr task_executor, const std::map& batch_headers) - : _device(get_target_device(engine)) + : _device(engine.get_device()) + , _builder(engine.create_kernel_builder()) , _task_executor(task_executor) , _config(config) , _prog_id(prog_id) , batch_headers(std::move(batch_headers)) { } -static std::vector getProgramBinaries(cl::Program program) { - // Get the size of the program binary in bytes. 
- std::vector binary_sizes = program.getInfo(); - - if (binary_sizes.size() != 1) - throw std::runtime_error("Invalid binaries count"); - - size_t binary_size = binary_sizes.front(); - // Binary is not available for the device. - if (binary_size == 0) - throw std::runtime_error("Binary is not avaliable after program build"); - - // Get program binary. - return program.getInfo().front(); -} - -// TODO: This build_batch method should be backend specific void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& compiled_kernels) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::build_batch"); - - auto& cl_build_device = dynamic_cast(*_device); - bool dump_sources = batch.dump_custom_program; std::string dump_sources_dir = GPU_DEBUG_VALUE_OR(_config.get_dump_sources_path(), ""); GPU_DEBUG_IF(!dump_sources_dir.empty()) { dump_sources = true; } - std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which - std::string current_dump_file_name = ""; if (dump_sources) { current_dump_file_name = std::move(dump_sources_dir); @@ -361,128 +313,71 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co dump_file << s; } } - std::string cached_bin_name = get_cache_path() + std::to_string(batch.hash_value) + ".cl_cache"; - cl::Program::Binaries precompiled_kernels = {}; - + /////////////////////////////////////////////////////////////////////////////////// + std::vector precompiled; if (is_cache_enabled()) { - // Try to load file with name ${hash_value}.cl_cache which contains precompiled kernels for current bucket - // If read is successful, then remove kernels from compilation bucket - std::vector bin; - { - std::lock_guard lock(cacheAccessMutex); - bin = ov::util::load_binary(cached_bin_name); - } - if (!bin.empty()) { - precompiled_kernels.push_back(bin); - } + std::lock_guard lock(cacheAccessMutex); + precompiled = 
ov::util::load_binary(cached_bin_name); } - try { - cl::vector kernels; - - // Run compilation - if (precompiled_kernels.empty()) { - cl::Program program(cl_build_device.get_context(), batch.source); - { - OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::BuildProgram::RunCompilation"); - if (program.build({cl_build_device.get_device()}, batch.options.c_str()) != CL_SUCCESS) - throw std::runtime_error("Failed in building program."); - } - - if (dump_sources && dump_file.good()) { - dump_file << "\n/* Build Log:\n"; - for (auto& p : program.getBuildInfo()) - dump_file << p.second << "\n"; - - dump_file << "*/\n"; + std::vector kernels; + if (!precompiled.empty()) { + _builder->build_kernels(precompiled.data(), precompiled.size(), KernelFormat::NATIVE_BIN, "", kernels); + } else { + auto combined_source = join_strings(batch.source); + _builder->build_kernels(combined_source.data(), combined_source.size(), KernelFormat::SOURCE, batch.options, kernels); + if (dump_sources && dump_file.good()) { + dump_file << "\n/* Build Log:\n"; + // Retrieve build log from the first kernel only + // It should be the same for all kernels in batch + if (kernels.size() > 1) { + dump_file << kernels[0]->get_build_log(); } - - if (batch.has_microkernels) { + dump_file << "\n*/\n"; + } + if (batch.has_microkernels) { #ifdef ENABLE_ONEDNN_FOR_GPU - OPENVINO_ASSERT(batch.kernels_counter == 1); - // Do we need full source code here (with batch headers)? 
- program = fuse_microkernels(cl_build_device.get_context(), cl_build_device.get_device(), program, batch.source.back()); + using namespace dnnl::impl::gpu::intel; + OPENVINO_ASSERT(batch.kernels_counter == 1 && kernels.size() == 1); + std::vector binary = kernels[0]->get_binary(); + kernels.clear(); + // Update binary and rebuild kernel + micro::fuseMicrokernels(binary, combined_source.c_str()); + _builder->build_kernels(binary.data(), binary.size(), KernelFormat::NATIVE_BIN, "", kernels); #else // ENABLE_ONEDNN_FOR_GPU - OPENVINO_THROW("[GPU] Can't compile kernel w/ microkernels as onednn is not available"); + OPENVINO_THROW("[GPU] Can't compile kernel w/ microkernels as onednn is not available"); #endif // ENABLE_ONEDNN_FOR_GPU - } - - - program.createKernels(&kernels); - - if (is_cache_enabled()) { + } + if (is_cache_enabled()) { // If kernels caching is enabled, then we save compiled bucket to binary file with name ${code_hash_value}.cl_cache // Note: Bin file contains full bucket, not separate kernels, so kernels reuse across different models is quite limited // Bucket size can be changed by max_kernels_per_batch config option, but forcing it to 1 will lead to much longer // compile time. 
+ std::vector binary = kernels[0]->get_binary(); std::lock_guard lock(cacheAccessMutex); - ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(std::move(program))); - } - } else { - cl::Program program(cl_build_device.get_context(), {cl_build_device.get_device()}, precompiled_kernels); - if (program.build({cl_build_device.get_device()}, batch.options.c_str()) != CL_SUCCESS) - throw std::runtime_error("Failed in building program with a precompiled kernel."); - - program.createKernels(&kernels); + ov::intel_gpu::save_binary(cached_bin_name, binary); } - - { - std::lock_guard lock(_mutex); - for (auto& k : kernels) { - const auto& entry_point = k.getInfo(); - const auto& iter = batch.entry_point_to_id.find(entry_point); - if (iter != batch.entry_point_to_id.end()) { - kernel::ptr kernel = std::make_shared(ocl::ocl_kernel_type(k, cl_build_device.get_usm_helper()), entry_point); - - auto& params = iter->second.first; - auto kernel_part_idx = iter->second.second; - if (compiled_kernels.find(params) != compiled_kernels.end()) { - compiled_kernels[params].push_back(std::make_pair(kernel, kernel_part_idx)); - } else { - compiled_kernels[params] = { std::make_pair(kernel, kernel_part_idx) }; - } - if (_kernel_batch_hash.find(params) == _kernel_batch_hash.end()) { - _kernel_batch_hash[params] = batch.hash_value; - } + } + { + std::lock_guard lock(_mutex); + for (auto& k : kernels) { + auto entry_point = k->get_id(); + const auto& iter = batch.entry_point_to_id.find(entry_point); + if (iter != batch.entry_point_to_id.end()) { + auto& params = iter->second.first; + auto kernel_part_idx = iter->second.second; + if (compiled_kernels.find(params) != compiled_kernels.end()) { + compiled_kernels[params].push_back(std::make_pair(k, kernel_part_idx)); } else { - throw std::runtime_error("Could not find entry point"); + compiled_kernels[params] = { std::make_pair(k, kernel_part_idx) }; } + if (_kernel_batch_hash.find(params) == _kernel_batch_hash.end()) { + 
_kernel_batch_hash[params] = batch.hash_value; + } + } else { + throw std::runtime_error("Could not find entry point"); } } - } catch (const cl::BuildError& err) { - if (dump_sources && dump_file.good()) - dump_file << "\n/* Build Log:\n"; - - for (auto& p : err.getBuildLog()) { - if (dump_sources && dump_file.good()) - dump_file << p.second << "\n"; - err_log += p.second + '\n'; - } - if (dump_sources && dump_file.good()) - dump_file << "*/\n"; - } - if (!err_log.empty()) { - GPU_DEBUG_INFO << "-------- OpenCL build error" << std::endl; - GPU_DEBUG_INFO << err_log << std::endl; - GPU_DEBUG_INFO << "-------- End of OpenCL build error" << std::endl; - std::stringstream err_ss(err_log); - std::string line; - std::stringstream err; - int cnt = 0; - - while (std::getline(err_ss, line, '\n')) { - if (line.find("error") != std::string::npos) - cnt = 5; - cnt--; - if (cnt > 0) - err << line << std::endl; - else if (cnt == 0) - err << "...." << std::endl; - } - - throw std::runtime_error("Program build failed(" + std::to_string(batch.bucket_id) + + "_part_" - + std::to_string(batch.batch_id) - + "):\n" + err.str()); } } @@ -504,53 +399,15 @@ std::vector kernels_cache::get_kernels(const kernel_impl_params& pa OPENVINO_ASSERT(_kernels.end() != res, "Kernel for {" + current_node_id + "} is not found in the kernel cache!"); OPENVINO_ASSERT(res->second.size() != 0, "Number of kernels should not be zero for " + current_node_id); - auto& engine = params.get_program().get_engine(); - std::vector kernels(res->second.size()); for (auto& k : res->second) { auto& kernel_ptr = k.first; auto kernel_part_idx = k.second; - kernels[kernel_part_idx] = engine.prepare_kernel(kernel_ptr->clone(_reuse_kernels)); + kernels[kernel_part_idx] = kernel_ptr->clone(_reuse_kernels); } return kernels; } -bool kernels_cache::validate_simple_kernel_execution(kernel::ptr krl) { - auto casted = downcast(krl.get()); - auto kernel = casted->get_handle(); - try { - auto casted_dev = 
dynamic_cast(_device.get()); - OPENVINO_ASSERT(casted_dev != nullptr, "device is nullptr"); - - auto device = casted_dev->get_device(); - cl::Context ctx(device); - - cl::Buffer buffer(ctx, CL_MEM_READ_WRITE, sizeof(uint8_t) * 8); - if (kernel.setArg(0, buffer) != CL_SUCCESS) - return false; - - cl::Event ev; - cl::CommandQueue queue(ctx, device); - if (queue.enqueueNDRangeKernel(kernel, cl::NDRange(), cl::NDRange(8), cl::NDRange(8), nullptr, &ev) != CL_SUCCESS) - return false; - - uint8_t result[8]; - uint8_t expected[8] = { 1, 3, 5, 7, 9, 11, 13, 15 }; - if (queue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(uint8_t) * 8, &result) != CL_SUCCESS) - return false; - - for (int i = 0; i < 8; ++i) { - if (result[i] != expected[i]) - return false; - } - - ev.wait(); - return true; - } catch (...) { - return false; - } -} - void kernels_cache::build_all() { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "KernelsCache::BuildAll"); if (!_pending_compilation) @@ -635,15 +492,12 @@ void kernels_cache::add_kernels_source(const kernel_impl_params& params, } std::string kernels_cache::get_cached_kernel_id(kernel::ptr kernel) const { - auto ocl_kernel = std::static_pointer_cast(kernel); - const auto& entry_point = ocl_kernel->get_handle().getInfo(); - auto program = ocl_kernel->get_handle().getInfo(); - cl::vector program_binaries = getProgramBinaries(std::move(program)); + auto program_binaries = kernel->get_binary(); auto iter = _cached_binaries.find(program_binaries); OPENVINO_ASSERT(iter != _cached_binaries.end(), "[GPU] Not found cached kernel binaries"); - return entry_point + "@" + std::to_string(iter->second); + return kernel->get_id() + "@" + std::to_string(iter->second); } std::vector kernels_cache::get_cached_kernel_ids(const std::vector& kernels) const { @@ -661,9 +515,7 @@ void kernels_cache::add_to_cached_kernels(const std::vector& kernel static std::atomic id_gen{0}; for (auto& kernel : kernels) { - auto ocl_kernel = 
std::static_pointer_cast(kernel); - auto program = ocl_kernel->get_handle().getInfo(); - cl::vector program_binaries = getProgramBinaries(std::move(program)); + auto program_binaries = kernel->get_binary(); std::lock_guard lock(_mutex); auto iter = _cached_binaries.find(program_binaries); @@ -698,7 +550,7 @@ void kernels_cache::save(BinaryOutputBuffer& ob) const { ob << cached_binary.first; ob << is_zebin_binary; if (!is_zebin_binary) { - auto driver_version = downcast(*_device).get_info().driver_version; + auto driver_version = _device->get_info().driver_version; ob << driver_version; } } @@ -707,8 +559,6 @@ void kernels_cache::save(BinaryOutputBuffer& ob) const { void kernels_cache::load(BinaryInputBuffer& ib) { std::unordered_map> precompiled_kernels; - const auto& build_device = downcast(*_device); - size_t num_cached_binaries; ib >> num_cached_binaries; for (size_t i = 0; i < num_cached_binaries; ++i) { @@ -722,7 +572,7 @@ void kernels_cache::load(BinaryInputBuffer& ib) { // Legacy patchtoken path std::string driver_version, current_driver_version; ib >> driver_version; - current_driver_version = build_device.get_info().driver_version; + current_driver_version = _device->get_info().driver_version; if (driver_version != current_driver_version) { OPENVINO_THROW("Driver version mismatch in cached patchtoken kernels"); @@ -730,31 +580,22 @@ void kernels_cache::load(BinaryInputBuffer& ib) { } } - try { + { std::lock_guard lock(_mutex); _cached_kernels.clear(); for (auto& precompiled_kernel : precompiled_kernels) { - cl::vector kernels; - cl::Program program(build_device.get_context(), {build_device.get_device()}, {precompiled_kernel.second}); - program.build({build_device.get_device()}); - program.createKernels(&kernels); - + std::vector kernels; + _builder->build_kernels(precompiled_kernel.second.data(), precompiled_kernel.second.size(), KernelFormat::NATIVE_BIN, "", kernels); for (auto& k : kernels) { - const auto& entry_point = k.getInfo(); + const auto& 
entry_point = k->get_id(); std::string cached_kernel_id = entry_point + "@" + std::to_string(precompiled_kernel.first); const auto& iter = _cached_kernels.find(cached_kernel_id); if (iter == _cached_kernels.end()) { - _cached_kernels[cached_kernel_id] = std::make_shared(ocl::ocl_kernel_type(k, build_device.get_usm_helper()), entry_point); + _cached_kernels[cached_kernel_id] = k; } } } - } catch (const cl::BuildError& err) { - std::string err_log = ""; - for (auto& p : err.getBuildLog()) { - err_log += p.second + '\n'; - } - OPENVINO_THROW(err_log); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp index dc77442fedef47..3b6a5cf78032dd 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.hpp @@ -7,6 +7,7 @@ #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/runtime/device.hpp" #include "intel_gpu/runtime/kernel.hpp" +#include "intel_gpu/runtime/kernel_builder.hpp" #include "intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/graph/kernel_impl_params.hpp" @@ -97,6 +98,7 @@ class kernels_cache { private: static std::mutex _mutex; const device::ptr _device; + std::shared_ptr _builder; std::shared_ptr _task_executor; ExecutionConfig _config; uint32_t _prog_id = 0; @@ -127,8 +129,6 @@ class kernels_cache { void set_kernels_reuse(bool reuse_kernels) { _reuse_kernels = reuse_kernels; } bool get_kernels_reuse() const { return _reuse_kernels; } - bool validate_simple_kernel_execution(kernel::ptr kernel); - // forces compilation of all pending kernels/programs void build_all(); void reset(); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp index 46af62be2069f8..8ff102848b0dd4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp +++ 
b/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp @@ -139,7 +139,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl { if (is_cpu()) { return; } - _kernels.clear(); if (!_kernel_data.kernels.empty()) { auto compiled_kernels = kernels_cache.get_kernels(params); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.hpp index ce5701f2739cf2..08d254b65d7d62 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.hpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // + +#include "reorder_inst.h" #include "registry/implementation_manager.hpp" #include "intel_gpu/primitives/reorder.hpp" #include "program_node.h" diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_3gemm_swiglu_opt.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_3gemm_swiglu_opt.cpp index 7094ddcf1d68c5..f5b0158c747844 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_3gemm_swiglu_opt.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_3gemm_swiglu_opt.cpp @@ -12,7 +12,6 @@ #ifdef ENABLE_ONEDNN_FOR_GPU # include # include -# include # include # include # include diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 5ac39d3c13968a..33d87ea1c394db 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -12,7 +12,6 @@ #include "intel_gpu/runtime/file_util.hpp" #include "to_string_utils.h" #include "utils.hpp" -#include "runtime/ocl/ocl_event.hpp" #include "intel_gpu/primitives/reorder.hpp" @@ -543,8 +542,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { try { _prim.execute(stream.get_onednn_stream(), _args[net_id]); } catch (dnnl::error& err) { - auto err_code = 
err.status == dnnl_status_t::dnnl_out_of_memory ? CL_OUT_OF_RESOURCES : CL_INVALID_OPERATION; - ocl::rethrow(err.what(), err_code, _engine->get_device_info()); + OPENVINO_THROW(err.what()); } if (_enable_profiling) { @@ -553,12 +551,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { stream.wait(); std::vector duration = dnnl::get_profiling_data(stream.get_onednn_stream(), dnnl::profiling_data_kind::time); - if (duration.empty()) { - event = std::make_shared(0); - } else { + event = stream.create_user_event(true); + if (!duration.empty()) { OPENVINO_ASSERT(duration.size() == 1, "[GPU] oneDNN profiling data is expected to have info only for single primitive ", "actual number is ", duration.size()); - event = std::make_shared(duration[0]); + event->set_profiling_duration(duration[0]); } } else { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index dff786a9853098..b095d68c2d108a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -5,7 +5,6 @@ #include "utils.hpp" #include #include -#include namespace cldnn { namespace onednn { diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 469d63e331cd56..e89a463530aea1 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -715,13 +715,13 @@ std::map network::execute(const std::vector) diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 14a2a08f63d0d6..e8c18242695624 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -217,12 +217,7 @@ Plugin::Plugin() { set_device_name("GPU"); register_primitives(); - // Set OCL runtime which should be always available -#ifdef OV_GPU_WITH_SYCL - cldnn::device_query 
device_query(cldnn::engine_types::sycl, cldnn::runtime_types::ocl); -#else - cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl); -#endif + cldnn::device_query device_query; m_device_map = device_query.get_available_devices(); // Set default configs for each device diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index 885ceb9bda4720..63c8bd043be974 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -65,12 +65,9 @@ RemoteContextImpl::RemoteContextImpl(const std::mapget_info().dev_name << ")" << std::endl; -#ifdef OV_GPU_WITH_SYCL - const auto engine_type = cldnn::engine_types::sycl; -#else - const auto engine_type = cldnn::engine_types::ocl; -#endif - const auto runtime_type = cldnn::runtime_types::ocl; - m_device->initialize(); // Initialize associated device before use - m_engine = cldnn::engine::create(engine_type, runtime_type, m_device); + m_engine = cldnn::engine::create( + cldnn::device_query::get_default_engine_type(), cldnn::device_query::get_default_runtime_type(), m_device); init_properties(); diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index c2524c8ab3d1a9..9faecbbb6842e4 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -317,7 +317,15 @@ void RemoteTensorImpl::allocate() { switch (m_mem_type) { case TensorType::BT_BUF_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem, reset); + // BT_BUF_INTERNAL should map to cl_mem however L0 engine can not allocate cl_mem + if (engine.supports_allocation(cldnn::allocation_type::cl_mem)) { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem, reset); + } else { + // Fall back to usm_host and override memory type + 
GPU_DEBUG_COUT << "[Warning] [GPU] Could not allocate cl_mem, using usm_host allocation instead\n"; + m_mem_type = TensorType::BT_USM_HOST_INTERNAL; + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host, reset); + } break; } case TensorType::BT_USM_HOST_INTERNAL: { diff --git a/src/plugins/intel_gpu/src/runtime/CMakeLists.txt b/src/plugins/intel_gpu/src/runtime/CMakeLists.txt index 8daa9444a97304..e052e427dc90df 100644 --- a/src/plugins/intel_gpu/src/runtime/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/runtime/CMakeLists.txt @@ -16,15 +16,25 @@ file(GLOB LIBRARY_SOURCES_OCL "${CMAKE_CURRENT_SOURCE_DIR}/ocl/*.cpp" ) -set(LIBRARY_SOURCES_ALL - ${LIBRARY_SOURCES_MAIN} - ${LIBRARY_SOURCES_OCL} - ) - +file(GLOB LIBRARY_SOURCES_ZE + "${CMAKE_CURRENT_SOURCE_DIR}/ze/*.h" + "${CMAKE_CURRENT_SOURCE_DIR}/ze/*.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/ze/*.cpp" +) file(GLOB_RECURSE SYCL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ocl/sycl_*.cpp") if(NOT OV_COMPILER_IS_INTEL_LLVM) - list(REMOVE_ITEM LIBRARY_SOURCES_ALL ${SYCL_SOURCES}) + list(REMOVE_ITEM LIBRARY_SOURCES_OCL ${SYCL_SOURCES}) +endif() + +set(LIBRARY_SOURCES_ALL ${LIBRARY_SOURCES_MAIN}) + +if(GPU_RT_TYPE STREQUAL "OCL") + list(APPEND LIBRARY_SOURCES_ALL ${LIBRARY_SOURCES_OCL}) +endif() + +if(GPU_RT_TYPE STREQUAL "L0") + list(APPEND LIBRARY_SOURCES_ALL ${LIBRARY_SOURCES_ZE}) endif() add_library(${TARGET_NAME} STATIC ${LIBRARY_SOURCES_ALL}) @@ -58,6 +68,7 @@ if(ENABLE_ONEDNN_FOR_GPU) endif() ov_set_threading_interface_for(${TARGET_NAME}) +ov_gpu_set_runtime_interface_for(${TARGET_NAME}) target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL diff --git a/src/plugins/intel_gpu/src/runtime/device_query.cpp b/src/plugins/intel_gpu/src/runtime/device_query.cpp index 6b6ce3787cf73b..5e6ba1576a17c3 100644 --- a/src/plugins/intel_gpu/src/runtime/device_query.cpp +++ b/src/plugins/intel_gpu/src/runtime/device_query.cpp @@ -4,12 +4,50 @@ #include "intel_gpu/runtime/device_query.hpp" #include 
"ocl/ocl_device_detector.hpp" +#include "ze/ze_device_detector.hpp" #include -#include namespace cldnn { int device_query::device_id = -1; + +engine_types device_query::get_default_engine_type() { + auto engine_type = engine_types::ocl; +#ifdef OV_GPU_WITH_ZE_RT + engine_type = engine_types::ze; +#endif +#ifdef OV_GPU_WITH_OCL_RT + engine_type = engine_types::ocl; +#endif +#ifdef OV_GPU_WITH_SYCL + engine_type = engine_types::sycl; +#endif + return engine_type; +} +runtime_types device_query::get_default_runtime_type() { + auto rt_type = runtime_types::ocl; +#ifdef OV_GPU_WITH_ZE_RT + rt_type = runtime_types::ze; +#endif +#ifdef OV_GPU_WITH_OCL_RT + rt_type = runtime_types::ocl; +#endif + return rt_type; +} + +device_query::device_query(void* user_context, + void* user_device, + int ctx_device_id, + int target_tile_id, + bool initialize_devices) + : device_query(get_default_engine_type(), + get_default_runtime_type(), + user_context, + user_device, + ctx_device_id, + target_tile_id, + initialize_devices) {} + device_query::device_query(engine_types engine_type, runtime_types runtime_type, void* user_context, @@ -17,17 +55,24 @@ device_query::device_query(engine_types engine_type, int ctx_device_id, int target_tile_id, bool initialize_devices) { - switch (engine_type) { - case engine_types::sycl: - case engine_types::ocl: { - if (runtime_type != runtime_types::ocl) - throw std::runtime_error("Unsupported runtime type for ocl engine"); - + switch (runtime_type) { +#ifdef OV_GPU_WITH_OCL_RT + case runtime_types::ocl: { + OPENVINO_ASSERT(engine_type == engine_types::ocl || engine_type == engine_types::sycl); ocl::ocl_device_detector ocl_detector; _available_devices = ocl_detector.get_available_devices(user_context, user_device, ctx_device_id, target_tile_id, initialize_devices); break; } - default: throw std::runtime_error("Unsupported engine type in device_query"); +#endif +#ifdef OV_GPU_WITH_ZE_RT + case runtime_types::ze: { + OPENVINO_ASSERT(engine_type == 
engine_types::ze); + ze::ze_device_detector ze_detector; + _available_devices = ze_detector.get_available_devices(user_context, user_device, ctx_device_id, target_tile_id, initialize_devices); + break; + } +#endif + default: OPENVINO_THROW("[GPU] Unsupported engine/runtime types in device_query"); } } } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index 7cbf0ed7e77020..959fb414a51c68 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -10,6 +10,7 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "ocl/ocl_engine_factory.hpp" +#include "ze/ze_engine_factory.hpp" #include #include @@ -262,9 +263,16 @@ std::shared_ptr engine::create(engine_types engine_type, runtime_ ret = ocl::create_sycl_engine(device, runtime_type); break; #endif // OV_GPU_WITH_SYCL +#ifdef OV_GPU_WITH_OCL_RT case engine_types::ocl: ret = ocl::create_ocl_engine(device, runtime_type); break; +#endif +#ifdef OV_GPU_WITH_ZE_RT + case engine_types::ze: + ret = ze::create_ze_engine(device, runtime_type); + break; +#endif default: throw std::runtime_error("Invalid engine type"); } @@ -286,4 +294,60 @@ std::shared_ptr engine::create(engine_types engine_type, runtime_ return engine::create(engine_type, runtime_type, device); } +bool engine::check_allocatable(const layout& layout, allocation_type type) { + OPENVINO_ASSERT(supports_allocation(type), "[GPU] Unsupported allocation type: ", type); + + if (!get_enable_large_allocations()) { + bool exceed_allocatable_mem_size = (layout.bytes_count() > get_device_info().max_alloc_mem_size); + + // When dynamic shape upper bound makes bigger buffer, then return false. 
+ if (exceed_allocatable_mem_size && layout.is_dynamic()) { + OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); + return false; + } + + OPENVINO_ASSERT(!exceed_allocatable_mem_size, + "[GPU] Exceeded max size of memory object allocation: ", + "requested ", layout.bytes_count(), " bytes, " + "but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes.", + "Please try to reduce batch size or use lower precision."); + } + + auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); + auto exceed_available_mem_size = (layout.bytes_count() + used_mem > get_max_memory_size()); + + // When dynamic shape upper bound makes bigger buffer, then return false. + if (exceed_available_mem_size && layout.is_dynamic()) { + OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); + return false; + } + +#ifdef __unix__ + // Prevent the process from being killed by the Linux OOM killer + OPENVINO_ASSERT(!exceed_available_mem_size, + "[GPU] Exceeded max size of memory allocation: ", + "Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ", + "but available memory size is ", get_max_memory_size(), " bytes"); +#else + if (exceed_available_mem_size) { + GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : " + << used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl; + GPU_DEBUG_COUT << "Please note that performance might drop due to memory swap." << std::endl; + } +#endif + + return true; +} + +#ifdef ENABLE_ONEDNN_FOR_GPU +dnnl::engine& engine::get_onednn_engine() const { + OPENVINO_ASSERT(_onednn_engine, "[GPU] Can't get onednn engine handle as it was not initialized. 
Please check that create_onednn_engine() was called"); + return *_onednn_engine; +} +#endif + +stream& engine::get_service_stream() const { + return *_service_stream; +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/memory.cpp b/src/plugins/intel_gpu/src/runtime/memory.cpp index 117d934d0d7d0c..46904030b6ee52 100644 --- a/src/plugins/intel_gpu/src/runtime/memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory.cpp @@ -7,8 +7,6 @@ #include "intel_gpu/runtime/stream.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" -#include "ocl/ocl_memory.hpp" - #include #include #include @@ -45,15 +43,6 @@ memory::memory(engine* engine, const layout& layout, allocation_type type, std:: : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), m_mem_tracker(mem_tracker), _type(type) { } -std::unique_ptr surfaces_lock::create(engine_types engine_type, std::vector mem, const stream& stream) { - switch (engine_type) { - case engine_types::sycl: - case engine_types::ocl: - return std::unique_ptr(new ocl::ocl_surfaces_lock(mem, stream)); - default: throw std::runtime_error("Unsupported engine type in surfaces_lock::create"); - } -} - bool surfaces_lock::is_lock_needed(const shared_mem_type& mem_type) { return mem_type == shared_mem_type::shared_mem_vasurface || mem_type == shared_mem_type::shared_mem_dxbuffer || diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp index ca0193f85e1d3c..d935b6ad900e54 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#ifdef ENABLE_ONEDNN_FOR_GPU +#if defined(ENABLE_ONEDNN_FOR_GPU) && defined(OV_GPU_WITH_OCL_RT) #ifndef NOMINMAX # define NOMINMAX #endif #include "gpu/intel/jit/generator.hpp" -#endif // ENABLE_ONEDNN_FOR_GPU +#endif #include "ocl_device.hpp" #include "ocl_common.hpp" @@ -51,7 +51,7 @@ 
namespace ocl { namespace { -#ifdef ENABLE_ONEDNN_FOR_GPU +#if defined(ENABLE_ONEDNN_FOR_GPU) && defined(OV_GPU_WITH_OCL_RT) gpu_arch convert_ngen_arch(ngen::HW gpu_arch) { switch (gpu_arch) { case ngen::HW::Gen9: return gpu_arch::gen9; @@ -344,8 +344,17 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex info.num_ccs = std::max(num_queues, info.num_ccs); } + info.supports_mutable_command_list = false; + + // Not supported + info.timer_resolution = 0; + info.kernel_timestamp_valid_bits = 0; + info.compute_queue_group_ordinal = 0; + info.device_memory_ordinal = 0; + info.supports_cp_offload = false; + info.supports_counter_based_events = false; -#ifdef ENABLE_ONEDNN_FOR_GPU +#if defined(ENABLE_ONEDNN_FOR_GPU) && defined(OV_GPU_WITH_OCL_RT) using namespace dnnl::impl::gpu::intel::jit; if (context.get() != nullptr) { ngen::Product product = ngen::OpenCLCodeGenerator::detectHWInfo(context.get(), device.get()); @@ -377,6 +386,7 @@ bool does_device_support(int32_t param, const cl::Device& device) { memory_capabilities init_memory_caps(const cl::Device& device, const device_info& info) { std::vector memory_caps; + memory_caps.push_back(allocation_type::cl_mem); if (info.supports_usm) { if (does_device_support(CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, device)) { memory_caps.push_back(allocation_type::usm_host); @@ -428,36 +438,7 @@ bool ocl_device::is_same(const device::ptr other) { // Short path if cl_device is the same if (_platform == casted->_platform && _device.get() && casted->_device.get() && _device == casted->_device) return true; - - // Relying solely on the UUID is not reliable in all the cases (particularly on legacy platforms), - // where the UUID may be missing or incorrectly generated - // Therefore, we also validate other attributes - if (_info.uuid.uuid != casted->_info.uuid.uuid) - return false; - - if (_info.pci_info != casted->_info.pci_info) - return false; - - if (_info.sub_device_idx != casted->_info.sub_device_idx) - 
return false; - - if (_info.vendor_id != casted->_info.vendor_id || - _info.dev_name != casted->_info.dev_name || - _info.driver_version != casted->_info.driver_version) - return false; - - if (_info.dev_type != casted->_info.dev_type || - _info.gfx_ver != casted->_info.gfx_ver || - _info.arch != casted->_info.arch) - return false; - - if (_info.ip_version != casted->_info.ip_version || _info.device_id != casted->_info.device_id) - return false; - - if (_info.execution_units_count != casted->_info.execution_units_count || _info.max_global_mem_size != casted->_info.max_global_mem_size) - return false; - - return true; + return _info.is_same_device(casted->_info); } void ocl_device::set_mem_caps(const memory_capabilities& memory_capabilities) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp index 11dd1ea71d0a14..6f3443fe2163ea 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp @@ -69,20 +69,6 @@ bool does_device_match_config(const cl::Device& device) { return true; } -// The priority return by this function impacts the order of devices reported by GPU plugin and devices enumeration -// Lower priority value means lower device ID -// Current behavior is: Intel iGPU < Intel dGPU < any other GPU -// Order of Intel dGPUs is undefined and depends on the OCL impl -// Order of other vendor GPUs is undefined and depends on the OCL impl -size_t get_device_priority(const cldnn::device_info& info) { - if (info.vendor_id == cldnn::INTEL_VENDOR_ID && info.dev_type == cldnn::device_type::integrated_gpu) { - return 0; - } else if (info.vendor_id == cldnn::INTEL_VENDOR_ID) { - return 1; - } else { - return std::numeric_limits::max(); - } -} } // namespace namespace cldnn { @@ -133,15 +119,6 @@ static std::vector getSubDevices(cl::Device& rootDevice) { return subDevices; } -std::vector 
ocl_device_detector::sort_devices(const std::vector& devices_list) { - std::vector sorted_list = devices_list; - std::stable_sort(sorted_list.begin(), sorted_list.end(), [](device::ptr d1, device::ptr d2) { - return get_device_priority(d1->get_info()) < get_device_priority(d2->get_info()); - }); - - return sorted_list; -} - std::map ocl_device_detector::get_available_devices(void* user_context, void* user_device, int ctx_device_id, diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.hpp index 121fc025098b08..0992d734bf512b 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.hpp @@ -25,8 +25,6 @@ class ocl_device_detector { int target_tile_id = -1, bool initialize_devices = false) const; - static std::vector sort_devices(const std::vector& devices_list); - private: std::vector create_device_list() const; std::vector create_device_list_from_user_context(void* user_context, int ctx_device_id = 0) const; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 35cef4bafb88a0..6e45fdd788a109 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -4,7 +4,8 @@ #include "ocl_engine.hpp" #include "intel_gpu/runtime/utils.hpp" -#include "ocl/ocl_kernel.hpp" +#include "ocl_kernel.hpp" +#include "ocl_kernel_builder.hpp" #include "ocl_common.hpp" #include "ocl_memory.hpp" #include "ocl_stream.hpp" @@ -63,15 +64,13 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { if (!_onednn_engine) { auto casted = std::dynamic_pointer_cast(_device); OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine"); - +#ifdef OV_GPU_WITH_ZE_RT + OPENVINO_THROW("[GPU] Using OCL OneDNN API with L0 runtime"); +#else _onednn_engine = 
std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); +#endif } } - -dnnl::engine& ocl_engine::get_onednn_engine() const { - OPENVINO_ASSERT(_onednn_engine, "[GPU] Can't get onednn engine handle as it was not initialized. Please check that create_onednn_engine() was called"); - return *_onednn_engine; -} #endif const cl::Context& ocl_engine::get_cl_context() const { @@ -97,51 +96,6 @@ allocation_type ocl_engine::detect_usm_allocation_type(const void* memory) const : allocation_type::unknown; } -bool ocl_engine::check_allocatable(const layout& layout, allocation_type type) { - OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, "[GPU] Unsupported allocation type: ", type); - - if (!get_enable_large_allocations()) { - bool exceed_allocatable_mem_size = (layout.bytes_count() > get_device_info().max_alloc_mem_size); - - // When dynamic shape upper bound makes bigger buffer, then return false. - if (exceed_allocatable_mem_size && layout.is_dynamic()) { - OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); - return false; - } - - OPENVINO_ASSERT(!exceed_allocatable_mem_size, - "[GPU] Exceeded max size of memory object allocation: ", - "requested ", layout.bytes_count(), " bytes, " - "but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes.", - "Please try to reduce batch size or use lower precision."); - } - - auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); - auto exceed_available_mem_size = (layout.bytes_count() + used_mem > get_max_memory_size()); - - // When dynamic shape upper bound makes bigger buffer, then return false. 
- if (exceed_available_mem_size && layout.is_dynamic()) { - OPENVINO_ASSERT(layout.has_upper_bound(), "[GPU] Dynamic shape without upper bound tries to allocate"); - return false; - } - -#ifdef __unix__ - // Prevent from being killed by Ooo Killer of Linux - OPENVINO_ASSERT(!exceed_available_mem_size, - "[GPU] Exceeded max size of memory allocation: ", - "Required ", layout.bytes_count(), " bytes, already occupied : ", used_mem, " bytes, ", - "but available memory size is ", get_max_memory_size(), " bytes"); -#else - if (exceed_available_mem_size) { - GPU_DEBUG_COUT << "[Warning] [GPU] Exceeded max size of memory allocation: " << "Required " << layout.bytes_count() << " bytes, already occupied : " - << used_mem << " bytes, but available memory size is " << get_max_memory_size() << " bytes" << std::endl; - GPU_DEBUG_COUT << "Please note that performance might drop due to memory swap." << std::endl; - } -#endif - - return true; -} - memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout"); @@ -304,9 +258,10 @@ void* ocl_engine::get_user_context() const { return static_cast(cl_device.get_context().get()); } -kernel::ptr ocl_engine::prepare_kernel(const kernel::ptr kernel) const { - OPENVINO_ASSERT(downcast(kernel.get()) != nullptr); - return kernel; +std::shared_ptr ocl_engine::create_kernel_builder() const { + auto cl_device = std::dynamic_pointer_cast(_device); + OPENVINO_ASSERT(cl_device, "[GPU] Invalid device type for ocl_engine"); + return std::make_shared(*cl_device); } bool ocl_engine::extension_supported(std::string extension) const { @@ -321,10 +276,6 @@ stream::ptr ocl_engine::create_stream(const ExecutionConfig& config, void* handl return std::make_shared(*this, config, handle); } -stream& ocl_engine::get_service_stream() const { - return *_service_stream; -} - std::shared_ptr ocl_engine::create(const 
device::ptr device, runtime_types runtime_type) { return std::make_shared(device, runtime_type); } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp index 0fa77922f03442..e9d152e20fd88c 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.hpp @@ -29,7 +29,6 @@ class ocl_engine : public engine { memory_ptr create_subbuffer(const memory& memory, const layout& new_layout, size_t offset) override; memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override; bool is_the_same_buffer(const memory& mem1, const memory& mem2) override; - bool check_allocatable(const layout& layout, allocation_type type) override; void* get_user_context() const override; @@ -44,26 +43,17 @@ class ocl_engine : public engine { stream_ptr create_stream(const ExecutionConfig& config) const override; stream_ptr create_stream(const ExecutionConfig& config, void *handle) const override; - stream& get_service_stream() const override; - kernel::ptr prepare_kernel(const kernel::ptr kernel) const override; + std::shared_ptr create_kernel_builder() const override; #ifdef ENABLE_ONEDNN_FOR_GPU void create_onednn_engine(const ExecutionConfig& config) override; - // Returns onednn engine object which shares device and context with current engine - dnnl::engine& get_onednn_engine() const override; #endif static std::shared_ptr create(const device::ptr device, runtime_types runtime_type); private: std::string _extensions; - std::unique_ptr _service_stream; - -#ifdef ENABLE_ONEDNN_FOR_GPU - std::mutex onednn_mutex; - std::shared_ptr _onednn_engine; -#endif }; } // namespace ocl diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp index 29a27e5ea6acee..26fbe6fd272487 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp +++ 
b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.cpp @@ -100,16 +100,6 @@ static const std::vector profiling_periods{ }; bool ocl_event::get_profiling_info_impl(std::list& info) { - if (duration_nsec.has_value()) { - auto stage = instrumentation::profiling_stage::executing; - auto duration = std::chrono::nanoseconds(duration_nsec.value()); - auto period = std::make_shared(duration); - - info.push_back({ stage, period }); - - return true; - } - if (!is_event_profiled(_event)) return true; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.hpp index d51b7de50167b1..7efb87d8775405 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_event.hpp @@ -24,10 +24,6 @@ struct ocl_event : public ocl_base_event { : ocl_base_event(queue_stamp) , _event(ev) {} - ocl_event(uint64_t duration_nsec, uint64_t queue_stamp = 0) - : ocl_base_event(queue_stamp) - , duration_nsec(duration_nsec) {} - cl::Event& get() override { return _event; } private: @@ -45,7 +41,6 @@ struct ocl_event : public ocl_base_event { protected: cl::Event _event; - std::optional duration_nsec; }; struct ocl_events : public ocl_base_event { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.cpp new file mode 100644 index 00000000000000..7a59b3c4169b3b --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.cpp @@ -0,0 +1,53 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ocl_kernel.hpp" +#include + +namespace cldnn { +namespace ocl { + +std::vector ocl_kernel::get_binary() const { + // Get the corresponding program object for the kernel + cl_program program; + cl_int error = clGetKernelInfo(_compiled_kernel.get(), CL_KERNEL_PROGRAM, sizeof(program), &program, nullptr); + if (error) { + throw std::runtime_error("Failed to retrieve CL_KERNEL_PROGRAM: " + 
std::to_string(error)); + } + + // Get the size of the program binary in bytes. + size_t binary_size = 0; + error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, nullptr); + if (error) { + throw std::runtime_error("Failed to retrieve CL_PROGRAM_BINARY_SIZES: " + std::to_string(error)); + } + + // Binary is not available for the device. + if (binary_size == 0) + throw std::runtime_error("get_binary: Binary size is zero"); + + // Get program binary. + std::vector binary(binary_size); + uint8_t* binary_buffer = binary.data(); + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, binary_size, &binary_buffer, nullptr); + if (error) { + throw std::runtime_error("Failed to retrieve CL_PROGRAM_BINARIES: " + std::to_string(error)); + } + + return binary; +} + +std::string ocl_kernel::get_build_log() const { + auto program = _compiled_kernel.getInfo(); + auto log = program.getBuildInfo(); + // Assume program was build for only 1 device + // Return first log + if (log.size() > 0) { + return log[0].second; + } + OPENVINO_THROW("[GPU] Failed to retrieve kernel build log"); +} + +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.hpp index 206db55057cf5e..446fe51a7193c9 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel.hpp @@ -33,6 +33,16 @@ class ocl_kernel : public kernel { return std::make_shared(get_handle().clone(), _kernel_id); } + virtual bool is_same(const kernel &other) const { + auto other_ptr = dynamic_cast(&other); + if (other_ptr == nullptr) { + return false; + } + return get_handle().get() == other_ptr->get_handle().get(); + } + + std::vector get_binary() const override; + std::string get_build_log() const override; }; } // namespace ocl diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel_builder.hpp 
b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel_builder.hpp new file mode 100644 index 00000000000000..97b624e4915b47 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_kernel_builder.hpp @@ -0,0 +1,78 @@ +// Copyright (C) 2016-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/kernel_builder.hpp" +#include "intel_gpu/runtime/device.hpp" + +#include "ocl_device.hpp" +#include "ocl_kernel.hpp" + + +namespace cldnn { +namespace ocl { + +class ocl_kernel_builder : public kernel_builder{ + public: + ocl_kernel_builder(const ocl_device &device) : m_device(device) {} + + void build_kernels(const void *src, + size_t src_bytes, + KernelFormat src_format, + const std::string &options, + std::vector &out) const override { + auto context = m_device.get_context().get(); + + cl_program program_handle; + cl_int err = CL_INVALID_VALUE; + switch (src_format) { + case KernelFormat::SOURCE: { + const char **strings = reinterpret_cast(&src); + const size_t *lenghts = &src_bytes; + const cl_uint count = 1; + program_handle = clCreateProgramWithSource(context, count, strings, lenghts, &err); + break; + } + case KernelFormat::NATIVE_BIN: { + const unsigned char **binaries = reinterpret_cast(&src); + const size_t *lenghts = &src_bytes; + const cl_device_id device_id = m_device.get_device().get(); + const cl_uint count = 1; + program_handle = clCreateProgramWithBinary(context, count, &device_id, lenghts, binaries, nullptr, &err); + break; + } + default: + OPENVINO_THROW("[GPU] Trying to build kernel from unexpected format"); + break; + } + if (err != CL_SUCCESS) { + OPENVINO_THROW("[GPU] Failed to create program during kernel build process"); + } + cl::Program program(program_handle); + if (program.build({m_device.get_device()}, options.c_str()) != CL_SUCCESS) { + GPU_DEBUG_INFO << "-------- Kernel build error" << std::endl; + auto log = program.getBuildInfo(); + for (auto &e : log) { + GPU_DEBUG_INFO << 
e.second; + } + GPU_DEBUG_INFO << "-------- End of Kernel build error" << std::endl; + OPENVINO_THROW("[GPU] Failed to build program"); + } + cl::vector kernels; + if (program.createKernels(&kernels) != CL_SUCCESS) { + OPENVINO_THROW("[GPU] Failed to create kernels"); + } + for (auto& k : kernels) { + const auto &entry_point = k.getInfo(); + out.push_back(std::make_shared(ocl::ocl_kernel_type(k, m_device.get_usm_helper()), entry_point)); + } + } + + private: + const ocl_device &m_device; +}; +} // namespace ocl +} // namespace cldnn + diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index fc238724f6e30f..cd41fa46f3bcec 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -229,7 +229,11 @@ event::ptr gpu_buffer::copy_to(stream& stream, void* data_ptr, size_t src_offset dnnl::memory gpu_buffer::get_onednn_memory(dnnl::memory::desc desc, int64_t offset) const { auto onednn_engine = _engine->get_onednn_engine(); dnnl::memory dnnl_mem(desc, onednn_engine, DNNL_MEMORY_NONE); +#ifdef OV_GPU_WITH_ZE_RT + OPENVINO_THROW("[GPU] Using OCL OneDNN API with L0 runtime"); +#else dnnl::ocl_interop::set_mem_object(dnnl_mem, _buffer.get()); +#endif return dnnl_mem; } #endif @@ -661,9 +665,13 @@ event::ptr gpu_usm::copy_to(stream& stream, void* data_ptr, size_t src_offset, s #ifdef ENABLE_ONEDNN_FOR_GPU dnnl::memory gpu_usm::get_onednn_memory(dnnl::memory::desc desc, int64_t offset) const { auto onednn_engine = _engine->get_onednn_engine(); +#ifdef OV_GPU_WITH_ZE_RT + OPENVINO_THROW("[GPU] Using OCL OneDNN API with L0 runtime"); +#else dnnl::memory dnnl_mem = dnnl::ocl_interop::make_memory(desc, onednn_engine, dnnl::ocl_interop::memory_kind::usm, reinterpret_cast(_buffer.get()) + offset); return dnnl_mem; +#endif } #endif diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp 
b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp index f5e9b74a5e681b..a7a02d69f205f7 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp @@ -11,6 +11,7 @@ #include "intel_gpu/runtime/debug_configuration.hpp" #include "ocl_kernel.hpp" #include "ocl_common.hpp" +#include "ocl_memory.hpp" #include #include @@ -239,7 +240,11 @@ dnnl::stream& ocl_stream::get_onednn_stream() { OPENVINO_ASSERT(m_queue_type == QueueTypes::in_order, "[GPU] Can't create onednn stream handle as onednn doesn't support out-of-order queue"); OPENVINO_ASSERT(_engine.get_device_info().vendor_id == INTEL_VENDOR_ID, "[GPU] Can't create onednn stream handle as for non-Intel devices"); if (!_onednn_stream) { +#ifdef OV_GPU_WITH_ZE_RT + OPENVINO_THROW("[GPU] Using OCL OneDNN API with L0 runtime"); +#else _onednn_stream = std::make_shared(dnnl::ocl_interop::make_stream(_engine.get_onednn_engine(), _command_queue.get())); +#endif } return *_onednn_stream; @@ -362,6 +367,10 @@ event::ptr ocl_stream::create_base_event() { return std::make_shared(ret_ev, ++_queue_counter); } +std::unique_ptr ocl_stream::create_surfaces_lock(const std::vector &mem) const { + return std::unique_ptr(new ocl::ocl_surfaces_lock(mem, *this)); +} + void ocl_stream::flush() const { try { get_cl_queue().flush(); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp index b9c51ccb046508..c86089bce08b46 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.hpp @@ -47,6 +47,7 @@ class ocl_stream : public stream { void enqueue_barrier() override; event::ptr create_user_event(bool set) override; event::ptr create_base_event() override; + std::unique_ptr create_surfaces_lock(const std::vector &mem) const override; const cl::UsmHelper& get_usm_helper() const { return _engine.get_usm_helper(); } diff --git 
a/src/plugins/intel_gpu/src/runtime/stream.cpp b/src/plugins/intel_gpu/src/runtime/stream.cpp index 913d84d8f476f5..2bd8a74f857a4d 100644 --- a/src/plugins/intel_gpu/src/runtime/stream.cpp +++ b/src/plugins/intel_gpu/src/runtime/stream.cpp @@ -4,21 +4,10 @@ #include "intel_gpu/runtime/stream.hpp" -#include "ocl/ocl_stream.hpp" - #include namespace cldnn { -QueueTypes stream::detect_queue_type(engine_types engine_type, void* queue_handle) { - switch (engine_type) { - case engine_types::sycl: - case engine_types::ocl: - return ocl::ocl_stream::detect_queue_type(queue_handle); - default: throw std::runtime_error("Invalid engine type"); - } -} - SyncMethods stream::get_expected_sync_method(const ExecutionConfig& config) { auto profiling = config.get_enable_profiling(); auto queue_type = config.get_queue_type(); diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_base_event.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_base_event.hpp new file mode 100644 index 00000000000000..0f121e0b8e45b6 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_base_event.hpp @@ -0,0 +1,45 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/event.hpp" +#include "ze_base_event_factory.hpp" + +#include +#include +#include + +namespace cldnn { +namespace ze { + +// Base interface for Level Zero events +struct ze_base_event : public event { +public: + explicit ze_base_event(uint64_t queue_stamp) + : event() + , m_queue_stamp(queue_stamp) { } + uint64_t get_queue_stamp() const { return m_queue_stamp; } + void set_queue_stamp(uint64_t val) { m_queue_stamp = val; } + + virtual ze_event_handle_t get_handle() const = 0; + virtual std::optional query_timestamp() = 0; + +protected: + uint64_t m_queue_stamp = 0; + + static std::chrono::nanoseconds timestamp_to_duration(const device_info &info, const ze_kernel_timestamp_data_t& timestamp) { + constexpr double NS_IN_SEC = 1000000000.0; + const double 
timestamp_freq = NS_IN_SEC / info.timer_resolution; + const uint64_t timestamp_max_value = ~(-1L << info.kernel_timestamp_valid_bits); + + auto d = (timestamp.kernelEnd >= timestamp.kernelStart) ? + (timestamp.kernelEnd - timestamp.kernelStart) * timestamp_freq + : ((timestamp_max_value - timestamp.kernelStart) + timestamp.kernelEnd + 1) * timestamp_freq; + return std::chrono::nanoseconds(static_cast(d)); + } +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_base_event_factory.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_base_event_factory.hpp new file mode 100644 index 00000000000000..8a3febc28f0393 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_base_event_factory.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_engine.hpp" +#include "intel_gpu/runtime/event.hpp" + +namespace cldnn { +namespace ze { + +// Interface for creating Level Zero events +struct ze_base_event_factory { +public: + ze_base_event_factory(const ze_engine &engine, bool enable_profiling) + : m_engine(engine), m_profiling_enabled(enable_profiling) {} + const ze_engine& get_engine() const { return m_engine; } + bool is_profiling_enabled() const { return m_profiling_enabled; } + + virtual ~ze_base_event_factory() {} + virtual event::ptr create_event(uint64_t queue_stamp) = 0; +protected: + const ze_engine& m_engine; + const bool m_profiling_enabled; +}; +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_common.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_common.hpp new file mode 100644 index 00000000000000..d7456d74c7459f --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_common.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include "intel_gpu/runtime/debug_configuration.hpp" + +#include + +#include +#include + + +// 
Expect success of level zero command, throw runtime error otherwise +#define OV_ZE_EXPECT(f) \ + do { \ + ze_result_t res_ = (f); \ + if (res_ != ZE_RESULT_SUCCESS) { \ + throw std::runtime_error(#f " command failed with code " + std::to_string(res_)); \ + } \ + } while (false) + +// Prints warning if level zero command does not return success result +#define OV_ZE_WARN(f) \ + do { \ + ze_result_t res_ = (f); \ + if (res_ != ZE_RESULT_SUCCESS) { \ + GPU_DEBUG_COUT << ("[Warning] [GPU] " #f " command failed with code " + std::to_string(res_)); \ + } \ + } while (false) + +namespace cldnn { +namespace ze { + +static constexpr uint64_t endless_wait = std::numeric_limits::max(); +static constexpr ze_module_format_t ze_module_format_oclc = (ze_module_format_t) 3U; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.cpp new file mode 100644 index 00000000000000..03e342d0f571ad --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_counter_based_event.hpp" +#include "ze/ze_common.hpp" + +#include +#include +#include + +using namespace cldnn; +using namespace ze; + +void ze_counter_based_event::wait_impl() { + OV_ZE_EXPECT(zeEventHostSynchronize(m_event, endless_wait)); +} + +void ze_counter_based_event::set_impl() { + // Counter based events start in signaled state and can not be signaled from host +} + +bool ze_counter_based_event::is_set_impl() { + auto ret = zeEventQueryStatus(m_event); + switch (ret) { + case ZE_RESULT_SUCCESS: + return true; + break; + case ZE_RESULT_NOT_READY: + return false; + break; + default: + OPENVINO_THROW("[GPU] Query event returned unexpected value: ", std::to_string(ret)); + break; + } +} + +ze_event_handle_t ze_counter_based_event::get_handle() const { + return 
m_event; +} + +std::optional ze_counter_based_event::query_timestamp() { + if (!m_factory.is_profiling_enabled()) { + return std::nullopt; + } + ze_kernel_timestamp_result_t timestamp{}; + OV_ZE_EXPECT(zeEventQueryKernelTimestamp(m_event, ×tamp)); + return timestamp; +} + +bool ze_counter_based_event::get_profiling_info_impl(std::list& info) { + auto opt_timestamp = query_timestamp(); + if (!opt_timestamp.has_value()) { + return true; + } + ze_kernel_timestamp_result_t timestamp = opt_timestamp.value(); + auto &dev_info = m_factory.get_engine().get_device_info(); + auto wallclock_time = timestamp_to_duration(dev_info, timestamp.global); + auto exec_time = timestamp_to_duration(dev_info, timestamp.context); + + auto period_exec = std::make_shared(timestamp_to_duration(dev_info, timestamp.context)); + auto period_submit = std::make_shared(wallclock_time - exec_time); + + info.push_back({ instrumentation::profiling_stage::executing, period_exec }); + info.push_back({ instrumentation::profiling_stage::submission, period_submit }); + + return true; +} + +ze_counter_based_event::~ze_counter_based_event() { + OV_ZE_WARN(zeEventDestroy(m_event)); +} \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.hpp new file mode 100644 index 00000000000000..8c8131853c0b69 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event.hpp" + +namespace cldnn { +namespace ze { + + +// L0 counter based event. +// Signaled state is inferred from the number of tasks completed on device. +// Resetting counter based event is not allowed. +// Start in signaled state and signaling from host is not allowed. +// Can only be used with in-order command lists. 
+struct ze_counter_based_event : public ze_base_event { +public: + // Take ownership of counter based event handle + ze_counter_based_event(uint64_t queue_stamp, const ze_base_event_factory& factory, ze_event_handle_t ev) + : ze_base_event(queue_stamp) + , m_factory(factory) + , m_event(ev) { + // Ensure event handle is not null + if (ev == nullptr) { + OPENVINO_THROW("[GPU] Trying to create event with null handle"); + } + } + ze_counter_based_event(const ze_counter_based_event&) = delete; + ze_counter_based_event& operator=(const ze_counter_based_event&) = delete; + ~ze_counter_based_event(); + void wait_impl() override; + void set_impl() override; + bool is_set_impl() override; + ze_event_handle_t get_handle() const override; + std::optional query_timestamp() override; + bool get_profiling_info_impl(std::list& info) override; + +protected: + const ze_base_event_factory& m_factory; + ze_event_handle_t m_event; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.cpp new file mode 100644 index 00000000000000..09ad6800869af5 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_counter_based_event_factory.hpp" +#include "ze_common.hpp" +#include "ze_counter_based_event.hpp" + +#include "zex_event.h" + +using namespace cldnn; +using namespace ze; +namespace { + decltype(zexCounterBasedEventCreate2) *func_zexCounterBasedEventCreate2 = nullptr; + void find_function_address(ze_driver_handle_t driver) { + OV_ZE_EXPECT(zeDriverGetExtensionFunctionAddress(driver, + "zexCounterBasedEventCreate2", + reinterpret_cast(&func_zexCounterBasedEventCreate2))); + } +} + +ze_counter_based_event_factory::ze_counter_based_event_factory(const ze_engine &engine, bool enable_profiling) + : 
ze_base_event_factory(engine, enable_profiling) { + if (func_zexCounterBasedEventCreate2 == nullptr) { + find_function_address(engine.get_driver()); + } +} + +event::ptr ze_counter_based_event_factory::create_event(uint64_t queue_stamp) { + std::lock_guard lock(_mutex); + + ze_event_handle_t event; + auto desc = defaultIntelCounterBasedEventDesc; + if (is_profiling_enabled()) { + desc.flags |= ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP; + } + OV_ZE_EXPECT(func_zexCounterBasedEventCreate2(m_engine.get_context(), m_engine.get_device(), &desc, &event)); + auto cb_event = std::make_shared(queue_stamp, *this, event); + return cb_event; +} diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.hpp new file mode 100644 index 00000000000000..6a1c93728d93dc --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_counter_based_event_factory.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event_factory.hpp" + +#include + +namespace cldnn { +namespace ze { + +// Interface for creating l0 counter based events +// Should only be used with in-order queue +struct ze_counter_based_event_factory : public ze_base_event_factory { +public: + ze_counter_based_event_factory(const ze_engine &engine, bool enable_profiling); + event::ptr create_event(uint64_t queue_stamp) override; +protected: + std::mutex _mutex; +}; +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_device.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_device.cpp new file mode 100644 index 00000000000000..e6f965d6416929 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_device.cpp @@ -0,0 +1,373 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_device.hpp" +#include "ze_common.hpp" +#include "zex_common.h" + +#include 
+#include +#include +#include +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include +#include +#include +#else +#include +#include +#include +#include +#endif + +#ifdef ENABLE_ONEDNN_FOR_GPU +#include "gpu/intel/jit/generator.hpp" +#endif + +namespace cldnn { +namespace ze { + +namespace { +#ifdef ENABLE_ONEDNN_FOR_GPU +//TODO merge this with ocl_device +gpu_arch convert_ngen_arch(ngen::HW gpu_arch) { + switch (gpu_arch) { + case ngen::HW::Gen9: return gpu_arch::gen9; + case ngen::HW::Gen11: return gpu_arch::gen11; + case ngen::HW::XeLP: return gpu_arch::xe_lp; + case ngen::HW::XeHP: return gpu_arch::xe_hp; + case ngen::HW::XeHPG: return gpu_arch::xe_hpg; + case ngen::HW::XeHPC: return gpu_arch::xe_hpc; + case ngen::HW::Xe2: return gpu_arch::xe2; + case ngen::HW::Xe3: return gpu_arch::xe3; + case ngen::HW::Gen10: + case ngen::HW::Unknown: return gpu_arch::unknown; + } + return gpu_arch::unknown; +} +#endif + +gfx_version parse_version(uint32_t gmdid) { + union GMDID { + uint32_t value; + struct { + uint32_t revision : 6; + uint32_t reserved : 8; + uint32_t release : 8; + uint32_t architecture : 10; + }; + }; + + GMDID gmd_id = {gmdid}; + if (gmd_id.architecture > 0 && gmd_id.architecture < 100) { + // New format + return { static_cast(gmd_id.architecture), static_cast(gmd_id.release), static_cast(gmd_id.revision)}; + } else { + // Old format + uint32_t ver = gmdid; + uint16_t major = ver >> 16; + uint8_t minor = (ver >> 8) & 0xFF; + uint8_t revision = ver & 0xFF; + + return {major, minor, revision}; + } +} + +bool supports_extension(const std::vector& extensions, const std::string& ext_name, uint32_t ext_ver) { + return std::find_if(extensions.begin(), extensions.end(), [&ext_name, &ext_ver](const ze_driver_extension_properties_t& ep) { + return std::string(ep.name) == ext_name && ep.version == ext_ver; + }) != extensions.end(); +} + +device_info init_device_info(ze_driver_handle_t driver, 
ze_device_handle_t device) { + device_info info; + + uint32_t num_ext = 0; + OV_ZE_EXPECT(zeDriverGetExtensionProperties(driver, &num_ext, nullptr)); + + std::vector extensions(num_ext); + OV_ZE_EXPECT(zeDriverGetExtensionProperties(driver, &num_ext, &extensions[0])); + + ze_driver_properties_t driver_properties{ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES}; + OV_ZE_EXPECT(zeDriverGetProperties(driver, &driver_properties)); + + bool supports_luid = supports_extension(extensions, ZE_DEVICE_LUID_EXT_NAME, ZE_DEVICE_LUID_EXT_VERSION_1_0); + bool supports_ip_version = supports_extension(extensions, ZE_DEVICE_IP_VERSION_EXT_NAME, ZE_DEVICE_IP_VERSION_VERSION_1_0); + bool supports_mutable_list = supports_extension(extensions, ZE_MUTABLE_COMMAND_LIST_EXP_NAME, ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_0); + bool supports_pci_properties = supports_extension(extensions, ZE_PCI_PROPERTIES_EXT_NAME, ZE_PCI_PROPERTIES_EXT_VERSION_1_0); + bool supports_counter_based_events = supports_extension(extensions, ZEX_COUNTER_BASED_EVENT_EXT_NAME, ZEX_COUNTER_BASED_EVENT_VERSION_1_0); + bool supports_cp_offload = + supports_extension(extensions, ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_NAME, ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_VERSION_1_0); + bool supports_dp_properties = + supports_extension(extensions, ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME, ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_1_0); + + void *device_properties_next = nullptr; + ze_device_ip_version_ext_t ip_version_properties = {ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT, device_properties_next, 0}; + if (supports_ip_version) { + device_properties_next = &ip_version_properties; + } + ze_device_properties_t device_properties{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, device_properties_next}; + OV_ZE_EXPECT(zeDeviceGetProperties(device, &device_properties)); + + ze_device_compute_properties_t device_compute_properties{ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES}; + OV_ZE_EXPECT(zeDeviceGetComputeProperties(device, 
&device_compute_properties)); + + uint32_t queue_properties_count = 0; + OV_ZE_EXPECT(zeDeviceGetCommandQueueGroupProperties(device, &queue_properties_count, nullptr)); + + std::vector queue_properties(queue_properties_count); + for (auto& mp : queue_properties) { + mp.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; + } + + OV_ZE_EXPECT(zeDeviceGetCommandQueueGroupProperties(device, &queue_properties_count, &queue_properties[0])); + + auto compute_queue_props = std::find_if(queue_properties.begin(), queue_properties.end(), [](const ze_command_queue_group_properties_t& qp) { + return (qp.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) != 0; + }); + + OPENVINO_ASSERT(compute_queue_props != queue_properties.end()); + + uint32_t memory_properties_count = 0; + OV_ZE_EXPECT(zeDeviceGetMemoryProperties(device, &memory_properties_count, nullptr)); + + std::vector device_memory_properties(memory_properties_count); + for (auto& mp : device_memory_properties) { + mp.stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES; + } + OV_ZE_EXPECT(zeDeviceGetMemoryProperties(device, &memory_properties_count, &device_memory_properties[0])); + + ze_device_memory_access_properties_t device_memory_access_properties{ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES}; + OV_ZE_EXPECT(zeDeviceGetMemoryAccessProperties(device, &device_memory_access_properties)); + + auto mem_properties = std::find_if(device_memory_properties.begin(), device_memory_properties.end(), [](const ze_device_memory_properties_t& p) { + auto name = std::string(p.name); + return name == "DDR" || name == "HBM"; + }); + + ze_device_module_properties_t device_module_properties{ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES}; + ze_intel_device_module_dp_exp_properties_t dp_properties{ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES, nullptr}; + if (supports_dp_properties) { + device_module_properties.pNext = &dp_properties; + } + OV_ZE_EXPECT(zeDeviceGetModuleProperties(device, &device_module_properties)); + + 
ze_device_image_properties_t device_image_properties{ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES}; + OV_ZE_EXPECT(zeDeviceGetImageProperties(device, &device_image_properties)); + + info.vendor_id = device_properties.vendorId; + info.dev_name = device_properties.name; + // L0 returns drivers version in different format than OCL + info.driver_version = std::to_string(driver_properties.driverVersion); + info.dev_type = (device_properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) ? device_type::integrated_gpu : device_type::discrete_gpu; + + info.execution_units_count = device_properties.numEUsPerSubslice * device_properties.numSubslicesPerSlice * device_properties.numSlices; + + info.gpu_frequency = device_properties.coreClockRate; + + info.supported_simd_sizes.resize(device_compute_properties.numSubGroupSizes); + std::copy_n(device_compute_properties.subGroupSizes, device_compute_properties.numSubGroupSizes, info.supported_simd_sizes.begin()); + info.has_separate_cache = true; + + info.max_work_group_size = device_compute_properties.maxTotalGroupSize; + info.max_local_mem_size = device_compute_properties.maxSharedLocalMemory; + uint32_t cache_properties_count = 0; + OV_ZE_EXPECT(zeDeviceGetCacheProperties(device, &cache_properties_count, nullptr)); + info.max_global_cache_size = 0; + if (cache_properties_count > 0) { + std::vector cache_properties(cache_properties_count); + OV_ZE_EXPECT(zeDeviceGetCacheProperties(device, &cache_properties_count, cache_properties.data())); + // Assume first property is L3 cache + info.max_global_cache_size = cache_properties[0].cacheSize; + } + + if (mem_properties != device_memory_properties.end()) { + info.max_global_mem_size = mem_properties->totalSize; + info.device_memory_ordinal = std::distance(device_memory_properties.begin(), mem_properties); + } else { + info.max_global_mem_size = 0; + info.device_memory_ordinal = 0; + } + + info.max_alloc_mem_size = device_properties.maxMemAllocSize; + + info.supports_image = 
device_image_properties.maxSamplers > 0; + info.max_image2d_width = device_image_properties.maxImageDims2D; + info.max_image2d_height = device_image_properties.maxImageDims2D; + + info.supports_fp16 = (device_module_properties.flags & ZE_DEVICE_MODULE_FLAG_FP16) != 0; + info.supports_fp64 = (device_module_properties.flags & ZE_DEVICE_MODULE_FLAG_FP64) != 0; + info.supports_fp16_denorms = info.supports_fp16 && (device_module_properties.fp16flags & ZE_DEVICE_FP_FLAG_DENORM) != 0; + + info.supports_cp_offload = supports_cp_offload; + info.supports_counter_based_events = supports_counter_based_events; + + info.supports_imad = (device_module_properties.flags & ZE_DEVICE_MODULE_FLAG_DP4A) != 0; + info.supports_immad = supports_dp_properties && (dp_properties.flags & ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS) != 0; + + info.supports_usm = device_memory_access_properties.hostAllocCapabilities && device_memory_access_properties.deviceAllocCapabilities; + + // FIXME: Could not find how to retrieve those from L0 + info.supports_work_group_collective_functions = false; + info.supports_intel_planar_yuv = false; + info.supports_khr_subgroups = true; + info.supports_intel_subgroups = true; + info.supports_intel_subgroups_short = true; + info.supports_intel_subgroups_char = true; + info.supports_intel_required_subgroup_size = true; + info.supports_queue_families = true; + + if (supports_ip_version) { + info.ip_version = ip_version_properties.ipVersion; + info.gfx_ver = parse_version(ip_version_properties.ipVersion); + } + info.sub_device_idx = (std::numeric_limits::max)(); + + info.device_id = device_properties.deviceId; + info.num_slices = device_properties.numSlices; + info.num_sub_slices_per_slice = device_properties.numSubslicesPerSlice; + info.num_eus_per_sub_slice = device_properties.numEUsPerSubslice; + info.num_threads_per_eu = device_properties.numThreadsPerEU; + + info.num_ccs = compute_queue_props->numQueues; + + info.kernel_timestamp_valid_bits = 
device_properties.kernelTimestampValidBits; + info.timer_resolution = device_properties.timerResolution; + info.compute_queue_group_ordinal = std::distance(queue_properties.begin(), compute_queue_props); + + static_assert(ZE_MAX_DEVICE_UUID_SIZE == ov::device::UUID::MAX_UUID_SIZE, ""); + static_assert(ZE_MAX_DEVICE_LUID_SIZE_EXT == ov::device::LUID::MAX_LUID_SIZE, ""); + std::copy_n(&device_properties.uuid.id[0], ZE_MAX_DEVICE_UUID_SIZE, info.uuid.uuid.begin()); + + if (supports_luid) { + ze_device_luid_ext_properties_t luid_props{ZE_STRUCTURE_TYPE_DEVICE_LUID_EXT_PROPERTIES, nullptr}; + ze_device_properties_t device_properties{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &luid_props}; + if (zeDeviceGetProperties(device, &device_properties) == ZE_RESULT_SUCCESS) + std::copy_n(&luid_props.luid.id[0], ZE_MAX_DEVICE_LUID_SIZE_EXT, info.luid.luid.begin()); + } + + info.supports_mutable_command_list = false; + if (supports_mutable_list) { + ze_mutable_command_list_exp_properties_t mutable_list_props = { ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_PROPERTIES, nullptr, 0, 0 }; + ze_device_properties_t device_properties{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, &mutable_list_props}; + if (zeDeviceGetProperties(device, &device_properties) == ZE_RESULT_SUCCESS) { + ze_mutable_command_exp_flags_t required_features = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION | + ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | + ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET | + ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT | + ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS; + + info.supports_mutable_command_list = (mutable_list_props.mutableCommandFlags & required_features) == required_features; + } + } + if (supports_pci_properties) { + ze_pci_ext_properties_t pci_properties{ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES, nullptr}; + if (zeDevicePciGetPropertiesExt(device, &pci_properties) == ZE_RESULT_SUCCESS) { + info.pci_info.pci_bus = 
pci_properties.address.bus; + info.pci_info.pci_device = pci_properties.address.device; + info.pci_info.pci_domain = pci_properties.address.domain; + info.pci_info.pci_function = pci_properties.address.function; + } + } + +#ifdef ENABLE_ONEDNN_FOR_GPU + using namespace dnnl::impl::gpu::intel::jit; + // Create temporary context just for OneDNN HW detection + ze_context_desc_t context_desc = { ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0 }; + ze_context_handle_t context; + OV_ZE_EXPECT(zeContextCreate(driver, &context_desc, &context)); + ngen::Product product = ngen::LevelZeroCodeGenerator::detectHWInfo(context, device); + zeContextDestroy(context); + info.arch = convert_ngen_arch(ngen::getCore(product.family)); + + if (product.family == ngen::ProductFamily::Unknown) { + info.supports_immad = false; + } +#else // ENABLE_ONEDNN_FOR_GPU + info.arch = gpu_arch::unknown; +#endif // ENABLE_ONEDNN_FOR_GPU + + return info; +} + +memory_capabilities init_memory_caps(ze_device_handle_t device, const device_info& info) { + std::vector memory_caps; + + ze_device_memory_access_properties_t device_memory_access_properties{ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES}; + OV_ZE_EXPECT(zeDeviceGetMemoryAccessProperties(device, &device_memory_access_properties)); + + if (info.supports_usm) { + if (device_memory_access_properties.hostAllocCapabilities) { + memory_caps.push_back(allocation_type::usm_host); + } + if (device_memory_access_properties.sharedSingleDeviceAllocCapabilities) { + memory_caps.push_back(allocation_type::usm_shared); + } + if (device_memory_access_properties.deviceAllocCapabilities) { + memory_caps.push_back(allocation_type::usm_device); + } + } + + return memory_capabilities(memory_caps); +} + +} // namespace + + +ze_device::ze_device(ze_driver_handle_t driver, ze_device_handle_t device, bool initialize) +: _driver(driver) +, _device(device) +, _info(init_device_info(driver, device)) +, _mem_caps(init_memory_caps(device, _info)) { + if (initialize) { + 
this->initialize(); + } +} + +void ze_device::initialize() { + if (_is_initialized) + return; + + ze_context_desc_t context_desc = { ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0 }; + OV_ZE_EXPECT(zeContextCreate(_driver, &context_desc, &_context)); + _is_initialized = true; +} + +bool ze_device::is_initialized() const { + return _is_initialized; +} + +bool ze_device::is_same(const device::ptr other) { + auto casted = downcast(other.get()); + if (!casted) + return false; + + if (is_initialized() && casted->is_initialized()) { + // Do not compare contexts as one driver can have many different contexts + return _device == casted->get_device() && _driver == casted->get_driver(); + } + return _info.is_same_device(casted->_info); +} + +void ze_device::set_mem_caps(const memory_capabilities& memory_capabilities) { + _mem_caps = memory_capabilities; +} + +ze_device::~ze_device() { + //FIXME segfault + //if (_is_initialized) + // zeContextDestroy(_context); +} + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_device.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_device.hpp new file mode 100644 index 00000000000000..1a20685ed2cc77 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_device.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/device.hpp" +#include + +namespace cldnn { +namespace ze { + +struct ze_device : public device { +public: + ze_device(ze_driver_handle_t driver, ze_device_handle_t device, bool initialize = true); + + const device_info& get_info() const override { return _info; } + memory_capabilities get_mem_caps() const override { return _mem_caps; } + + void initialize() override; + bool is_initialized() const override; + + const ze_driver_handle_t get_driver() const { return _driver; } + const ze_device_handle_t get_device() const { return _device; } + const ze_context_handle_t get_context() const { 
return _context; } + + bool is_same(const device::ptr other) override; + void set_mem_caps(const memory_capabilities& memory_capabilities) override; + + ~ze_device(); + +private: + ze_driver_handle_t _driver = nullptr; + ze_device_handle_t _device = nullptr; + ze_context_handle_t _context = nullptr; + bool _is_initialized = false; + + device_info _info; + memory_capabilities _mem_caps; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.cpp new file mode 100644 index 00000000000000..a40f4a7b690185 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_device_detector.hpp" +#include "ze_device.hpp" +#include "ze_common.hpp" +#include +#include "intel_gpu/runtime/debug_configuration.hpp" +#include "openvino/core/except.hpp" + +#include + +namespace cldnn { +namespace ze { + +static std::vector get_sub_devices(ze_device_handle_t root_device) { + uint32_t n_subdevices = 0; + OV_ZE_EXPECT(zeDeviceGetSubDevices(root_device, &n_subdevices, nullptr)); + if (n_subdevices == 0) + return {}; + + std::vector subdevices(n_subdevices); + + OV_ZE_EXPECT(zeDeviceGetSubDevices(root_device, &n_subdevices, &subdevices[0])); + + return subdevices; +} + +std::map ze_device_detector::get_available_devices(void* user_context, + void* user_device, + int ctx_device_id, + int target_tile_id, + bool initialize_devices) const { + std::vector devices_list; + if (user_context != nullptr) { + devices_list = create_device_list_from_user_context(user_context, ctx_device_id); + } else if (user_device != nullptr) { + devices_list = create_device_list_from_user_device(user_device); + } else { + devices_list = create_device_list(initialize_devices); + } + + devices_list = sort_devices(devices_list); + + std::map ret; + uint32_t idx = 0; 
+ for (auto& dptr : devices_list) { + auto map_id = std::to_string(idx++); + ret[map_id] = dptr; + + auto root_device = std::dynamic_pointer_cast(dptr); + OPENVINO_ASSERT(root_device != nullptr, "[GPU] Invalid device type created in ocl_device_detector"); + + auto sub_devices = get_sub_devices(root_device->get_device()); + if (!sub_devices.empty()) { + uint32_t sub_idx = 0; + for (auto& sub_device : sub_devices) { + if (target_tile_id != -1 && static_cast(sub_idx) != target_tile_id) { + sub_idx++; + continue; + } + auto sub_device_ptr = std::make_shared(root_device->get_driver(), sub_device, initialize_devices); + ret[map_id + "." + std::to_string(sub_idx++)] = sub_device_ptr; + } + } + } + + return ret; +} + +std::vector ze_device_detector::create_device_list(bool initialize_devices) const { + std::vector ret; + + OV_ZE_EXPECT(zeInit(ZE_INIT_FLAG_GPU_ONLY)); + + uint32_t driver_count = 0; + OV_ZE_EXPECT(zeDriverGet(&driver_count, nullptr)); + + std::vector all_drivers(driver_count); + OV_ZE_EXPECT(zeDriverGet(&driver_count, &all_drivers[0])); + + for (uint32_t i = 0; i < driver_count; ++i) { + uint32_t device_count = 0; + OV_ZE_EXPECT(zeDeviceGet(all_drivers[i], &device_count, nullptr)); + + std::vector all_devices(device_count); + OV_ZE_EXPECT(zeDeviceGet(all_drivers[i], &device_count, &all_devices[0])); + + for (uint32_t d = 0; d < device_count; ++d) { + try { + ze_device_properties_t device_properties{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; + OV_ZE_EXPECT(zeDeviceGetProperties(all_devices[d], &device_properties)); + + if (ZE_DEVICE_TYPE_GPU == device_properties.type) { + ret.emplace_back(std::make_shared(all_drivers[i], all_devices[d], initialize_devices)); + } + } catch (std::exception& ex) { + GPU_DEBUG_LOG << "Devices query/creation failed for driver " << i << ex.what() << std::endl; + GPU_DEBUG_LOG << "Platform is skipped" << std::endl; + continue; + } + } + } + + return ret; +} + +std::vector ze_device_detector::create_device_list_from_user_context(void* 
user_context, int ctx_device_id) const { + OPENVINO_NOT_IMPLEMENTED; +} + +std::vector ze_device_detector::create_device_list_from_user_device(void* user_device) const { + OPENVINO_NOT_IMPLEMENTED; +} + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.hpp new file mode 100644 index 00000000000000..deeefb36234a59 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_device_detector.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/device.hpp" + +#include +#include +#include + +namespace cldnn { +namespace ze { + +class ze_device_detector { +public: + ze_device_detector() = default; + + std::map get_available_devices(void* user_context, + void* user_device, + int ctx_device_id, + int target_tile_id, + bool initialize_devices = false) const; +private: + std::vector create_device_list(bool initialize_devices) const; + std::vector create_device_list_from_user_context(void* user_context, int ctx_device_id = 0) const; + std::vector create_device_list_from_user_device(void* user_device) const; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_empty_event.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_empty_event.hpp new file mode 100644 index 00000000000000..0d16bfeb9d2280 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_empty_event.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event.hpp" + +namespace cldnn { +namespace ze { + + +// Event that does not have underlying Level Zero event object. +// It is always in signaled state. 
+struct ze_empty_event : public ze_base_event { +public: + ze_empty_event(uint64_t queue_stamp) + : ze_base_event(queue_stamp) { } + + void wait_impl() override { } + void set_impl() override { } + bool is_set_impl() override { return true; } + ze_event_handle_t get_handle() const override { return nullptr; } + std::optional query_timestamp() override { return std::nullopt; } + bool get_profiling_info_impl(std::list& info) override { + return true; + } +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_engine.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_engine.cpp new file mode 100644 index 00000000000000..5168e0364c4bd8 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_engine.cpp @@ -0,0 +1,180 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_engine.hpp" +#include "intel_gpu/runtime/utils.hpp" +#include "openvino/core/except.hpp" +#include "ze_kernel_builder.hpp" +#include "ze_api.h" +#include "ze_engine_factory.hpp" +#include "ze_common.hpp" +#include "ze_memory.hpp" +#include "ze_stream.hpp" +#include "ze_device.hpp" +#include "ze_kernel.hpp" +#include "ze_module_holder.hpp" +#include "ze_kernel_holder.hpp" +#include +#include +#include +#include + +#ifdef ENABLE_ONEDNN_FOR_GPU +#include +#endif +namespace cldnn { +namespace ze { + +ze_engine::ze_engine(const device::ptr dev, runtime_types runtime_type) + : engine(dev) { + OPENVINO_ASSERT(runtime_type == runtime_types::ze, "[GPU] Invalid runtime type specified for ZE engine. 
Only ZE runtime is supported"); + + auto casted = dynamic_cast(dev.get()); + OPENVINO_ASSERT(casted, "[GPU] Invalid device type passed to ze engine"); + + _service_stream.reset(new ze_stream(*this, ExecutionConfig())); +} + +#ifdef ENABLE_ONEDNN_FOR_GPU +void ze_engine::create_onednn_engine(const ExecutionConfig& config) { + const std::lock_guard lock(onednn_mutex); + OPENVINO_ASSERT(_device->get_info().vendor_id == INTEL_VENDOR_ID, "[GPU] OneDNN engine can be used for Intel GPUs only"); + if (!_onednn_engine) { + auto casted = std::dynamic_pointer_cast(_device); + _onednn_engine = std::make_shared(dnnl::l0_interop::make_engine(casted->get_driver(), casted->get_device(), casted->get_context())); + } +} +#endif + +const ze_driver_handle_t ze_engine::get_driver() const { + auto casted = std::dynamic_pointer_cast(_device); + OPENVINO_ASSERT(casted, "[GPU] Invalid device type for ze_engine"); + return casted->get_driver(); +} + +const ze_context_handle_t ze_engine::get_context() const { + auto casted = std::dynamic_pointer_cast(_device); + OPENVINO_ASSERT(casted, "[GPU] Invalid device type for ze_engine"); + return casted->get_context(); +} + +const ze_device_handle_t ze_engine::get_device() const { + auto casted = std::dynamic_pointer_cast(_device); + OPENVINO_ASSERT(casted, "[GPU] Invalid device type for ze_engine"); + return casted->get_device(); +} + +allocation_type ze_engine::detect_usm_allocation_type(const void* memory) const { + return ze::gpu_usm::detect_allocation_type(this, memory); +} + +memory::ptr ze_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { + OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate memory for dynamic layout"); + + check_allocatable(layout, type); + + try { + memory::ptr res = std::make_shared(this, layout, type); + + if (reset || res->is_memory_reset_needed(layout)) { + auto ev = res->fill(get_service_stream()); + if (ev) { + 
get_service_stream().wait_for_events({ev}); + } + } + + return res; + } catch (const std::exception& e) { + OPENVINO_THROW("[GPU] Failed to allocate memory: ", e.what()); + } +} + +memory::ptr ze_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) { + OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine"); + OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(), + "[GPU] trying to reinterpret between image and non-image layouts. Current: ", + memory.get_layout().format.to_string(), " Target: ", new_layout.format.to_string()); + + if (memory_capabilities::is_usm_type(memory.get_allocation_type())) { + return std::make_shared(this, + new_layout, + reinterpret_cast(memory).get_buffer(), + memory.get_allocation_type(), + memory.get_mem_tracker()); + } + + OPENVINO_THROW("[GPU] Trying to reinterpret non usm buffer"); +} + +memory::ptr ze_engine::reinterpret_handle(const layout& new_layout, shared_mem_params params) { + if (params.mem_type == shared_mem_type::shared_mem_usm) { + ze::UsmMemory usm_buffer(get_context(), get_device(), params.mem); + size_t actual_mem_size = 0; + zeMemGetAddressRange(get_context(), params.mem, nullptr, &actual_mem_size); + auto requested_mem_size = new_layout.bytes_count(); + OPENVINO_ASSERT(actual_mem_size >= requested_mem_size, + "[GPU] shared USM buffer has smaller size (", actual_mem_size, + ") than specified layout (", requested_mem_size, ")"); + return std::make_shared(this, new_layout, usm_buffer, nullptr); + } else { + OPENVINO_THROW("[GPU] Unsupported shared memory type: ", params.mem_type); + } +} + +memory_ptr ze_engine::create_subbuffer(const memory& memory, const layout& new_layout, size_t byte_offset) { + OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] Trying to create a subbuffer from a buffer allocated by a different engine"); + if (new_layout.format.is_image_2d()) { + OPENVINO_NOT_IMPLEMENTED; + } + 
OPENVINO_ASSERT(memory_capabilities::is_usm_type(memory.get_allocation_type()), "[GPU] Trying to create subbuffer for non usm memory"); + auto& new_buf = reinterpret_cast(memory); + auto ptr = new_buf.get_buffer().get(); + auto sub_buffer = ze::UsmMemory(get_context(), get_device(), ptr, byte_offset); + return std::make_shared(this, + new_layout, + sub_buffer, + memory.get_allocation_type(), + memory.get_mem_tracker()); +} + +bool ze_engine::is_the_same_buffer(const memory& mem1, const memory& mem2) { + if (mem1.get_engine() != this || mem2.get_engine() != this) + return false; + if (mem1.get_allocation_type() != mem2.get_allocation_type()) + return false; + if (&mem1 == &mem2) + return true; + + return (reinterpret_cast(mem1).get_buffer().get() == reinterpret_cast(mem2).get_buffer().get()); +} + +std::shared_ptr ze_engine::create_kernel_builder() const { + auto casted = std::dynamic_pointer_cast(_device); + OPENVINO_ASSERT(casted, "[GPU] Invalid device type for ze_engine"); + return std::make_shared(*casted); +} + +void* ze_engine::get_user_context() const { + auto& casted = downcast(*_device); + return static_cast(casted.get_context()); +} + +stream::ptr ze_engine::create_stream(const ExecutionConfig& config) const { + return std::make_shared(*this, config); +} + +stream::ptr ze_engine::create_stream(const ExecutionConfig& config, void* handle) const { + OPENVINO_NOT_IMPLEMENTED; +} + +std::shared_ptr ze_engine::create(const device::ptr device, runtime_types runtime_type) { + return std::make_shared(device, runtime_type); +} + +std::shared_ptr create_ze_engine(const device::ptr device, runtime_types runtime_type) { + return ze_engine::create(device, runtime_type); +} + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_engine.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_engine.hpp new file mode 100644 index 00000000000000..24d0736b3fbbfc --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_engine.hpp @@ -0,0 
+1,52 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/stream.hpp" +#include "intel_gpu/runtime/device.hpp" + +#include + +namespace cldnn { +namespace ze { + +class ze_engine : public engine { +public: + ze_engine(const device::ptr dev, runtime_types runtime_type); + engine_types type() const override { return engine_types::ze; }; + runtime_types runtime_type() const override { return runtime_types::ze; }; + + memory_ptr allocate_memory(const layout& layout, allocation_type type, bool reset = true) override; + memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override; + memory_ptr create_subbuffer(const memory& memory, const layout& new_layout, size_t byte_offset) override; + memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override; + bool is_the_same_buffer(const memory& mem1, const memory& mem2) override; + + void* get_user_context() const override; + + allocation_type get_default_allocation_type() const override { return allocation_type::usm_device; } + allocation_type detect_usm_allocation_type(const void* memory) const override; + + const ze_context_handle_t get_context() const; + const ze_driver_handle_t get_driver() const; + const ze_device_handle_t get_device() const; + + stream_ptr create_stream(const ExecutionConfig& config) const override; + stream_ptr create_stream(const ExecutionConfig& config, void *handle) const override; + + std::shared_ptr create_kernel_builder() const override; + +#ifdef ENABLE_ONEDNN_FOR_GPU + void create_onednn_engine(const ExecutionConfig& config) override; +#endif + + static std::shared_ptr create(const device::ptr device, runtime_types runtime_type); +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_engine_factory.hpp 
b/src/plugins/intel_gpu/src/runtime/ze/ze_engine_factory.hpp new file mode 100644 index 00000000000000..abd2946a2d8e56 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_engine_factory.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/device.hpp" +#include "intel_gpu/runtime/engine.hpp" + +namespace cldnn { +namespace ze { + +// Factory for ze_engine creation. It's moved outside of ze_engine class to avoid possible L0 includes conflict +// between different engines in engine.cpp file +std::shared_ptr create_ze_engine(const device::ptr device, runtime_types runtime_type); + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event.cpp new file mode 100644 index 00000000000000..6fb52b9cac2d30 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_event.hpp" +#include "ze/ze_common.hpp" + +#include +#include +#include + +using namespace cldnn; +using namespace ze; + +void ze_event::reset() { + event::reset(); + OV_ZE_EXPECT(zeEventHostReset(m_event)); +} + +void ze_event::wait_impl() { + OV_ZE_EXPECT(zeEventHostSynchronize(m_event, endless_wait)); +} + +void ze_event::set_impl() { + OV_ZE_EXPECT(zeEventHostSignal(m_event)); +} + +bool ze_event::is_set_impl() { + auto ret = zeEventQueryStatus(m_event); + switch (ret) { + case ZE_RESULT_SUCCESS: + return true; + break; + case ZE_RESULT_NOT_READY: + return false; + break; + default: + OPENVINO_THROW("[GPU] Query event returned unexpected value: ", std::to_string(ret)); + break; + } +} + +std::optional ze_event::query_timestamp() { + if (!m_factory.is_profiling_enabled()) { + return std::nullopt; + } + ze_kernel_timestamp_result_t timestamp{}; + 
OV_ZE_EXPECT(zeEventQueryKernelTimestamp(m_event, &timestamp)); + return timestamp; +} + +ze_event_handle_t ze_event::get_handle() const { + return m_event; +} + +bool ze_event::get_profiling_info_impl(std::list& info) { + auto opt_timestamp = query_timestamp(); + if (!opt_timestamp.has_value()) { + return true; + } + ze_kernel_timestamp_result_t timestamp = opt_timestamp.value(); + auto &dev_info = m_factory.get_engine().get_device_info(); + auto wallclock_time = timestamp_to_duration(dev_info, timestamp.global); + auto exec_time = timestamp_to_duration(dev_info, timestamp.context); + + auto period_exec = std::make_shared(timestamp_to_duration(dev_info, timestamp.context)); + auto period_submit = std::make_shared(wallclock_time - exec_time); + + info.push_back({ instrumentation::profiling_stage::executing, period_exec }); + info.push_back({ instrumentation::profiling_stage::submission, period_submit }); + + return true; +} + +ze_event::~ze_event() { + OV_ZE_WARN(zeEventDestroy(m_event)); +} diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event.hpp new file mode 100644 index 00000000000000..676208490f92d1 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event.hpp" +#include "ze_event_pool.hpp" + +namespace cldnn { +namespace ze { + +// L0 event. Can be either in signaled state or not signaled state. 
+struct ze_event : public ze_base_event { +public: + // Take ownership of event handle + ze_event(uint64_t queue_stamp, const ze_base_event_factory& factory, ze_event_handle_t ev, std::shared_ptr event_pool) + : ze_base_event(queue_stamp) + , m_event_pool(event_pool) + , m_factory(factory) + , m_event(ev) { + // Ensure event handle is not null + OPENVINO_ASSERT(ev != nullptr, "[GPU] Trying to create event with null handle"); + } + ze_event(const ze_event &) = delete; + ze_event& operator=(const ze_event &) = delete; + ~ze_event(); + void reset() override; + + std::optional query_timestamp() override; + ze_event_handle_t get_handle() const override; + bool get_profiling_info_impl(std::list& info) override; + +protected: + void wait_impl() override; + void set_impl() override; + bool is_set_impl() override; + // TODO: Implement add_event_handler_impl + // bool add_event_handler_impl(event_handler, void*) override; + + std::shared_ptr m_event_pool; + const ze_base_event_factory& m_factory; + ze_event_handle_t m_event; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.cpp new file mode 100644 index 00000000000000..28bfd4a5b980d6 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_event_factory.hpp" +#include "ze_common.hpp" +#include "ze_event.hpp" + +#include "zex_event.h" + +using namespace cldnn; +using namespace ze; + +ze_event_factory::ze_event_factory(const ze_engine &engine, bool enable_profiling, uint32_t capacity) +: ze_base_event_factory(engine, enable_profiling) +, m_current_pool(nullptr) +, m_capacity(capacity) +, m_num_used(0) { } + +event::ptr ze_event_factory::create_event(uint64_t queue_stamp) { + std::lock_guard lock(_mutex); + + if (m_num_used >= m_capacity || !m_current_pool) { + 
m_num_used = 0; + ze_event_pool_flags_t flags = is_profiling_enabled() ? ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP : 0; + flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + m_current_pool = std::make_shared(m_engine, m_capacity, flags); + } + + ze_event_handle_t event; + ze_event_desc_t event_desc = { + ZE_STRUCTURE_TYPE_EVENT_DESC, + nullptr, + m_num_used++, + ZE_EVENT_SCOPE_FLAG_HOST, + 0 + }; + OV_ZE_EXPECT(zeEventCreate(m_current_pool->m_handle, &event_desc, &event)); + + return std::make_shared(queue_stamp, *this, event, m_current_pool); +} diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.hpp new file mode 100644 index 00000000000000..d54aff8ecce344 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event_factory.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event_factory.hpp" +#include "ze_event_pool.hpp" + +#include <mutex> + +namespace cldnn { +namespace ze { + +// Interface for creating l0 events using event pools +struct ze_event_factory : public ze_base_event_factory { +public: + ze_event_factory(const ze_engine &engine, bool enable_profiling, uint32_t capacity = 255); + event::ptr create_event(uint64_t queue_stamp) override; +protected: + std::mutex _mutex; + std::shared_ptr m_current_pool; + const uint32_t m_capacity; + uint32_t m_num_used; +}; +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.cpp new file mode 100644 index 00000000000000..74e040c6831dda --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_event_pool.hpp" +#include "ze_event.hpp" +#include "ze_common.hpp" + +namespace cldnn { +namespace ze { + 
+ze_event_pool::ze_event_pool(const ze_engine& engine, uint32_t capacity, ze_event_pool_flags_t flags) + : m_engine(engine) { + ze_event_pool_desc_t event_pool_desc = { + ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, + nullptr, + flags, + capacity + }; + auto device = engine.get_device(); + OV_ZE_EXPECT(zeEventPoolCreate(engine.get_context(), &event_pool_desc, 1, &device, &m_handle)); +} + +ze_event_pool::~ze_event_pool() { + OV_ZE_WARN(zeEventPoolDestroy(m_handle)); +} +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.hpp new file mode 100644 index 00000000000000..98d5f485b645cb --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_event_pool.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_engine.hpp" + +namespace cldnn { +namespace ze { + +// RAII wrapper for Level Zero event pool +struct ze_event_pool { + ze_event_pool(const ze_engine& engine, uint32_t capacity, ze_event_pool_flags_t flags); + ~ze_event_pool(); + ze_event_pool(const ze_event_pool&) = delete; + ze_event_pool& operator=(const ze_event_pool&) = delete; + + using ptr = std::shared_ptr; + + ze_event_pool_handle_t m_handle; + const ze_engine& m_engine; +}; + +} // namespace ze +} // namespace cldnn + diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_events.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_events.cpp new file mode 100644 index 00000000000000..f71741c19f6c6c --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_events.cpp @@ -0,0 +1,155 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_events.hpp" +#include "ze_common.hpp" + +#include +#include +#include + +using namespace cldnn; +using namespace ze; + +void ze_events::wait_impl() { + if (_last_ze_event) { + OV_ZE_EXPECT(zeEventHostSynchronize(_last_ze_event, endless_wait)); + } +} + +void 
ze_events::set_impl() { + // Call wait_impl to be in line with ocl_events + wait_impl(); +} + +bool ze_events::is_set_impl() { + if (!_last_ze_event) { + return true; + } + + auto ret = zeEventQueryStatus(_last_ze_event); + switch (ret) { + case ZE_RESULT_SUCCESS: + return true; + break; + case ZE_RESULT_NOT_READY: + return false; + break; + default: + OPENVINO_THROW("[GPU] Query event returned unexpected value: ", std::to_string(ret)); + break; + } +} + +bool ze_events::get_profiling_info_impl(std::list& info) { + // The goal is to sum up all disjoint durations of its projection on the time axis + std::vector all_global_timestamps; + std::vector all_context_timestamps; + + auto add_or_merge = [](std::vector& all_timestamps, const ze_kernel_timestamp_data_t& ts) { + auto it = all_timestamps.begin(); + bool merged = false; + auto target_timestamp = ts; + while (it != all_timestamps.end()) { + auto& timestamp = *it; + bool disjoint = timestamp.kernelEnd < target_timestamp.kernelStart || timestamp.kernelStart > target_timestamp.kernelEnd; + bool equal = timestamp.kernelEnd == target_timestamp.kernelEnd && timestamp.kernelStart == target_timestamp.kernelStart; + if (!disjoint) { + if (equal) { + if (!merged) { + merged = true; + break; + } else { + it = all_timestamps.erase(it); + } + } else { + if (!merged) { + timestamp.kernelStart = std::min(timestamp.kernelStart, target_timestamp.kernelStart); + timestamp.kernelEnd = std::max(timestamp.kernelEnd, target_timestamp.kernelEnd); + target_timestamp = timestamp; + merged = true; + it++; + } else { + if (timestamp.kernelEnd > target_timestamp.kernelEnd) { + it--; + it->kernelEnd = target_timestamp.kernelEnd; + it++; + } + it = all_timestamps.erase(it); + } + } + } else { + it++; + } + } + + if (!merged) { + all_timestamps.push_back(target_timestamp); + } + }; + + if (_events.empty()) + return false; + + auto device_info = _engine.get_device_info(); + + auto get_total_exec_time = [&device_info](std::vector& all_timestamps) 
{ + std::chrono::nanoseconds total_time{0}; + for (const auto& ts : all_timestamps) { + total_time += timestamp_to_duration(device_info, ts); + } + + return total_time; + }; + + // Submission time is calculated as difference between merged context and wallclock intervals + // May probably be more accurate if we sum all sub-intervals of wallclock timestamps not covered by execution intervals + using intervals_t = std::vector; + auto get_submission_time = [&device_info](const intervals_t& s_timestamps, + const intervals_t& e_timestamps) { + auto get_minmax = [](const intervals_t& timestamps) { + uint64_t min_val = std::min_element(timestamps.begin(), timestamps.end(), + [](const ze_kernel_timestamp_data_t& lhs, const ze_kernel_timestamp_data_t& rhs) { + return lhs.kernelStart < rhs.kernelStart; + })->kernelStart; + uint64_t max_val = std::max_element(timestamps.begin(), timestamps.end(), + [](const ze_kernel_timestamp_data_t& lhs, const ze_kernel_timestamp_data_t& rhs) { + return lhs.kernelEnd < rhs.kernelEnd; + })->kernelEnd; + + return ze_kernel_timestamp_data_t{min_val, max_val}; + }; + + auto submission_interval = get_minmax(s_timestamps); + auto exec_interval = get_minmax(e_timestamps); + + auto wallclock_time = timestamp_to_duration(device_info, submission_interval); + auto exec_time = timestamp_to_duration(device_info, exec_interval); + + return wallclock_time - exec_time; + }; + + for (size_t i = 0; i < _events.size(); i++) { + auto be = downcast(_events[i].get()); + auto opt_timestamp = be->query_timestamp(); + if (!opt_timestamp.has_value()) { + continue; + } + ze_kernel_timestamp_result_t timestamp = opt_timestamp.value(); + + add_or_merge(all_global_timestamps, timestamp.global); + add_or_merge(all_context_timestamps, timestamp.context); + } + + auto submit_time = get_submission_time(all_global_timestamps, all_context_timestamps); + auto exec_time = get_total_exec_time(all_context_timestamps); + + auto period_exec = std::make_shared(exec_time); + auto 
period_submit = std::make_shared(submit_time); + + info.push_back({ instrumentation::profiling_stage::executing, period_exec }); + info.push_back({ instrumentation::profiling_stage::submission, period_submit }); + + return true; +} \ No newline at end of file diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_events.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_events.hpp new file mode 100644 index 00000000000000..b38a38303e8b59 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_events.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_base_event.hpp" +#include "ze_engine.hpp" + +#include + +namespace cldnn { +namespace ze { + +struct ze_events : public ze_base_event { +public: + ze_events(std::vector const& ev, const ze_engine &engine) + : ze_base_event(0) + , _engine(engine) { + process_events(ev); + } + + void reset() override { + event::reset(); + _events.clear(); + } + + std::optional query_timestamp() override { return std::nullopt; } + ze_event_handle_t get_handle() const override { return _last_ze_event; } + bool get_profiling_info_impl(std::list& info) override; + +protected: + void wait_impl() override; + void set_impl() override; + bool is_set_impl() override; + + void process_events(const std::vector& ev) { + for (size_t i = 0; i < ev.size(); i++) { + auto multiple_events = dynamic_cast(ev[i].get()); + if (multiple_events) { + for (size_t j = 0; j < multiple_events->_events.size(); j++) { + if (auto base_ev = dynamic_cast(multiple_events->_events[j].get())) { + auto current_ev_queue_stamp = base_ev->get_queue_stamp(); + if ((m_queue_stamp == 0) || (current_ev_queue_stamp > m_queue_stamp)) { + m_queue_stamp = current_ev_queue_stamp; + _last_ze_event = base_ev->get_handle(); + } + } + _events.push_back(multiple_events->_events[j]); + } + } else { + if (auto base_ev = dynamic_cast(ev[i].get())) { + auto current_ev_queue_stamp = base_ev->get_queue_stamp(); + if 
((m_queue_stamp == 0) || (current_ev_queue_stamp > m_queue_stamp)) { + m_queue_stamp = current_ev_queue_stamp; + _last_ze_event = base_ev->get_handle(); + } + } + _events.push_back(ev[i]); + } + } + } + + ze_event_handle_t _last_ze_event = nullptr; + std::vector _events; + const ze_engine &_engine; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_kernel.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel.hpp new file mode 100644 index 00000000000000..9d57a571907a89 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel.hpp @@ -0,0 +1,105 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/kernel.hpp" +#include "openvino/core/except.hpp" +#include "ze_common.hpp" +#include "ze_kernel_holder.hpp" + +#include + +namespace cldnn { +namespace ze { + +class ze_kernel : public kernel { +public: + static void create_kernels_from_module(std::shared_ptr module, std::vector &out) { + ze_module_handle_t module_handle = module->get_module_handle(); + uint32_t kernel_count = 0; + OV_ZE_EXPECT(zeModuleGetKernelNames(module_handle, &kernel_count, nullptr)); + std::vector kernel_names(kernel_count); + // Specification does not mention who is responsible for the returned pointers + // Assume Level Zero owns the pointers and they will remain valid as long as the module resource + OV_ZE_EXPECT(zeModuleGetKernelNames(module_handle, &kernel_count, kernel_names.data())); + + ze_kernel_flags_t flags = 0; + ze_kernel_desc_t kernel_desc = { + ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, flags, nullptr}; + for (auto name_cstr : kernel_names) { + auto name = std::string(name_cstr); + // L0 returns Intel_Symbol_Table_Void_Program that does not correspond to actual kernel + if (name == "Intel_Symbol_Table_Void_Program") { + continue; + } + kernel_desc.pKernelName = name_cstr; + ze_kernel_handle_t kernel_handle; + OV_ZE_EXPECT(zeKernelCreate(module_handle, 
&kernel_desc, &kernel_handle)); + auto kernel_holder = std::make_shared(kernel_handle, module); + out.push_back(std::make_shared(kernel_holder, name)); + } + } + + ze_kernel(std::shared_ptr kernel, const std::string& kernel_id) + : m_kernel(kernel) + , m_kernel_id(kernel_id) { } + + ze_kernel_handle_t get_kernel_handle() const { return m_kernel->get_kernel_handle(); } + ze_module_handle_t get_module_handle() const { return m_kernel->get_module()->get_module_handle(); } + std::string get_id() const override { return m_kernel_id; } + + std::shared_ptr clone(bool reuse_kernel_handle = false) const override { + if (reuse_kernel_handle) { + return std::make_shared(m_kernel, m_kernel_id); + } else { + ze_kernel_handle_t cloned_handle; + ze_module_handle_t module_handle = get_module_handle(); + ze_kernel_desc_t descriptor; + descriptor.stype = ZE_STRUCTURE_TYPE_KERNEL_DESC; + descriptor.pNext = nullptr; + descriptor.flags = 0; + descriptor.pKernelName = m_kernel_id.c_str(); + OV_ZE_EXPECT(zeKernelCreate(module_handle, &descriptor, &cloned_handle)); + auto kernel_holder = std::make_shared(cloned_handle, m_kernel->get_module()); + return std::make_shared(kernel_holder, m_kernel_id); + } + } + + virtual bool is_same(const kernel &other) const override { + auto other_ptr = dynamic_cast(&other); + if (other_ptr == nullptr) { + return false; + } + return get_kernel_handle() == other_ptr->get_kernel_handle(); + } + + std::vector get_binary() const override { + size_t binary_size = 0; + ze_module_handle_t module_handle = get_module_handle(); + OV_ZE_EXPECT(zeModuleGetNativeBinary(module_handle, &binary_size, nullptr)); + + std::vector binary(binary_size); + OV_ZE_EXPECT(zeModuleGetNativeBinary(module_handle, &binary_size, binary.data())); + + return binary; + } + + std::string get_build_log() const override { + ze_module_build_log_handle_t build_log_handle = m_kernel->get_module()->get_build_log_handle(); + size_t log_size = 0; + 
OV_ZE_EXPECT(zeModuleBuildLogGetString(build_log_handle, &log_size, nullptr)); + + std::string log(log_size, ' '); + OV_ZE_EXPECT(zeModuleBuildLogGetString(build_log_handle, &log_size, log.data())); + return log; + } + +private: + std::shared_ptr m_kernel; + std::string m_kernel_id; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_builder.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_builder.hpp new file mode 100644 index 00000000000000..7aae55d6a7f990 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_builder.hpp @@ -0,0 +1,67 @@ +// Copyright (C) 2016-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/kernel_builder.hpp" +#include "intel_gpu/runtime/device.hpp" + +#include "ze_device.hpp" +#include "ze_kernel.hpp" +#include "ze_common.hpp" + +namespace cldnn { +namespace ze { + +class ze_kernel_builder : public kernel_builder{ + public: + ze_kernel_builder(const ze_device &device) : m_device(device) {} + + void build_kernels(const void *src, size_t src_bytes, KernelFormat src_format, const std::string &options, std::vector &out) const override { + ze_module_desc_t module_desc = { + ZE_STRUCTURE_TYPE_MODULE_DESC, + nullptr, + ZE_MODULE_FORMAT_NATIVE, + src_bytes, + reinterpret_cast(src), + options.c_str(), + nullptr // specialization constants + }; + switch (src_format) { + case KernelFormat::SOURCE: { + module_desc.format = ze_module_format_oclc; + break; + } + case KernelFormat::NATIVE_BIN: { + module_desc.format = ZE_MODULE_FORMAT_NATIVE; + break; + } + default: + OPENVINO_THROW("[GPU] Trying to build kernel from unexpected format"); + break; + } + ze_module_handle_t module_handle; + ze_module_build_log_handle_t log_handle; + ze_result_t build_result = zeModuleCreate(m_device.get_context(), m_device.get_device(), &module_desc, &module_handle, &log_handle); + if (build_result != ZE_RESULT_SUCCESS) { + size_t log_size = 
0; + OV_ZE_EXPECT(zeModuleBuildLogGetString(log_handle, &log_size, nullptr)); + std::string log(log_size, ' '); + OV_ZE_EXPECT(zeModuleBuildLogGetString(log_handle, &log_size, log.data())); + OV_ZE_EXPECT(zeModuleBuildLogDestroy(log_handle)); + GPU_DEBUG_INFO << "-------- Kernel build error" << std::endl; + GPU_DEBUG_INFO << log << std::endl; + GPU_DEBUG_INFO << "-------- End of Kernel build error" << std::endl; + OPENVINO_THROW("[GPU] Failed to build module"); + } + auto module_holder = std::make_shared(module_handle, log_handle); + ze_kernel::create_kernels_from_module(module_holder, out); + } + + private: + const ze_device &m_device; +}; +} // namespace ze +} // namespace cldnn + diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_holder.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_holder.hpp new file mode 100644 index 00000000000000..5d7ab17b6b8367 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_kernel_holder.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_common.hpp" +#include "ze_module_holder.hpp" + +#include + +namespace cldnn { +namespace ze { + +// RAII wrapper for Level Zero kernel +class ze_kernel_holder { +public: + // Take ownership of existing kernel handle + explicit ze_kernel_holder(ze_kernel_handle_t kernel, std::shared_ptr module) : m_kernel(kernel), m_module(module) {} + ze_kernel_holder(const ze_kernel_holder& other) = delete; + ze_kernel_holder& operator=(const ze_kernel_holder& other) = delete; + ~ze_kernel_holder() { + OV_ZE_WARN(zeKernelDestroy(m_kernel)); + } + ze_kernel_handle_t get_kernel_handle() { return m_kernel; } + std::shared_ptr get_module() { return m_module; } +private: + ze_kernel_handle_t m_kernel; + std::shared_ptr m_module; +}; +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_memory.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_memory.cpp new file mode 100644 index 
00000000000000..e3d0e924fa2589 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_memory.cpp @@ -0,0 +1,267 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/utils.hpp" +#include "ze_memory.hpp" +#include "ze/ze_common.hpp" +#include "ze_engine.hpp" +#include "ze_stream.hpp" +#include "ze_event.hpp" +#include +#include + +#ifdef ENABLE_ONEDNN_FOR_GPU +#include +#endif + +namespace cldnn { +namespace ze { +namespace { +static inline cldnn::event::ptr create_event(stream& stream, size_t bytes_count) { + if (bytes_count == 0) { + GPU_DEBUG_TRACE_DETAIL << "Skip memory operation for 0 size tensor" << std::endl; + return stream.create_user_event(true); + } + + return stream.create_base_event(); +} + +std::vector get_ze_events(const std::vector& events) { + std::vector ze_events; + ze_events.reserve(events.size()); + for (const auto& ev : events) { + auto ze_event = downcast(ev.get())->get_handle(); + if (ze_event != nullptr) { + ze_events.push_back(ze_event); + } + } + return ze_events; +} + +} // namespace + +allocation_type gpu_usm::detect_allocation_type(const ze_engine* engine, const void* mem_ptr) { + ze_memory_allocation_properties_t props{ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ze_device_handle_t device = nullptr; + OV_ZE_EXPECT(zeMemGetAllocProperties(engine->get_context(), mem_ptr, &props, &device)); + + switch (props.type) { + case ZE_MEMORY_TYPE_DEVICE: return allocation_type::usm_device; + case ZE_MEMORY_TYPE_HOST: return allocation_type::usm_host; + case ZE_MEMORY_TYPE_SHARED: return allocation_type::usm_shared; + default: return allocation_type::unknown; + } + + return allocation_type::unknown; +} + +allocation_type gpu_usm::detect_allocation_type(const ze_engine* engine, const ze::UsmMemory& buffer) { + auto alloc_type = detect_allocation_type(engine, buffer.get()); + OPENVINO_ASSERT(alloc_type == allocation_type::usm_device || + alloc_type == 
allocation_type::usm_host || + alloc_type == allocation_type::usm_shared, "[GPU] Unsupported USM alloc type: " + to_string(alloc_type)); + return alloc_type; +} + +gpu_usm::gpu_usm(ze_engine* engine, const layout& new_layout, const ze::UsmMemory& buffer, allocation_type type, std::shared_ptr mem_tracker) + : lockable_gpu_mem() + , memory(engine, new_layout, type, mem_tracker) + , _buffer(buffer) + , _host_buffer(engine->get_context(), engine->get_device()) { +} + +gpu_usm::gpu_usm(ze_engine* engine, const layout& new_layout, const ze::UsmMemory& buffer, std::shared_ptr mem_tracker) + : lockable_gpu_mem() + , memory(engine, new_layout, detect_allocation_type(engine, buffer), mem_tracker) + , _buffer(buffer) + , _host_buffer(engine->get_context(), engine->get_device()) { +} + +gpu_usm::gpu_usm(ze_engine* engine, const layout& layout, allocation_type type) + : lockable_gpu_mem() + , memory(engine, layout, type, nullptr) + , _buffer(engine->get_context(), engine->get_device()) + , _host_buffer(engine->get_context(), engine->get_device()) { + auto mem_ordinal = engine->get_device_info().device_memory_ordinal; + switch (get_allocation_type()) { + case allocation_type::usm_host: + _buffer.allocateHost(_bytes_count); + break; + case allocation_type::usm_shared: + _buffer.allocateShared(_bytes_count, mem_ordinal); + break; + case allocation_type::usm_device: + _buffer.allocateDevice(_bytes_count, mem_ordinal); + break; + default: + OPENVINO_THROW("[GPU] Unknown unified shared memory type!"); + } + + m_mem_tracker = std::make_shared(engine, _buffer.get(), layout.bytes_count(), type); +} + +void* gpu_usm::lock(const stream& stream, mem_lock_type type = mem_lock_type::read_write) { + std::lock_guard locker(_mutex); + if (0 == _lock_count) { + auto& _ze_stream = downcast(stream); + if (get_allocation_type() == allocation_type::usm_device) { + if (type != mem_lock_type::read) { + throw std::runtime_error("Unable to lock allocation_type::usm_device with write lock_type."); + } + 
GPU_DEBUG_LOG << "Copy usm_device buffer to host buffer." << std::endl; + _host_buffer.allocateHost(_bytes_count); + OV_ZE_EXPECT(zeCommandListAppendMemoryCopy(_ze_stream.get_queue(), + _host_buffer.get(), + _buffer.get(), + _bytes_count, + nullptr, + 0, + nullptr)); + OV_ZE_EXPECT(zeCommandListHostSynchronize(_ze_stream.get_queue(), endless_wait)); + _mapped_ptr = _host_buffer.get(); + } else { + _mapped_ptr = _buffer.get(); + } + } + _lock_count++; + return _mapped_ptr; +} + +void gpu_usm::unlock(const stream& /* stream */) { + std::lock_guard locker(_mutex); + _lock_count--; + if (0 == _lock_count) { + if (get_allocation_type() == allocation_type::usm_device) { + _host_buffer.freeMem(); + } + _mapped_ptr = nullptr; + } +} + +event::ptr gpu_usm::fill(stream& stream, unsigned char pattern, const std::vector& dep_events, bool blocking) { + auto& _ze_stream = downcast(stream); + auto ev = _ze_stream.create_base_event(); + auto ev_ze = downcast(ev.get())->get_handle(); + auto ze_dep_events = get_ze_events(dep_events); + OV_ZE_EXPECT(zeCommandListAppendMemoryFill(_ze_stream.get_queue(), + _buffer.get(), + &pattern, + sizeof(unsigned char), + _bytes_count, + ev_ze, + ze_dep_events.size(), + ze_dep_events.data())); + if (blocking) { + ev->wait(); + } + return ev; +} + +event::ptr gpu_usm::fill(stream& stream, const std::vector& dep_events, bool blocking) { + return fill(stream, 0, dep_events, blocking); +} + +event::ptr gpu_usm::copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) { + auto result_event = create_event(stream, size); + if (size == 0) + return result_event; + + auto _ze_stream = downcast(&stream); + auto _ze_event = downcast(result_event.get())->get_handle(); + auto src_ptr = reinterpret_cast(data_ptr) + src_offset; + auto dst_ptr = reinterpret_cast(buffer_ptr()) + dst_offset; + + OV_ZE_EXPECT(zeCommandListAppendMemoryCopy(_ze_stream->get_queue(), + dst_ptr, + src_ptr, + size, + 
_ze_event, + 0, + nullptr)); + + if (blocking) { + result_event->wait(); + } + + return result_event; +} + +event::ptr gpu_usm::copy_from(stream& stream, const memory& src_mem, size_t src_offset, size_t dst_offset, size_t size, bool blocking) { + auto result_event = create_event(stream, size); + if (size == 0) + return result_event; + + auto _ze_stream = downcast(&stream); + auto _ze_event = downcast(result_event.get())->get_handle(); + OPENVINO_ASSERT(memory_capabilities::is_usm_type(src_mem.get_allocation_type())); + + auto usm_mem = downcast(&src_mem); + auto src_ptr = reinterpret_cast(usm_mem->buffer_ptr()) + src_offset; + auto dst_ptr = reinterpret_cast(buffer_ptr()) + dst_offset; + + OV_ZE_EXPECT(zeCommandListAppendMemoryCopy(_ze_stream->get_queue(), + dst_ptr, + src_ptr, + size, + _ze_event, + 0, + nullptr)); + if (blocking) { + result_event->wait(); + } + + return result_event; +} + +event::ptr gpu_usm::copy_to(stream& stream, void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) const { + auto result_event = create_event(stream, size); + if (size == 0) + return result_event; + + auto _ze_stream = downcast(&stream); + auto _ze_event = downcast(result_event.get())->get_handle(); + auto src_ptr = reinterpret_cast(buffer_ptr()) + src_offset; + auto dst_ptr = reinterpret_cast(data_ptr) + dst_offset; + + OV_ZE_EXPECT(zeCommandListAppendMemoryCopy(_ze_stream->get_queue(), + dst_ptr, + src_ptr, + size, + _ze_event, + 0, + nullptr)); + if (blocking) { + result_event->wait(); + } + + return result_event; +} + +#ifdef ENABLE_ONEDNN_FOR_GPU +dnnl::memory gpu_usm::get_onednn_memory(dnnl::memory::desc desc, int64_t offset) const { + auto onednn_engine = _engine->get_onednn_engine(); + dnnl::memory dnnl_mem = dnnl::l0_interop::make_memory(desc, onednn_engine, + reinterpret_cast(_buffer.get()) + offset); + return dnnl_mem; +} +#endif + +shared_mem_params gpu_usm::get_internal_params() const { + auto casted = downcast(_engine); + 
return { + shared_mem_type::shared_mem_usm, // shared_mem_type + static_cast(casted->get_context()), // context handle + static_cast(casted->get_device()), // user_device handle + static_cast(_buffer.get()), // mem handle +#ifdef _WIN32 + nullptr, // surface handle +#else + 0, // surface handle +#endif + 0 // plane + }; +} + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_memory.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_memory.hpp new file mode 100644 index 00000000000000..c76d6c46cc31fe --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_memory.hpp @@ -0,0 +1,151 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_common.hpp" +#include "ze_engine.hpp" +#include "intel_gpu/runtime/memory.hpp" + +#include +#include +#include + +namespace cldnn { +namespace ze { +struct lockable_gpu_mem { + lockable_gpu_mem() : + _lock_count(0), + _mapped_ptr(nullptr) {} + + std::mutex _mutex; + unsigned _lock_count; + void* _mapped_ptr; +}; + +class UsmHolder { +public: + UsmHolder(ze_context_handle_t context, void* ptr, bool shared_memory = false) : _context(context), _ptr(ptr), _shared_memory(shared_memory) { + if (ptr == nullptr) + OPENVINO_THROW("[GPU] Can not create UsmHolder with nullptr"); + } + UsmHolder(const UsmHolder&) = delete; + UsmHolder& operator=(const UsmHolder&) = delete; + + void* ptr() { return _ptr; } + void memFree() { + if (!_shared_memory && _ptr != nullptr) { + OV_ZE_WARN(zeMemFree(_context, _ptr)); + _ptr = nullptr; + } + } + + ~UsmHolder() { + memFree(); + } +private: + ze_context_handle_t _context; + void* _ptr; + bool _shared_memory = false; +}; + +class UsmMemory { +public: + explicit UsmMemory(ze_context_handle_t context, ze_device_handle_t device) + : _context(context) + , _device(device) {} + + UsmMemory(ze_context_handle_t context, ze_device_handle_t device, void* usm_ptr, size_t offset = 0) + : _context(context) + , 
_device(device) + , _usm_pointer(std::make_shared(_context, reinterpret_cast(usm_ptr) + offset, true)) {} + + void* get() const { return _usm_pointer->ptr(); } + + void allocateHost(size_t size) { + ze_host_mem_alloc_desc_t host_desc = {}; + host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + host_desc.flags = 0; + host_desc.pNext = nullptr; + + void* memory = nullptr; + OV_ZE_EXPECT(zeMemAllocHost(_context, &host_desc, size, 1, &memory)); + _usm_pointer = std::make_shared(_context, memory); + } + + void allocateShared(size_t size, uint32_t ordinal) { + ze_device_mem_alloc_desc_t device_desc = {}; + device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + device_desc.flags = 0; + device_desc.ordinal = ordinal; + device_desc.pNext = nullptr; + + ze_host_mem_alloc_desc_t host_desc = {}; + host_desc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + host_desc.flags = 0; + host_desc.pNext = nullptr; + + void* memory = nullptr; + OV_ZE_EXPECT(zeMemAllocShared(_context, &device_desc, &host_desc, size, 1, _device, &memory)); + _usm_pointer = std::make_shared(_context, memory); + } + + void allocateDevice(size_t size, uint32_t ordinal) { + ze_device_mem_alloc_desc_t device_desc = {}; + device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + device_desc.flags = 0; + device_desc.ordinal = ordinal; + device_desc.pNext = nullptr; + + void* memory = nullptr; + OV_ZE_EXPECT(zeMemAllocDevice(_context, &device_desc, size, 4096, _device, &memory)); + _usm_pointer = std::make_shared(_context, memory); + } + + void freeMem() { + if (!_usm_pointer) + OPENVINO_THROW("[GPU] Can not free memory of empty UsmHolder"); + _usm_pointer->memFree(); + } + + virtual ~UsmMemory() = default; + +protected: + ze_context_handle_t _context; + ze_device_handle_t _device; + std::shared_ptr _usm_pointer = nullptr; +}; + +struct gpu_usm : public lockable_gpu_mem, public memory { + gpu_usm(ze_engine* engine, const layout& new_layout, const ze::UsmMemory& usm_buffer, allocation_type type, 
std::shared_ptr mem_tracker); + gpu_usm(ze_engine* engine, const layout& new_layout, const ze::UsmMemory& usm_buffer, std::shared_ptr mem_tracker); + gpu_usm(ze_engine* engine, const layout& layout, allocation_type type); + + void* lock(const stream& stream, mem_lock_type type) override; + void unlock(const stream& stream) override; + const ze::UsmMemory& get_buffer() const { return _buffer; } + ze::UsmMemory& get_buffer() { return _buffer; } + + event::ptr fill(stream& stream, unsigned char pattern, const std::vector& dep_events = {}, bool blocking = true) override; + event::ptr fill(stream& stream, const std::vector& dep_events = {}, bool blocking = true) override; + shared_mem_params get_internal_params() const override; + void* buffer_ptr() const override { return _buffer.get(); } + + event::ptr copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; + event::ptr copy_from(stream& stream, const memory& src_mem, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; + event::ptr copy_to(stream& stream, void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) const override; +#ifdef ENABLE_ONEDNN_FOR_GPU + dnnl::memory get_onednn_memory(dnnl::memory::desc desc, int64_t offset) const override; +#endif + + static allocation_type detect_allocation_type(const ze_engine* engine, const void* mem_ptr); + static allocation_type detect_allocation_type(const ze_engine* engine, const ze::UsmMemory& buffer); + +protected: + ze::UsmMemory _buffer; + ze::UsmMemory _host_buffer; +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_module_holder.hpp b/src/plugins/intel_gpu/src/runtime/ze/ze_module_holder.hpp new file mode 100644 index 00000000000000..fcd5c5840b935f --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_module_holder.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2024 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ze_common.hpp" +#include +#include + +namespace cldnn { +namespace ze { + +// RAII wrapper for Level Zero module +class ze_module_holder { +public: + // Take ownership of existing module and build log handles + explicit ze_module_holder(ze_module_handle_t module, ze_module_build_log_handle_t build_log) : m_module(module), m_build_log(build_log) {} + + ze_module_holder(const ze_module_holder& other) = delete; + ze_module_holder& operator=(const ze_module_holder& other) = delete; + ~ze_module_holder() { + OV_ZE_WARN(zeModuleBuildLogDestroy(m_build_log)); + OV_ZE_WARN(zeModuleDestroy(m_module)); + } + ze_module_handle_t get_module_handle() const { return m_module; } + ze_module_build_log_handle_t get_build_log_handle() const { return m_build_log; } + +private: + ze_module_handle_t m_module; + ze_module_build_log_handle_t m_build_log; +}; +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_stream.cpp b/src/plugins/intel_gpu/src/runtime/ze/ze_stream.cpp new file mode 100644 index 00000000000000..1050c9b753fa76 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_stream.cpp @@ -0,0 +1,412 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ze_stream.hpp" +#include "intel_gpu/runtime/memory_caps.hpp" +#include "intel_gpu/runtime/utils.hpp" +#include "openvino/core/except.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/runtime/properties.hpp" + +#include "ze_counter_based_event_factory.hpp" +#include "ze_event_factory.hpp" +#include "ze_events.hpp" +#include "ze_empty_event.hpp" + +#include "ze_event.hpp" +#include "ze_kernel.hpp" +#include "ze_memory.hpp" +#include "ze_common.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +#ifdef ENABLE_ONEDNN_FOR_GPU +#include +#endif + +namespace cldnn { +namespace ze { + +namespace { +inline ze_group_count_t 
to_group_count(const std::vector& v) { + switch (v.size()) { + case 1: + return {uint32_t(v[0]), uint32_t(1), uint32_t(1)}; + case 2: + return {uint32_t(v[0]), uint32_t(v[1]), uint32_t(1)}; + case 3: + return {uint32_t(v[0]), uint32_t(v[1]), uint32_t(v[2])}; + default: + return {uint32_t(1), uint32_t(1), uint32_t(1)}; + } +} + +template +ze_result_t set_kernel_arg_scalar(ze_kernel_handle_t& kernel, uint32_t idx, const T& val) { + GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel << " set scalar " << idx << " (" << ov::element::from().get_type_name() << ")" << val << "\n"; + return zeKernelSetArgumentValue(kernel, idx, sizeof(T), &val); +} + +ze_result_t set_kernel_arg_local_memory(ze_kernel_handle_t& kernel, uint32_t idx, size_t size) { + if (size == 0) + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + + GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel << " set arg " << idx << " local memory size: " << size << std::endl; + return zeKernelSetArgumentValue(kernel, idx, size, NULL); +} + +ze_result_t set_kernel_arg(ze_kernel_handle_t& kernel, uint32_t idx, cldnn::memory::cptr mem) { + if (!mem) + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + + OPENVINO_ASSERT(memory_capabilities::is_usm_type(mem->get_allocation_type()), "Unsupported alloc type"); + const auto& buf = std::dynamic_pointer_cast(mem)->get_buffer(); + auto mem_type = std::dynamic_pointer_cast(mem)->get_allocation_type(); + GPU_DEBUG_TRACE_DETAIL << "kernel: " << kernel << " set arg (" << mem_type << ") " << idx + << " mem: " << buf.get() << " size: " << mem->size() << std::endl; + + auto ptr = buf.get(); + return zeKernelSetArgumentValue(kernel, idx, sizeof(ptr), &ptr); +} + +void set_arguments_impl(ze_kernel_handle_t kernel, + const arguments_desc& args, + const kernel_arguments_data& data) { + using args_t = argument_desc::Types; + using scalar_t = scalar_desc::Types; + + for (uint32_t i = 0; i < static_cast(args.size()); i++) { + ze_result_t status = ZE_RESULT_NOT_READY; + switch (args[i].t) { + case args_t::INPUT: 
+ if (args[i].index < data.inputs.size() && data.inputs[args[i].index]) { + status = set_kernel_arg(kernel, i, data.inputs[args[i].index]); + } + break; + case args_t::INPUT_OF_FUSED_PRIMITIVE: + if (args[i].index < data.fused_op_inputs.size() && data.fused_op_inputs[args[i].index]) { + status = set_kernel_arg(kernel, i, data.fused_op_inputs[args[i].index]); + } + break; + case args_t::INTERNAL_BUFFER: + if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index]) { + status = set_kernel_arg(kernel, i, data.intermediates[args[i].index]); + } + break; + case args_t::OUTPUT: + if (args[i].index < data.outputs.size() && data.outputs[args[i].index]) { + status = set_kernel_arg(kernel, i, data.outputs[args[i].index]); + } + break; + case args_t::WEIGHTS: + status = set_kernel_arg(kernel, i, data.weights); + break; + case args_t::BIAS: + status = set_kernel_arg(kernel, i, data.bias); + break; + case args_t::WEIGHTS_ZERO_POINTS: + status = set_kernel_arg(kernel, i, data.weights_zero_points); + break; + case args_t::ACTIVATIONS_ZERO_POINTS: + status = set_kernel_arg(kernel, i, data.activations_zero_points); + break; + case args_t::COMPENSATION: + status = set_kernel_arg(kernel, i, data.compensation); + break; + case args_t::SCALE_TABLE: + status = set_kernel_arg(kernel, i, data.scale_table); + break; + case args_t::SLOPE: + status = set_kernel_arg(kernel, i, data.slope); + break; + case args_t::SCALAR: + if (data.scalars && args[i].index < data.scalars->size()) { + const auto& scalar = (*data.scalars)[args[i].index]; + switch (scalar.t) { + case scalar_t::UINT8: + status = set_kernel_arg_scalar(kernel, i, scalar.v.u8); + break; + case scalar_t::UINT16: + status = set_kernel_arg_scalar(kernel, i, scalar.v.u16); + break; + case scalar_t::UINT32: + status = set_kernel_arg_scalar(kernel, i, scalar.v.u32); + break; + case scalar_t::UINT64: + status = set_kernel_arg_scalar(kernel, i, scalar.v.u64); + break; + case scalar_t::INT8: + status = 
set_kernel_arg_scalar(kernel, i, scalar.v.s8); + break; + case scalar_t::INT16: + status = set_kernel_arg_scalar(kernel, i, scalar.v.s16); + break; + case scalar_t::INT32: + status = set_kernel_arg_scalar(kernel, i, scalar.v.s32); + break; + case scalar_t::INT64: + status = set_kernel_arg_scalar(kernel, i, scalar.v.s64); + break; + case scalar_t::FLOAT32: + status = set_kernel_arg_scalar(kernel, i, scalar.v.f32); + break; + case scalar_t::FLOAT64: + status = set_kernel_arg_scalar(kernel, i, scalar.v.f64); + break; + default: + break; + } + } + break; + case args_t::CELL: + status = set_kernel_arg(kernel, i, data.cell); + break; + case args_t::SHAPE_INFO: + status = set_kernel_arg(kernel, i, data.shape_info); + break; + case args_t::LOCAL_MEMORY_SIZE: + OPENVINO_ASSERT(args[i].index < data.local_memory_args->size() && data.local_memory_args->at(args[i].index), + "The allocated local memory is necessary to set kernel arguments."); + status = set_kernel_arg_local_memory(kernel, i, data.local_memory_args->at(args[i].index)); + break; + default: + break; + } + if (status != ZE_RESULT_SUCCESS) { + throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n"); + } + } +} + +} // namespace + +ze_stream::ze_stream(const ze_engine &engine, const ExecutionConfig& config) + : stream(config.get_queue_type(), stream::get_expected_sync_method(config)) + , _engine(engine) { + const auto &info = engine.get_device_info(); + + ze_command_queue_desc_t command_queue_desc = {}; + command_queue_desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; + command_queue_desc.pNext = nullptr; + command_queue_desc.index = 0; + command_queue_desc.ordinal = info.compute_queue_group_ordinal; + command_queue_desc.flags = m_queue_type == QueueTypes::out_of_order ? 
0 : ZE_COMMAND_QUEUE_FLAG_IN_ORDER; + command_queue_desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + command_queue_desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + + zex_intel_queue_copy_operations_offload_hint_exp_desc_t cp_offload_desc = {}; + cp_offload_desc.stype = ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES; + cp_offload_desc.copyOffloadEnabled = true; + cp_offload_desc.pNext = nullptr; + bool use_cp_offload = info.supports_cp_offload; + if (use_cp_offload) { + command_queue_desc.pNext = &cp_offload_desc; + } + + OV_ZE_EXPECT(zeCommandListCreateImmediate(_engine.get_context(), _engine.get_device(), &command_queue_desc, &m_command_list)); + bool use_counter_based_events = m_queue_type == QueueTypes::in_order && info.supports_counter_based_events; + if (use_counter_based_events) { + m_ev_factory = std::make_unique(engine, config.get_enable_profiling()); + } else { + m_ev_factory = std::make_unique(engine, config.get_enable_profiling()); + } + GPU_DEBUG_INFO << "[GPU] Created L0 stream (" + << "use_cp_offload=" << use_cp_offload + << ", use_counter_based_events=" << use_counter_based_events + << ")" << std::endl; +} + +ze_stream::~ze_stream() { +#ifdef ENABLE_ONEDNN_FOR_GPU + // Destroy OneDNN stream before destroying command list + _onednn_stream.reset(); +#endif + if (m_command_list != nullptr) + zeCommandListDestroy(m_command_list); +} + +void ze_stream::set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) { + static std::mutex m; + std::lock_guard guard(m); + + auto& ze_kernel = downcast(kernel); + auto kern = ze_kernel.get_kernel_handle(); + set_arguments_impl(kern, args_desc.arguments, args); +} + +event::ptr ze_stream::enqueue_kernel(kernel& kernel, + const kernel_arguments_desc& args_desc, + const kernel_arguments_data& /* args */, + std::vector const& deps, + bool is_output) { + auto& ze_kernel = downcast(kernel); + + auto kern = ze_kernel.get_kernel_handle(); + + 
std::vector dep_events; + std::vector* dep_events_ptr = nullptr; + if (m_sync_method == SyncMethods::events) { + for (auto& dep : deps) { + if (auto ze_base_ev = std::dynamic_pointer_cast(dep)) { + if (ze_base_ev->get_handle() != nullptr) + dep_events.push_back(ze_base_ev->get_handle()); + } + } + dep_events_ptr = &dep_events; + } else if (m_sync_method == SyncMethods::barriers) { + sync_events(deps, is_output); + } + bool set_output_event = m_sync_method == SyncMethods::events || is_output; + + auto ev = set_output_event ? create_base_event() : std::make_shared(++m_queue_counter); + auto global = to_group_count(args_desc.workGroups.global); + auto local = to_group_count(args_desc.workGroups.local); + ze_group_count_t args = { global.groupCountX / local.groupCountX, global.groupCountY / local.groupCountY, global.groupCountZ / local.groupCountZ }; + OV_ZE_EXPECT(zeKernelSetGroupSize(kern, local.groupCountX, local.groupCountY, local.groupCountZ)); + OV_ZE_EXPECT(zeCommandListAppendLaunchKernel(m_command_list, + kern, + &args, + set_output_event ? std::dynamic_pointer_cast(ev)->get_handle() : nullptr, + dep_events_ptr == nullptr ? 0 : static_cast(dep_events_ptr->size()), + dep_events_ptr == nullptr ? 
0 : &dep_events_ptr->front())); + + return ev; +} + +void ze_stream::enqueue_barrier() { + OV_ZE_EXPECT(zeCommandListAppendBarrier(m_command_list, nullptr, 0, nullptr)); +} + +event::ptr ze_stream::enqueue_marker(std::vector const& deps, bool is_output) { + if (deps.empty()) { + auto ev = create_base_event(); + OV_ZE_EXPECT(zeCommandListAppendBarrier(m_command_list, std::dynamic_pointer_cast(ev)->get_handle(), 0, nullptr)); + return ev; + } + + if (m_sync_method == SyncMethods::events) { + std::vector dep_events; + for (auto& dep : deps) { + if (auto ze_base_ev = std::dynamic_pointer_cast(dep)) { + if (ze_base_ev->get_handle() != nullptr) + dep_events.push_back(ze_base_ev->get_handle()); + } + } + if (dep_events.empty()) + return create_user_event(true); + + auto ev = create_base_event(); + OV_ZE_EXPECT(zeCommandListAppendBarrier(m_command_list, + std::dynamic_pointer_cast(ev)->get_handle(), + static_cast(dep_events.size()), + &dep_events.front())); + return ev; + } else if (m_sync_method == SyncMethods::barriers) { + sync_events(deps, is_output); + assert(m_last_barrier_ev != nullptr); + return m_last_barrier_ev; + } else { + return create_user_event(true); + } +} + +ze_event::ptr ze_stream::group_events(std::vector const& deps) { + return std::make_shared(deps, _engine); +} + +void ze_stream::wait() { + finish(); +} + +event::ptr ze_stream::create_user_event(bool set) { + auto ev = m_ev_factory->create_event(++m_queue_counter); + if (set) + ev->set(); + + return ev; +} + +event::ptr ze_stream::create_base_event() { + return m_ev_factory->create_event(++m_queue_counter); +} + +std::unique_ptr ze_stream::create_surfaces_lock(const std::vector &mem) const { + // Level Zero egnine currently does not support surfaces lock + return nullptr; +} + +void ze_stream::flush() const { + // Immediate Command List submits commands immediately - no flush impl +} + +void ze_stream::finish() const { + OV_ZE_EXPECT(zeCommandListHostSynchronize(m_command_list, endless_wait)); +} + 
+void ze_stream::wait_for_events(const std::vector& events) { + bool needs_sync = false; + for (auto& ev : events) { + auto* ze_base_ev = dynamic_cast(ev.get()); + if (ze_base_ev->get_handle() != nullptr) { + ze_base_ev->wait(); + } else { + needs_sync = true; + } + // Block thread and wait for event signal + ev->wait(); + } + + if (needs_sync) { + finish(); + } +} + +void ze_stream::sync_events(std::vector const& deps, bool is_output) { + bool needs_barrier = false; + for (auto& dep : deps) { + auto* ze_base_ev = dynamic_cast(dep.get()); + assert(ze_base_ev != nullptr); + if (ze_base_ev->get_queue_stamp() > m_last_barrier) { + needs_barrier = true; + } + } + + if (needs_barrier) { + if (is_output) { + m_last_barrier_ev = std::dynamic_pointer_cast(create_base_event()); + m_last_barrier_ev->set_queue_stamp(m_queue_counter.load()); + OV_ZE_EXPECT(zeCommandListAppendBarrier(m_command_list, m_last_barrier_ev->get_handle(), 0, nullptr)); + } else { + OV_ZE_EXPECT(zeCommandListAppendBarrier(m_command_list, nullptr, 0, nullptr)); + } + m_last_barrier = ++m_queue_counter; + } + + if (!m_last_barrier_ev) { + m_last_barrier_ev = std::dynamic_pointer_cast(create_user_event(true)); + m_last_barrier_ev->set_queue_stamp(m_queue_counter.load()); + } +} + +#ifdef ENABLE_ONEDNN_FOR_GPU +dnnl::stream& ze_stream::get_onednn_stream() { + OPENVINO_ASSERT(m_queue_type == QueueTypes::in_order, "[GPU] Can't create onednn stream handle as onednn doesn't support out-of-order queue"); + OPENVINO_ASSERT(_engine.get_device_info().vendor_id == INTEL_VENDOR_ID, "[GPU] Can't create onednn stream handle as for non-Intel devices"); + if (!_onednn_stream) { + _onednn_stream = std::make_shared(dnnl::l0_interop::make_stream(_engine.get_onednn_engine(), m_command_list, m_ev_factory->is_profiling_enabled())); + } + + return *_onednn_stream; +} +#endif + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/ze/ze_stream.hpp 
b/src/plugins/intel_gpu/src/runtime/ze/ze_stream.hpp new file mode 100644 index 00000000000000..06647fda325304 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/ze/ze_stream.hpp @@ -0,0 +1,73 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/event.hpp" +#include "intel_gpu/runtime/stream.hpp" +#include "ze_common.hpp" +#include "ze_engine.hpp" +#include "ze_event.hpp" +#include "ze_base_event_factory.hpp" + +namespace cldnn { +namespace ze { + +class ze_stream : public stream { +public: + ze_command_list_handle_t get_queue() const { return m_command_list; } + + ze_stream(const ze_engine& engine, const ExecutionConfig& config); + ze_stream(ze_stream&& other) + : stream(other.m_queue_type, other.m_sync_method) + , _engine(other._engine) + , m_command_list(other.m_command_list) + , m_queue_counter(other.m_queue_counter.load()) + , m_last_barrier(other.m_last_barrier.load()) + , m_last_barrier_ev(other.m_last_barrier_ev) + , m_ev_factory(other.m_ev_factory.release()) { + other.m_command_list = nullptr; + } + + ~ze_stream(); + + void flush() const override; + void finish() const override; + void wait() override; + + void set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) override; + event::ptr enqueue_kernel(kernel& kernel, + const kernel_arguments_desc& args_desc, + const kernel_arguments_data& args, + std::vector const& deps, + bool is_output = false) override; + event::ptr enqueue_marker(std::vector const& deps, bool is_output) override; + event::ptr group_events(std::vector const& deps) override; + void wait_for_events(const std::vector& events) override; + void enqueue_barrier() override; + event::ptr create_user_event(bool set) override; + event::ptr create_base_event() override; + std::unique_ptr create_surfaces_lock(const std::vector &mem) const override; + +#ifdef ENABLE_ONEDNN_FOR_GPU + dnnl::stream& get_onednn_stream() 
override; +#endif + +private: + void sync_events(std::vector const& deps, bool is_output = false); + + const ze_engine& _engine; + mutable ze_command_list_handle_t m_command_list = 0; + mutable std::atomic m_queue_counter{0}; + std::atomic m_last_barrier{0}; + std::shared_ptr m_last_barrier_ev = nullptr; + std::unique_ptr m_ev_factory; + +#ifdef ENABLE_ONEDNN_FOR_GPU + std::shared_ptr _onednn_stream = nullptr; +#endif +}; + +} // namespace ze +} // namespace cldnn diff --git a/src/plugins/intel_gpu/tests/functional/CMakeLists.txt b/src/plugins/intel_gpu/tests/functional/CMakeLists.txt index 2edaf4be161991..3dd2da44dc6d41 100644 --- a/src/plugins/intel_gpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_gpu/tests/functional/CMakeLists.txt @@ -32,12 +32,12 @@ ov_add_test_target( LINK_LIBRARIES openvino::reference funcSharedTests - OpenCL::NewHeaders # should come before OpenCL::OpenCL - OpenCL::OpenCL LABELS OV GPU ) +ov_gpu_set_runtime_interface_for(${TARGET_NAME}) + if(ENABLE_PROXY) target_compile_definitions(${TARGET_NAME} PUBLIC PROXY_PLUGIN_ENABLED) endif() diff --git a/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/gpu_remote_tensor_tests.cpp b/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ocl_remote_tensor_tests.cpp similarity index 99% rename from src/plugins/intel_gpu/tests/functional/remote_tensor_tests/gpu_remote_tensor_tests.cpp rename to src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ocl_remote_tensor_tests.cpp index 70298a79449971..38c548c4820276 100644 --- a/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/gpu_remote_tensor_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ocl_remote_tensor_tests.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#ifdef OV_GPU_WITH_OCL_RT + #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/op/add.hpp" #include "openvino/op/constant.hpp" @@ -2945,3 +2947,4 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_RemoteTensorDataType, OVRemoteTensorDataType_Test ov::element::Type_t::u16, ov::element::Type_t::u32)), OVRemoteTensorDataType_Test::getTestCaseName); +#endif // OV_GPU_WITH_OCL_RT diff --git a/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ze_remote_tensor_tests.cpp b/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ze_remote_tensor_tests.cpp new file mode 100644 index 00000000000000..b593552272d691 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/remote_tensor_tests/ze_remote_tensor_tests.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef OV_GPU_WITH_ZE_RT + +#include "openvino/runtime/intel_gpu/remote_properties.hpp" +#include "openvino/runtime/remote_tensor.hpp" + +#include "shared_test_classes/base/ov_behavior_test_utils.hpp" + +TEST(ZeRemoteContext, smoke_CorrectContextType) { + auto core = ov::Core(); + auto remote_context = core.get_default_context(ov::test::utils::DEVICE_GPU); + ASSERT_EQ(remote_context.get_params().at(ov::intel_gpu::context_type.name()), ov::intel_gpu::ContextType::ZE); +} + +#endif // OV_GPU_WITH_ZE_RT diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/serialize.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/serialize.cpp index 09b5052df9dc07..3ca4052b22d01d 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/serialize.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/serialize.cpp @@ -46,7 +46,7 @@ class LSTMSequenceTest : virtual public SerializeBaseTest { class GRUSequenceTest : virtual public SerializeBaseTest { public: - void SetUp() { + void SetUp() override { std::string cacheDirName = "cache_gru"; auto init_shape = ov::PartialShape({1, 30, 512}); auto batch_size = static_cast(init_shape[0].get_length()); diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/vlsdpa.cpp 
b/src/plugins/intel_gpu/tests/functional/subgraph_tests/vlsdpa.cpp index 38be79bcb5391e..f2fae883a0f718 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/vlsdpa.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/vlsdpa.cpp @@ -11,7 +11,6 @@ #include "shared_test_classes/base/ov_subgraph.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/engine_configuration.hpp" -#include "openvino/runtime/intel_gpu/ocl/ocl.hpp" #include "openvino/opsets/opset13.hpp" #include "ov_ops/vl_sdpa.hpp" diff --git a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt index 1616c9e0bb687a..8b56fedf4d4a06 100644 --- a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt +++ b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt @@ -34,6 +34,24 @@ file(GLOB_RECURSE SOURCES_MAIN "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/simple_math.cpp" ) +# Those tests have dependency on OpenCL runtime +# Need to be excluded from the build with a different runtime +file(GLOB_RECURSE SOURCES_WITH_OCL_RT + "${CMAKE_CURRENT_SOURCE_DIR}/module_tests/device_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/module_tests/engine_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/module_tests/events_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/module_tests/network_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/module_tests/usm_memory_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/test_cases/convert_color_gpu_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/test_cases/cl_mem_input_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/test_cases/mem_perf_test.cpp" + ) +if (NOT GPU_RT_TYPE STREQUAL "OCL") + foreach (SOURCE_FILE IN LISTS SOURCES_WITH_OCL_RT) + list (REMOVE_ITEM SOURCES_MAIN ${SOURCE_FILE}) + endforeach() +endif() + if (NOT ENABLE_ONEDNN_FOR_GPU) set(EXCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/onednn/") foreach (SOURCE_FILE IN LISTS SOURCES_MAIN) @@ -60,6 +78,7 @@ add_executable(${TARGET_NAME} ${SOURCES_ALL}) target_compile_definitions(${TARGET_NAME} PRIVATE 
CI_BUILD_NUMBER="") ov_set_threading_interface_for(${TARGET_NAME}) +ov_gpu_set_runtime_interface_for(${TARGET_NAME}) # Workaround to avoid warnings during LTO build if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") @@ -69,7 +88,6 @@ endif() set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) target_link_libraries(${TARGET_NAME} PRIVATE openvino_intel_gpu_graph - OpenCL::OpenCL gtest gtest_main gflags diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp index 33c6ada95eab2d..822f618b0e49b2 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/device_test.cpp @@ -56,7 +56,7 @@ TEST(devices_test, sort_order_single_vendor) { devices_list.push_back(std::make_shared(INTEL_VENDOR_ID, device_type::discrete_gpu, device_id++)); devices_list.push_back(std::make_shared(INTEL_VENDOR_ID, device_type::discrete_gpu, device_id++)); - auto sorted_list = ocl::ocl_device_detector::sort_devices(devices_list); + auto sorted_list = sort_devices(devices_list); std::vector expected_devices_order = {2, 0, 1, 3, 4}; @@ -77,7 +77,7 @@ TEST(devices_test, sort_order_two_vendors) { devices_list.push_back(std::make_shared(INTEL_VENDOR_ID, device_type::discrete_gpu, device_id++)); devices_list.push_back(std::make_shared(INTEL_VENDOR_ID, device_type::integrated_gpu, device_id++)); - auto sorted_list = ocl::ocl_device_detector::sort_devices(devices_list); + auto sorted_list = sort_devices(devices_list); std::vector expected_devices_order = {3, 2, 0, 1}; @@ -101,7 +101,7 @@ TEST(devices_test, sort_order_three_vendors) { devices_list.push_back(std::make_shared(OTHER_VENDOR_ID2, device_type::discrete_gpu, device_id++)); devices_list.push_back(std::make_shared(OTHER_VENDOR_ID2, device_type::discrete_gpu, device_id++)); - auto sorted_list = ocl::ocl_device_detector::sort_devices(devices_list); + auto 
sorted_list = sort_devices(devices_list); std::vector expected_devices_order = {2, 3, 0, 1, 4, 5}; diff --git a/src/plugins/intel_gpu/tests/unit/passes/kernels_cache_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/kernels_cache_test.cpp index 30225132c35488..871038a6c17f31 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/kernels_cache_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/kernels_cache_test.cpp @@ -4,7 +4,6 @@ #include "test_utils.h" -#include "runtime/ocl/ocl_kernel.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/graph/program.hpp" #include "intel_gpu/graph/network.hpp" @@ -181,12 +180,7 @@ TEST(kernels_cache, reuse_kernels_property) { auto conv1_kern = cache.get_cached_kernel_id(conv1_kernels[idx]); auto conv2_kern = cache.get_cached_kernel_id(conv2_kernels[idx]); ASSERT_EQ(conv1_kern, conv2_kern); - - auto conv1_ocl_kernel = std::dynamic_pointer_cast(conv1_kernels[idx]); - auto conv2_ocl_kernel = std::dynamic_pointer_cast(conv2_kernels[idx]); - if (conv1_ocl_kernel && conv2_ocl_kernel) { - ASSERT_EQ(conv1_ocl_kernel->get_handle().get(), conv2_ocl_kernel->get_handle().get()); - } + ASSERT_TRUE(conv1_kernels[idx]->is_same(*conv2_kernels[idx].get())); } auto& concat1_node = prog->get_node("concat1"); @@ -200,11 +194,6 @@ TEST(kernels_cache, reuse_kernels_property) { auto concat1_kern = cache.get_cached_kernel_id(concat1_kernels[idx]); auto concat2_kern = cache.get_cached_kernel_id(concat2_kernels[idx]); ASSERT_EQ(concat1_kern, concat2_kern); - - auto concat1_ocl_kernel = std::dynamic_pointer_cast(concat1_kernels[idx]); - auto concat2_ocl_kernel = std::dynamic_pointer_cast(concat2_kernels[idx]); - if (concat1_ocl_kernel && concat2_ocl_kernel) { - ASSERT_EQ(concat1_ocl_kernel->get_handle().get(), concat2_ocl_kernel->get_handle().get()); - } + ASSERT_TRUE(concat1_kernels[idx]->is_same(*concat2_kernels[idx].get())); } } diff --git a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp 
b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp index e4fbb1f3d3b228..be8e390749b801 100644 --- a/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_utils/test_utils.cpp @@ -306,7 +306,8 @@ cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine, } std::shared_ptr create_test_engine() { - auto ret = cldnn::engine::create(engine_types::ocl, runtime_types::ocl); + auto ret = cldnn::engine::create( + cldnn::device_query::get_default_engine_type(), cldnn::device_query::get_default_runtime_type()); #ifdef ENABLE_ONEDNN_FOR_GPU if (ret->get_device_info().supports_immad) ret->create_onednn_engine({}); diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index b9a904a03f6271..d876d52f97baed 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -9,7 +9,7 @@ set(XETLA_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/xetla/include/" CACHE PATH "P # if(ENABLE_ONEDNN_FOR_GPU) - function(build_onednn_gpu) + function(build_onednn_gpu GPU_RUNTIME) include(ExternalProject) set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build") set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install" CACHE PATH "Installation path for oneDNN GPU library") @@ -99,11 +99,16 @@ if(ENABLE_ONEDNN_FOR_GPU) set(onednn_gpu_lib "${CMAKE_STATIC_LIBRARY_PREFIX}${DNNL_GPU_LIBRARY_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") set(ONEDNN_GPU_LIB_PATH ${ONEDNN_INSTALL_DIR}/lib/${onednn_gpu_lib} CACHE FILEPATH "Path to oneDNN GPU library") + if(GPU_RUNTIME STREQUAL "L0") + set(ONEDNN_GPU_DIR ${CMAKE_CURRENT_SOURCE_DIR}/l0_onednn_gpu CACHE FILEPATH "Path to oneDNN GPU repository") + elseif(GPU_RUNTIME STREQUAL "OCL") + set(ONEDNN_GPU_DIR ${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu CACHE FILEPATH "Path to oneDNN GPU repository") + endif() ExternalProject_Add(onednn_gpu_build # Directory Options: PREFIX 
"${ONEDNN_PREFIX_DIR}" - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu" + SOURCE_DIR "${ONEDNN_GPU_DIR}" BINARY_DIR "${ONEDNN_BUILD_DIR}" INSTALL_DIR "${ONEDNN_INSTALL_DIR}" # Configure Step Options: @@ -113,7 +118,7 @@ if(ENABLE_ONEDNN_FOR_GPU) "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" "-DDNNL_TARGET_ARCH=${ONEDNN_TARGET_ARCH}" "-DDNNL_CPU_RUNTIME=NONE" - "-DDNNL_GPU_RUNTIME=OCL" + "-DDNNL_GPU_RUNTIME=${GPU_RUNTIME}" "-DDNNL_LIBRARY_NAME=${DNNL_GPU_LIBRARY_NAME}" "-DCMAKE_INSTALL_PREFIX=${ONEDNN_INSTALL_DIR}" "-DDNNL_ENABLE_CONCURRENT_EXEC=ON" @@ -155,13 +160,12 @@ if(ENABLE_ONEDNN_FOR_GPU) DEPENDEES install # Ensures this runs after install ) endif() - set(LIB_INCLUDE_DIRS "${ONEDNN_INSTALL_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src" - "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src/gpu/intel/jit/ngen" - "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src/gpu/intel/jit/config" - "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/src/gpu/intel/gemm/jit/include" - "${CMAKE_CURRENT_SOURCE_DIR}/onednn_gpu/third_party/ngen") + "${ONEDNN_GPU_DIR}/src" + "${ONEDNN_GPU_DIR}/src/gpu/intel/jit/ngen" + "${ONEDNN_GPU_DIR}/src/gpu/intel/jit/config" + "${ONEDNN_GPU_DIR}/src/gpu/intel/gemm/jit/include" + "${ONEDNN_GPU_DIR}/third_party/ngen") set(LIB_DEFINITIONS ENABLE_ONEDNN_FOR_GPU DNNL_DLL DNNL_DLL_EXPORTS @@ -188,5 +192,5 @@ if(ENABLE_ONEDNN_FOR_GPU) COMPONENT ${OV_CPACK_COMP_CORE}) endif() endfunction() - build_onednn_gpu() + build_onednn_gpu(${GPU_RT_TYPE}) endif() diff --git a/src/plugins/intel_gpu/thirdparty/l0_onednn_gpu b/src/plugins/intel_gpu/thirdparty/l0_onednn_gpu new file mode 160000 index 00000000000000..9cb2921cc19dc9 --- /dev/null +++ b/src/plugins/intel_gpu/thirdparty/l0_onednn_gpu @@ -0,0 +1 @@ +Subproject commit 9cb2921cc19dc9ca74309bb65149ac002ea56b2c diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index 371ea475b13214..5a8b5ef918a3c1 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -68,7 +68,7 @@ 
 endif()

 # LevelZero
 #
-if(ENABLE_INTEL_NPU)
+if(ENABLE_INTEL_GPU OR ENABLE_INTEL_NPU)
     if(ENABLE_SYSTEM_LEVEL_ZERO)
         pkg_search_module(level_zero QUIET
                           IMPORTED_TARGET
diff --git a/thirdparty/level_zero/CMakeLists.txt b/thirdparty/level_zero/CMakeLists.txt
index b78f115b32f9a9..0fbbd2ad303106 100644
--- a/thirdparty/level_zero/CMakeLists.txt
+++ b/thirdparty/level_zero/CMakeLists.txt
@@ -30,7 +30,24 @@ endif()
 set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
 add_subdirectory(level-zero EXCLUDE_FROM_ALL)
-set_property(TARGET ze_loader APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/level-zero/include>)
+set(ZE_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include/")
+file(GLOB_RECURSE COMPUTE_RUNTIME_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/compute-runtime/*.h")
+file(GLOB_RECURSE LEVEL_ZERO_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/level-zero/include/*.h")
+add_custom_command(OUTPUT "${ZE_INCLUDE_DIR}"
+    COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}/level-zero/include" "${ZE_INCLUDE_DIR}/level_zero"
+    COMMAND "${CMAKE_COMMAND}" -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}/compute-runtime" "${ZE_INCLUDE_DIR}/level_zero"
+    DEPENDS "${COMPUTE_RUNTIME_HEADERS}" "${LEVEL_ZERO_HEADERS}"
+    COMMENT "Copying Level Zero and compute-runtime headers..."
+)
+add_custom_target(prepare_ze_headers ALL DEPENDS "${ZE_INCLUDE_DIR}")
+add_dependencies(ze_loader prepare_ze_headers)
+
+# Allow include patterns with and without level-zero/ prefix
+set_property(TARGET ze_loader APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    $<BUILD_INTERFACE:${ZE_INCLUDE_DIR}>
+    $<BUILD_INTERFACE:${ZE_INCLUDE_DIR}/level_zero>
+)
+
 # This VERSION file created by L0 may cause compilation issue of oneTBB headers, so remove it
 file(REMOVE "${CMAKE_BINARY_DIR}/VERSION")
diff --git a/thirdparty/level_zero/compute-runtime/ze_intel_gpu.h b/thirdparty/level_zero/compute-runtime/ze_intel_gpu.h
new file mode 100644
index 00000000000000..d7c9f3f56aa6bf
--- /dev/null
+++ b/thirdparty/level_zero/compute-runtime/ze_intel_gpu.h
@@ -0,0 +1,819 @@
+// intel/compute-runtime 4df478c5139703c82e548a65eafbcc69923953ac
+/*
+ * Copyright (C) 2020-2025 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#ifndef _ZE_INTEL_GPU_H
+#define _ZE_INTEL_GPU_H
+
+#include <level_zero/ze_api.h>
+
+#include "ze_stypes.h"
+
+#if defined(__cplusplus)
+#pragma once
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define ZE_INTEL_GPU_VERSION_MAJOR 0
+#define ZE_INTEL_GPU_VERSION_MINOR 1
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME
+/// @brief Module DP properties driver extension name
+#define ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME "ZE_intel_experimental_device_module_dp_properties"
+#endif // ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Module DP properties driver extension Version(s)
+typedef enum _ze_intel_device_module_dp_properties_exp_version_t {
+    ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0),     ///< version 1.0
+    ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version
+    ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_FORCE_UINT32 = 0x7fffffff
+
+} ze_intel_device_module_dp_properties_exp_version_t;
+ +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported Dot Product flags +typedef uint32_t ze_intel_device_module_dp_exp_flags_t; +typedef enum _ze_intel_device_module_dp_exp_flag_t { + ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DP4A = ZE_BIT(0), ///< Supports DP4A operation + ZE_INTEL_DEVICE_MODULE_EXP_FLAG_DPAS = ZE_BIT(1), ///< Supports DPAS operation + ZE_INTEL_DEVICE_MODULE_EXP_FLAG_FORCE_UINT32 = 0x7fffffff + +} ze_intel_device_module_dp_exp_flag_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device Module dot product properties queried using +/// ::zeDeviceGetModuleProperties +/// +/// @details +/// - This structure may be passed to ::zeDeviceGetModuleProperties, via +/// `pNext` member of ::ze_device_module_properties_t. +/// @brief Device module dot product properties +typedef struct _ze_intel_device_module_dp_exp_properties_t { + ze_structure_type_ext_t stype = ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES; ///< [in] type of this structure + void *pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains sType and pNext). 
+ ze_intel_device_module_dp_exp_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_intel_device_module_dp_flag_t +} ze_intel_device_module_dp_exp_properties_t; + +#ifndef ZE_INTEL_COMMAND_LIST_MEMORY_SYNC +/// @brief Cmd List memory sync extension name +#define ZE_INTEL_COMMAND_LIST_MEMORY_SYNC "ZE_intel_experimental_command_list_memory_sync" +#endif // ZE_INTEL_COMMAND_LIST_MEMORY_SYNC + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Cmd List memory sync extension Version(s) +typedef enum _ze_intel_command_list_memory_sync_exp_version_t { + ZE_INTEL_COMMAND_LIST_MEMORY_SYNC_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_COMMAND_LIST_MEMORY_SYNC_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_COMMAND_LIST_MEMORY_SYNC_EXP_VERSION_FORCE_UINT32 = 0x7fffffff +} ze_intel_command_list_memory_sync_exp_version_t; + +#ifndef ZE_INTEL_STRUCTURE_TYPE_DEVICE_COMMAND_LIST_WAIT_ON_MEMORY_DATA_SIZE_EXP_DESC +/// @brief stype for _ze_intel_device_command_list_wait_on_memory_data_size_exp_desc_t +#endif + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended descriptor for cmd list memory sync +/// +/// @details +/// - Implementation must support ::ZE_intel_experimental_command_list_memory_sync extension +/// - May be passed to ze_device_properties_t through pNext. +typedef struct _ze_intel_device_command_list_wait_on_memory_data_size_exp_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). 
+ uint32_t cmdListWaitOnMemoryDataSizeInBytes; /// Defines supported data size for zexCommandListAppendWaitOnMemory[64] API +} ze_intel_device_command_list_wait_on_memory_data_size_exp_desc_t; + +#ifndef ZEX_INTEL_EVENT_SYNC_MODE_EXP_NAME +/// @brief Event sync mode extension name +#define ZEX_INTEL_EVENT_SYNC_MODE_EXP_NAME "ZEX_intel_experimental_event_sync_mode" +#endif // ZE_INTEL_EVENT_SYNC_MODE_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Event sync mode extension Version(s) +typedef enum _zex_intel_event_sync_mode_exp_version_t { + ZEX_INTEL_EVENT_SYNC_MODE_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZEX_INTEL_EVENT_SYNC_MODE_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZEX_INTEL_EVENT_SYNC_MODE_EXP_VERSION_FORCE_UINT32 = 0x7fffffff +} zex_intel_event_sync_mode_exp_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported event sync mode flags +typedef uint32_t zex_intel_event_sync_mode_exp_flags_t; +typedef enum _zex_intel_event_sync_mode_exp_flag_t { + ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT = ZE_BIT(0), ///< Low power host synchronization mode, for better CPU utilization + ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT = ZE_BIT(1), ///< Generate interrupt when Event is signalled on Device + ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_EXTERNAL_INTERRUPT_WAIT = ZE_BIT(2), ///< Host synchronization APIs wait for external interrupt. Can be used only for Events created via zexCounterBasedEventCreate + ZEX_INTEL_EVENT_SYNC_MODE_EXP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff + +} zex_intel_event_sync_mode_exp_flag_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended descriptor for event sync mode +/// +/// @details +/// - Implementation must support ::ZEX_intel_experimental_event_sync_mode extension +/// - May be passed to ze_event_desc_t through pNext. 
+typedef struct _zex_intel_event_sync_mode_exp_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + zex_intel_event_sync_mode_exp_flags_t syncModeFlags; /// valid combination of ::ze_intel_event_sync_mode_exp_flag_t + uint32_t externalInterruptId; /// External interrupt id. Used only when ZEX_INTEL_EVENT_SYNC_MODE_EXP_FLAG_EXTERNAL_INTERRUPT_WAIT flag is set +} zex_intel_event_sync_mode_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare zex_intel_queue_allocate_msix_hint_exp_desc_t +typedef struct _zex_intel_queue_allocate_msix_hint_exp_desc_t zex_intel_queue_allocate_msix_hint_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command queue descriptor for allocating unique msix. This structure may be +/// passed as pNext member of ::ze_command_queue_desc_t. + +typedef struct _zex_intel_queue_allocate_msix_hint_exp_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + ze_bool_t uniqueMsix; ///< [in] If set, try to allocate unique msix for command queue. + ///< If not set, driver will follow default behaviour. It may share msix for signaling completion with other queues. + ///< Number of unique msixes may be limited. On unsuccessful allocation, queue or immediate cmd list creation API fallbacks to default behaviour. + +} zex_intel_queue_allocate_msix_hint_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command queue descriptor for enabling copy operations offload. This structure may be +/// passed as pNext member of ::ze_command_queue_desc_t. 
+ +typedef struct _zex_intel_queue_copy_operations_offload_hint_exp_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + ze_bool_t copyOffloadEnabled; ///< [in] If set, try to offload copy operations to different engines. Applicable only for compute queues. + ///< This is only a hint. Driver may ignore it per append call, based on platform capabilities or internal heuristics. + ///< If not set, driver will follow default behaviour. Copy operations will be submitted to same engine as compute operations. + +} zex_intel_queue_copy_operations_offload_hint_exp_desc_t; + +#ifndef ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_NAME +/// @brief Queue copy operations offload hint extension name +#define ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_NAME "ZEX_intel_experimental_queue_copy_operations_offload_hint" +#endif // ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Queue copy operations offload hint extension version(s) +typedef enum _zex_intel_queue_copy_operations_offload_hint_exp_version_t { + ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZEX_INTEL_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_VERSION_FORCE_UINT32 = 0x7fffffff +} zex_intel_queue_copy_operations_offload_hint_exp_version_t; + +#if ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command queue flag for enabling copy operations offload +/// +/// If set, try to offload copy operations to different engines. Applicable only for compute queues. +/// This is only a hint. 
Driver may ignore it per append call, based on platform capabilities or internal heuristics. +#define ZE_COMMAND_QUEUE_FLAG_COPY_OFFLOAD_HINT ZE_BIT(2) + +#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) + +#ifndef ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_NAME +/// @brief Extension name for query to read the Intel Level Zero Driver Version String +#define ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_NAME "ZE_intel_get_driver_version_string" +#endif // ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query to read the Intel Level Zero Driver Version String extension version(s) +typedef enum _ze_intel_get_driver_version_string_exp_version_t { + ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_GET_DRIVER_VERSION_STRING_EXP_VERSION_FORCE_UINT32 = 0x7fffffff +} ze_intel_get_driver_version_string_exp_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported 2D Block Array flags +typedef uint32_t ze_intel_device_block_array_exp_flags_t; +typedef enum _ze_intel_device_block_array_exp_flag_t { + ZE_INTEL_DEVICE_EXP_FLAG_2D_BLOCK_STORE = ZE_BIT(0), ///< Supports store operation + ZE_INTEL_DEVICE_EXP_FLAG_2D_BLOCK_LOAD = ZE_BIT(1), ///< Supports load operation + ZE_INTEL_DEVICE_EXP_FLAG_2D_BLOCK_FORCE_UINT32 = 0x7fffffff + +} ze_intel_device_block_array_exp_flag_t; + +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME +/// @brief Device 2D block array properties driver extension name +#define ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME "ZE_intel_experimental_device_block_array_properties" +#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME + +/// @brief Device 2D block array properties queried using +/// 
::zeDeviceGetProperties +/// +/// @details +/// - This structure may be passed to ::zeDeviceGetProperties, via +/// `pNext` member of ::ze_device_properties_t. +/// @brief Device 2D block array properties + +typedef struct _ze_intel_device_block_array_exp_properties_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + void *pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains sType and pNext). + ze_intel_device_block_array_exp_flags_t flags; ///< [out] 0 (none) or a valid combination of ::ze_intel_device_block_array_exp_flag_t +} ze_intel_device_block_array_exp_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device 2D block array properties driver extension versions +typedef enum _ze_intel_device_block_array_exp_properties_version_t { + ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES_VERSION_FORCE_UINT32 = 0x7fffffff + +} ze_intel_device_block_array_exp_properties_version_t; + +/// @brief Query to read the Intel Level Zero Driver Version String +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - The Driver Version String will be in the format: +/// - Major.Minor.Patch+Optional per semver guidelines https://semver.org/#spec-item-10 +/// @returns +/// - ::ZE_RESULT_SUCCESS +ZE_APIEXPORT ze_result_t ZE_APICALL +zeIntelGetDriverVersionString( + ze_driver_handle_t hDriver, ///< [in] Driver handle whose version is being read. + char *pDriverVersion, ///< [in,out] pointer to driver version string. + size_t *pVersionSize); ///< [in,out] pointer to the size of the driver version string. 
+ ///< if size is zero, then the size of the version string is returned. + +/// @brief Get Kernel Program Binary +/// +/// @details +/// - A valid kernel handle must be created with zeKernelCreate. +/// - Returns Intel Graphics Assembly (GEN ISA) format binary program data for kernel handle. +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// @returns +/// - ::ZE_RESULT_SUCCESS + +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZEX_MEMORY_FREE_CALLBACK_EXT_NAME +/// @brief Memory Free Callback Extension Name +#define ZEX_MEMORY_FREE_CALLBACK_EXT_NAME "ZEX_extension_memory_free_callback" + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Memory Free Callback Extension Version(s) +typedef enum _zex_memory_free_callback_ext_version_t { + ZEX_MEMORY_FREE_CALLBACK_EXT_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZEX_MEMORY_FREE_CALLBACK_EXT_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZEX_MEMORY_FREE_CALLBACK_EXT_VERSION_FORCE_UINT32 = 0x7fffffff ///< Value marking end of ZEX_MEMORY_FREE_CALLBACK_EXT_VERSION_* ENUMs + +} zex_memory_free_callback_ext_version_t; + +#ifndef ZEX_STRUCTURE_TYPE_MEMORY_FREE_CALLBACK_EXT_DESC +/// @brief stype for _zex_memory_free_callback_ext_desc_t +#endif + +/** + * @brief Callback function type for memory free events. + * + * This function is called when a memory free operation occurs. + * + * @param pUserData Pointer to user-defined data passed to the callback. + */ +typedef void (*zex_mem_free_callback_fn_t)(void *pUserData); + +/** + * @brief Descriptor for a memory free callback extension. + * + * This structure is used to specify a callback function that will be invoked when memory is freed. + * + * Members: + * - stype: Specifies the type of this structure. 
+ * - pNext: Optional pointer to an extension-specific structure; must be null or point to a structure containing stype and pNext. + * - pfnCallback: Callback function to be called when memory is freed. + * - pUserData: Optional user data to be passed to the callback function. + */ +typedef struct _zex_memory_free_callback_ext_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + zex_mem_free_callback_fn_t pfnCallback; // [in] callback function to be called on memory free + void *pUserData; // [in][optional] user data passed to callback +} zex_memory_free_callback_ext_desc_t; + +/** + * @brief Registers a callback to be invoked when memory is freed. + * + * This function allows the user to register a callback that will be called + * whenever the specified memory is freed within the given context. + * + * @param hContext + * [in] Handle to the context in which the memory was allocated. + * @param hFreeCallbackDesc + * [in] Pointer to a descriptor specifying the callback function and its parameters. + * @param ptr + * [in] Pointer to the memory for which the free callback is to be registered. + * +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_ARGUMENT +/// + `nullptr == hFreeCallbackDesc` +/// + `nullptr == ptr` + * + * @note The callback will be invoked when the specified memory is freed. 
+ */ +ZE_APIEXPORT ze_result_t ZE_APICALL zexMemFreeRegisterCallbackExt(ze_context_handle_t hContext, zex_memory_free_callback_ext_desc_t *hFreeCallbackDesc, void *ptr); +#endif // ZEX_MEMORY_FREE_CALLBACK_EXT_NAME + +#ifndef ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME +/// @brief Get Kernel Program Binary experimental name +#define ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME "ZE_intel_experimental_kernel_get_program_binary" +#endif // ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intel Kernel Get Binary Extension Version(s) +typedef enum _ze_intel_kernel_get_binary_exp_version_t { + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_KERNEL_GET_PROGRAM_BINARY_EXP_VERSION_FORCE_UINT32 = 0x7fffffff + +} ze_intel_kernel_get_binary_exp_version_t; + +ZE_APIEXPORT ze_result_t ZE_APICALL +zeIntelKernelGetBinaryExp( + ze_kernel_handle_t hKernel, ///< [in] Kernel handle + size_t *pSize, ///< [in, out] pointer to variable with size of GEN ISA binary + char *pKernelBinary ///< [in,out] pointer to storage area for GEN ISA binary function +); + +#ifndef ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_NAME +/// @brief DRM format modifier extension name +#define ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_NAME "ZE_intel_experimental_drm_format_modifier" +#endif // ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief DRM format modifier extension Version(s) +typedef enum _ze_intel_drm_format_modifier_exp_version_t { + ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_INTEL_DRM_FORMAT_MODIFIER_EXP_VERSION_FORCE_UINT32 = 0x7fffffff +} 
ze_intel_drm_format_modifier_exp_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Image DRM format modifier properties +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_image_desc_t, +/// when using a DRM format modifier. +/// - Properties struct for providing user with the selected drm format modifier for the image +/// - This is useful if the application wants to export the image to another API that requires the DRM format modifier +/// - The application can query the chosen DRM format modifier for the image. +/// - The application can use this information to choose a DRM format modifier for the image during creation +typedef struct _ze_intel_image_selected_format_modifier_exp_properties_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint64_t drmFormatModifier; ///< [out] DRM format modifier +} ze_intel_image_selected_format_modifier_exp_properties_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Image DRM format modifier create list +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_image_desc_t, +/// when providing a list of DRM format modifiers to choose from during image creation. +/// - This is a descriptor for creating image with the specified list of drm format modifier +/// - If the user passes a list struct, then implementation chooses one from the list of drm modifiers as it sees fit. 
+/// - If user wants to pass a single drm modifier then they can set the drmFormatModifierCount to 1 and pass the single drm modifier in pDrmFormatModifiers +typedef struct _ze_intel_image_format_modifier_create_list_exp_desc_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint32_t drmFormatModifierCount; ///< [in] number of DRM format modifiers in the list + uint64_t *pDrmFormatModifiers; ///< [in][range(0, drmFormatModifierCount)] array of DRM format modifiers +} ze_intel_image_format_modifier_create_list_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Image DRM format modifier import descriptor +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_image_desc_t, +/// when importing an image with a specific DRM format modifier. +/// - The pNext chain is setup accordingly in ze_image_desc_t prior to calling zeImageCreate API +typedef struct _ze_intel_image_format_modifier_import_exp_desc_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint64_t drmFormatModifier; ///< [in] DRM format modifier to use for the image +} ze_intel_image_format_modifier_import_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Buffer DRM format modifier create list +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_device_mem_alloc_desc_t, +/// when providing a list of DRM format modifiers to choose from during buffer creation. 
+/// - This is a descriptor for creating buffer with the specified list of drm format modifier +/// - If the user passes a list struct, then implementation chooses one from the list of drm modifiers as it sees fit. +/// - If user wants to pass a single drm modifier then they can set the drmFormatModifierCount to 1 and pass the single drm modifier in pDrmFormatModifiers +/// - The pNext chain is setup accordingly in ze_device_mem_alloc_desc_t prior to calling zeMemAllocDevice API +typedef struct _ze_intel_mem_format_modifier_create_list_exp_desc_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint32_t drmFormatModifierCount; ///< [in] number of DRM format modifiers in the list + uint64_t *pDrmFormatModifiers; ///< [in][range(0, drmFormatModifierCount)] array of DRM format modifiers +} ze_intel_mem_format_modifier_create_list_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Buffer DRM format modifier import descriptor +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_device_mem_alloc_desc_t, +/// when importing a buffer with a specific DRM format modifier. +/// - This descriptor must be used in conjunction with ze_external_memory_import_fd_t. If not, implementation will return an error. +/// - The pNext chain is setup accordingly in ze_device_mem_alloc_desc_t prior to calling zeMemAllocDevice API +typedef struct _ze_intel_mem_format_modifier_import_exp_desc_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). 
+ uint64_t drmFormatModifier; ///< [in] DRM format modifier to use for the buffer +} ze_intel_mem_format_modifier_import_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Buffer DRM format modifier properties +/// +/// @details +/// - This structure may be passed as pNext member of ::ze_memory_allocation_properties_t, +/// when querying the DRM format modifier of a buffer. +/// - Properties struct for providing user with the selected drm format modifier for the buffer +/// - This is useful if the application wants to export the buffer to another API that requires the DRM format modifier +/// - The application can query the chosen DRM format modifier for the buffer via zeMemGetAllocProperties API +typedef struct _ze_intel_mem_selected_format_modifier_exp_properties_t { + ze_structure_type_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint64_t drmFormatModifier; ///< [out] DRM format modifier +} ze_intel_mem_selected_format_modifier_exp_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query for supported DRM format modifiers for a given image descriptor +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - This function can be used to query supported DRM format modifiers for a specific image description. +/// - User can use this API in two ways: +/// 1. Set pCount to the address of a uint32_t with value 0 and pDrmFormatModifiers to nullptr +/// to query just the number of supported DRM format modifiers. +/// 2. Set pCount to the address of a uint32_t with the number of elements in the pDrmFormatModifiers +/// array to retrieve the list of supported DRM format modifiers. 
+/// - The application can use the returned DRM format modifiers to: +/// 1. Create L0 images with supported DRM format modifiers. +/// 2. Compare with DRM format modifiers from other APIs (like Vulkan) to find common +/// modifiers that work for interop scenarios. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hDevice` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pCount` +/// - ::ZE_RESULT_ERROR_INVALID_IMAGE_DESC +/// + The image description doesn't match the device capabilities +ze_result_t ZE_APICALL +zeIntelImageGetFormatModifiersSupportedExp( + ze_device_handle_t hDevice, ///< [in] handle of the device + const ze_image_desc_t *pImageDesc, ///< [in] pointer to image descriptor + uint32_t *pCount, ///< [in,out] pointer to the number of DRM format modifiers. + ///< if count is zero, then the driver shall update the value with the + ///< total number of supported DRM format modifiers for the image format. + ///< if count is greater than the number of supported DRM format modifiers, + ///< then the driver shall update the value with the correct number of supported DRM format modifiers. + uint64_t *pDrmFormatModifiers ///< [in,out][optional][range(0, *pCount)] array of supported DRM format modifiers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query for supported DRM format modifiers for a memory allocation descriptor +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - This function can be used to query supported DRM format modifiers for a specific memory allocation description. 
+/// - User can use this API in two ways: +/// 1. Set pCount to the address of a uint32_t with value 0 and pDrmFormatModifiers to nullptr +/// to query just the number of supported DRM format modifiers. +/// 2. Set pCount to the address of a uint32_t with the number of elements in the pDrmFormatModifiers +/// array to retrieve the list of supported DRM format modifiers. +/// - The application can use the returned DRM format modifiers to: +/// 1. Create L0 memory allocations with supported DRM format modifiers. +/// 2. Compare with DRM format modifiers from other APIs (like Vulkan) to find common +/// modifiers that work for interop scenarios. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hContext` +ze_result_t ZE_APICALL +zeIntelMemGetFormatModifiersSupportedExp( + ze_context_handle_t hContext, ///< [in] handle of the context + const ze_device_mem_alloc_desc_t *pDeviceDesc, ///< [in] pointer to device memory allocation descriptor + size_t size, ///< [in] size in bytes to allocate + size_t alignment, ///< [in] minimum alignment in bytes for the allocation + ze_device_handle_t hDevice, ///< [in] handle of the device + uint32_t *pCount, ///< [in,out] pointer to the number of DRM format modifiers. + ///< if count is zero, then the driver shall update the value with the + ///< total number of supported DRM format modifiers for the memory allocation. + ///< if count is greater than the number of supported DRM format modifiers, + ///< then the driver shall update the value with the correct number of supported DRM format modifiers. 
+    uint64_t *pDrmFormatModifiers ///< [in,out][optional][range(0, *pCount)] array of supported DRM format modifiers
+);
+
+/// @brief Get priority levels
+///
+/// @details
+/// - The application may call this function from simultaneous threads.
+/// - The implementation of this function should be lock-free.
+/// - Returns priority levels supported by the device
+/// - lowestPriority reports the numerical value that corresponds to lowest queue priority
+/// - highestPriority reports the numerical value that corresponds to highest queue priority
+/// - Lower numbers indicate greater priorities
+/// - The range of meaningful queue properties is represented by [*highestPriority, *lowestPriority]
+/// - Priority passed upon queue creation would automatically clamp down or up to the nearest supported value
+/// - 0 means default priority
+///
+/// @returns
+/// - ::ZE_RESULT_SUCCESS
+/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
+ze_result_t ZE_APICALL zeDeviceGetPriorityLevels(
+    ze_device_handle_t hDevice,
+    int32_t *lowestPriority,
+    int32_t *highestPriority);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Descriptor used for setting priority on command queues and immediate command lists.
+/// This structure may be passed as pNext member of ::ze_command_queue_desc_t.
+typedef struct _ze_queue_priority_desc_t {
+    ze_structure_type_ext_t stype; ///< [in] type of this structure
+    const void *pNext;             ///< [in][optional] must be null or a pointer to an extension-specific structure
+    int priority;                  ///< [in] priority of the queue
+} ze_queue_priority_desc_t;
+
+/// @brief Get default context associated with default driver
+///
+/// @details
+/// - The application may call this function from simultaneous threads.
+/// - The implementation of this function should be lock-free.
+/// - Default context contains all devices within default driver instance +/// @returns +/// - Context handle associated with default driver +ZE_APIEXPORT ze_context_handle_t ZE_APICALL zerGetDefaultContext(); + +/// @brief Get Device Identifier +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - Returned identifier is a 32-bit unsigned integer that is unique to the driver. +/// - The identifier can be used then in zerTranslateIdentifierToDeviceHandle to get the device handle. +/// @returns +/// - 32-bit unsigned integer identifier +ZE_APIEXPORT uint32_t ZE_APICALL zerTranslateDeviceHandleToIdentifier(ze_device_handle_t hDevice); ///< [in] handle of the device + +/// @brief Translate Device Identifier to Device Handle from default Driver +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - Returned device is associated to default driver handle. +/// @returns +/// - device handle associated with the identifier +ZE_APIEXPORT ze_device_handle_t ZE_APICALL zerTranslateIdentifierToDeviceHandle(uint32_t identifier); ///< [in] integer identifier of the device + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves a string describing the last error code returned by the +/// default driver in the current thread. +/// +/// @details +/// - String returned is thread local. +/// - String is only updated on calls returning an error, i.e., not on calls +/// returning ::ZE_RESULT_SUCCESS. +/// - String may be empty if driver considers error code is already explicit +/// enough to describe cause. +/// - Memory pointed to by ppString is owned by the driver. +/// - String returned is null-terminated. 
+///
+/// @returns
+/// - ::ZE_RESULT_SUCCESS
+/// - ::ZE_RESULT_ERROR_UNINITIALIZED
+/// - ::ZE_RESULT_ERROR_DEVICE_LOST
+/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
+/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
+/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
+/// + `nullptr == ppString`
+ZE_APIEXPORT ze_result_t ZE_APICALL
+zerGetLastErrorDescription(
+    const char **ppString ///< [in,out] pointer to a null-terminated array of characters describing
+                          ///< cause of error.
+);
+
+#if ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13)
+
+/// @brief Get default context associated with driver
+///
+/// @details
+/// - The application may call this function from simultaneous threads.
+/// - The implementation of this function should be lock-free.
+/// - Default context contains all devices within driver instance
+/// @returns
+/// - Context handle associated with driver
+ze_context_handle_t ZE_APICALL zeDriverGetDefaultContext(ze_driver_handle_t hDriver); ///< [in] handle of the driver
+
+/// @brief Global device synchronization
+///
+/// @details
+/// - The application may call this function from simultaneous threads.
+/// - The implementation of this function should be lock-free.
+/// - Ensures that everything that was submitted to the device is completed.
+/// - Ensures that all submissions in all queues on device are completed.
+/// - It is not allowed to call this function while some command lists are in graph capture mode.
+/// - Returns error if error is detected during execution on device.
+/// - Hangs indefinitely if GPU execution is blocked on non signaled event.
+///
+/// @returns
+/// - ::ZE_RESULT_SUCCESS
+/// - ::ZE_RESULT_ERROR_DEVICE_LOST
+ze_result_t ZE_APICALL zeDeviceSynchronize(ze_device_handle_t hDevice); ///< [in] handle of the device
+
+/// @brief Append with arguments
+///
+/// @details
+/// - The application may call this function from simultaneous threads.
+/// - The implementation of this function should be lock-free.
+/// - Appends kernel to command list with arguments. +/// - Kernel object state is updated with new arguments, as if separate zeKernelSetArgumentValue were called. +/// - If argument is SLM (size), then SLM size in bytes for this resource is provided under pointer on specific index and its type is size_t. +/// - If argument is an immediate type (i.e. structure, non pointer type), then values under pointer must contain full size of immediate type. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hCommandList` +/// + `nullptr == hKernel` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pArguments` +/// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT +/// - ::ZE_RESULT_ERROR_INVALID_SIZE +/// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` +typedef struct _ze_group_size_t { + uint32_t groupSizeX; ///< [in] local work-group size in X dimension + uint32_t groupSizeY; ///< [in] local work-group size in Y dimension + uint32_t groupSizeZ; ///< [in] local work-group size in Z dimension + +} ze_group_size_t; + +ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithArguments( + ze_command_list_handle_t hCommandList, ///< [in] handle of the command list + ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object + const ze_group_count_t groupCounts, ///< [in] thread group counts + const ze_group_size_t groupSizes, ///< [in] thread group sizes + void **pArguments, ///< [in] kernel arguments; pointer to list where each argument represents a pointer to the argument value on specific index + const void *pNext, ///< [in][optional] extensions + ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion + uint32_t numWaitEvents, ///< [in][optional] number of events to wait 
on before launching + ze_event_handle_t *phWaitEvents); ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait on before launching + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extension descriptor for cooperative kernel launch via pNext chain. +/// @details +/// - This structure can be passed through pNext to zeCommandListAppendLaunchKernelWithParameters +typedef struct _ze_command_list_append_launch_kernel_param_cooperative_desc_t { + ze_structure_type_ext_t stype; ///< [in] Type of this structure (e.g. ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC) + const void *pNext; ///< [in][optional] Pointer to the next extension-specific structure + ze_bool_t isCooperative; ///< [in] Indicates if the kernel should be launched as cooperative +} ze_command_list_append_launch_kernel_param_cooperative_desc_t; +/// @brief Append with parameters +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// - Appends kernel to command list with additional parameters via pNext chain. +/// - Allows passing core and extension descriptors (e.g. cooperative kernel). 
+/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hCommandList` +/// + `nullptr == hKernel` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pGroupCounts` +/// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT +/// - ::ZE_RESULT_ERROR_INVALID_SIZE +/// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` +ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithParameters( + ze_command_list_handle_t hCommandList, ///< [in] handle of the command list + ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object + const ze_group_count_t *pGroupCounts, ///< [in] thread group launch arguments + const void *pNext, ///< [in][optional] additional parameters (pNext chain) + ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion + uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching + ze_event_handle_t *phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait on before launching +); + +#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) + +#if defined(__cplusplus) +} // extern "C" +#endif + +static const ze_device_mem_alloc_desc_t defaultIntelDeviceMemDesc = { + ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, // stype + nullptr, // pNext + ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED, // flags + 0 // ordinal +}; + +static const ze_host_mem_alloc_desc_t defaultIntelHostMemDesc = { + ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, // stype + nullptr, // pNext + ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED | ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT // flags +}; + +static const ze_command_queue_desc_t defaultIntelCommandQueueDesc = { + ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, // stype + nullptr, // pNext + 0, // ordinal + 0, // 
index + ZE_COMMAND_QUEUE_FLAG_IN_ORDER | ZE_COMMAND_QUEUE_FLAG_COPY_OFFLOAD_HINT, // flags + ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, // mode + ZE_COMMAND_QUEUE_PRIORITY_NORMAL // priority +}; + +#if ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) + +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZE_EXTERNAL_MEMORY_MAPPING_EXT_NAME +/// @brief External Memory Mapping Extension Name +#define ZE_EXTERNAL_MEMORY_MAPPING_EXT_NAME "ZE_extension_external_memmap_sysmem" + +/////////////////////////////////////////////////////////////////////////////// +/// @brief External Memory Mapping Extension Version(s) +typedef enum _ze_external_memmap_sysmem_ext_version_t { + ZE_EXTERNAL_MEMMAP_SYSMEM_EXT_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_EXTERNAL_MEMMAP_SYSMEM_EXT_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_EXTERNAL_MEMMAP_SYSMEM_EXT_VERSION_FORCE_UINT32 = 0x7fffffff ///< Value marking end of ZE_EXTERNAL_MEMMAP_SYSMEM_EXT_VERSION_* ENUMs + +} ze_external_memmap_sysmem_ext_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Maps external system memory for an allocation +/// +/// @details +/// - This structure may be passed to ::zeMemAllocHost, via the `pNext` +/// member of ::ze_host_mem_alloc_desc_t to map system memory for a host +/// allocation. +/// - The system memory pointer and size being mapped must be page aligned +/// based on the supported page sizes on the device. +typedef struct _ze_external_memmap_sysmem_ext_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + const void *pSystemMemory; ///< [in] system memory pointer to map; must be page-aligned. + const uint64_t size; ///< [in] size of the system memory to map; must be page-aligned. 
+ +} ze_external_memmap_sysmem_ext_desc_t; +#endif // ZE_EXTERNAL_MEMORY_MAPPING_EXT_NAME + +#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) +#endif diff --git a/thirdparty/level_zero/compute-runtime/ze_stypes.h b/thirdparty/level_zero/compute-runtime/ze_stypes.h new file mode 100644 index 00000000000000..50c9dbaa4e35c2 --- /dev/null +++ b/thirdparty/level_zero/compute-runtime/ze_stypes.h @@ -0,0 +1,65 @@ +// intel/compute-runtime 4df478c5139703c82e548a65eafbcc69923953ac +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#ifndef _ZE_STYPES_H +#define _ZE_STYPES_H + +#include +#include +#include + +#include +using ze_structure_type_ext_t = uint32_t; +using zet_structure_type_ext_t = uint32_t; +using zes_structure_type_ext_t = uint32_t; + +#define ZE_STRUCTURE_TYPE_SYNCHRONIZED_DISPATCH_EXP_DESC static_cast(0x00020020) +#define ZE_STRUCTURE_TYPE_INTEL_MEDIA_COMMUNICATION_DESC static_cast(0x00020021) +#define ZE_STRUCTURE_TYPE_INTEL_MEDIA_DOORBELL_HANDLE_DESC static_cast(0x00020022) +#define ZE_STRUCTURE_TYPE_INTEL_DEVICE_MEDIA_EXP_PROPERTIES static_cast(0x00020023) +#if ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) +#define ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC static_cast(0x00020036) +#define ZE_STRUCTURE_TYPE_EXTERNAL_MEMMAP_SYSMEM_EXT_DESC static_cast(0x00020037) +#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13) +#define ZEX_STRUCTURE_TYPE_LABEL_DESCRIPTOR static_cast(0x00030002) +#define ZEX_STRUCTURE_TYPE_OPERAND_DESCRIPTOR static_cast(0x00030003) +#define ZEX_STRUCTURE_TYPE_VARIABLE_DESCRIPTOR static_cast(0x00030004) +#define ZEX_STRUCTURE_TYPE_TEMP_VARIABLE_DESCRIPTOR static_cast(0x00030005) +#define ZEX_STRUCTURE_TYPE_VARIABLE_INFO static_cast(0x00030006) +#define ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES static_cast(0x00030007) +#define ZEX_STRUCTURE_DEVICE_MODULE_REGISTER_FILE_EXP static_cast(0x00030010) +#define ZEX_STRUCTURE_KERNEL_REGISTER_FILE_SIZE_EXP 
static_cast(0x00030012) +#define ZE_STRUCTURE_INTEL_DEVICE_MODULE_DP_EXP_PROPERTIES static_cast(0x00030013) +#define ZEX_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC static_cast(0x00030016) +#define ZE_INTEL_STRUCTURE_TYPE_DEVICE_COMMAND_LIST_WAIT_ON_MEMORY_DATA_SIZE_EXP_DESC static_cast(0x00030017) +#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_ALLOCATE_MSIX_HINT_EXP_PROPERTIES static_cast(0x00030018) +#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES static_cast(0x0003001B) +#define ZE_STRUCTURE_INTEL_DEVICE_MEMORY_CXL_EXP_PROPERTIES static_cast(0x00030019) +#define ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC static_cast(0x0003001C) +#define ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_SYNC_ALLOC_PROPERTIES static_cast(0x0003001D) +#define ZEX_STRUCTURE_COUNTER_BASED_EVENT_EXTERNAL_STORAGE_ALLOC_PROPERTIES static_cast(0x00030027) +#define ZE_STRUCTURE_TYPE_QUEUE_PRIORITY_DESC static_cast(0x00030028) +#ifndef ZE_RECORD_REPLAY_GRAPH_EXP_NAME +#define ZE_STRUCTURE_TYPE_RECORD_REPLAY_GRAPH_EXP_PROPERTIES static_cast(0x00030029) +#endif // ZE_RECORD_REPLAY_GRAPH_EXP_NAME +#define ZEX_STRUCTURE_TYPE_MEMORY_FREE_CALLBACK_EXT_DESC static_cast(0x00030030) + +// Metric structure types +#define ZET_STRUCTURE_TYPE_INTEL_METRIC_SCOPE_PROPERTIES_EXP static_cast(0x00010006) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_HW_BUFFER_SIZE_EXP_DESC static_cast(0x00010007) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_GROUP_CALCULATION_EXP_PROPERTIES static_cast(0x00010008) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_CALCULATION_DESC_EXP static_cast(0x00010009) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_SOURCE_ID_EXP static_cast(0x0001000a) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_DECODED_BUFFER_PROPERTIES_EXP static_cast(0x0001000b) +#define ZET_INTEL_STRUCTURE_TYPE_METRIC_CALCULABLE_PROPERTIES_EXP static_cast(0x0001000c) + +// Sysman structure types +#define ZES_INTEL_PCI_LINK_SPEED_DOWNGRADE_EXP_STATE static_cast(0x00040001) +#define 
ZES_INTEL_PCI_LINK_SPEED_DOWNGRADE_EXP_PROPERTIES static_cast(0x00040002) + +#endif diff --git a/thirdparty/level_zero/compute-runtime/zex_common.h b/thirdparty/level_zero/compute-runtime/zex_common.h new file mode 100644 index 00000000000000..7c8f050c38166e --- /dev/null +++ b/thirdparty/level_zero/compute-runtime/zex_common.h @@ -0,0 +1,241 @@ +// intel/compute-runtime 4df478c5139703c82e548a65eafbcc69923953ac +/* + * Copyright (C) 2022-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#ifndef _ZEX_COMMON_H +#define _ZEX_COMMON_H +#if defined(__cplusplus) +#pragma once +#endif +#include "ze_stypes.h" +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of command list object +typedef ze_command_list_handle_t zex_command_list_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of event object +typedef ze_event_handle_t zex_event_handle_t; + +#define ZEX_BIT(_i) (1 << _i) + +typedef uint32_t zex_mem_action_scope_flags_t; +typedef enum _zex_mem_action_scope_flag_t { + ZEX_MEM_ACTION_SCOPE_FLAG_SUBDEVICE = ZEX_BIT(0), + ZEX_MEM_ACTION_SCOPE_FLAG_DEVICE = ZEX_BIT(1), + ZEX_MEM_ACTION_SCOPE_FLAG_HOST = ZEX_BIT(2), + ZEX_MEM_ACTION_SCOPE_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_mem_action_scope_flag_t; + +typedef uint32_t zex_wait_on_mem_action_flags_t; +typedef enum _zex_wait_on_mem_action_flag_t { + ZEX_WAIT_ON_MEMORY_FLAG_EQUAL = ZEX_BIT(0), + ZEX_WAIT_ON_MEMORY_FLAG_NOT_EQUAL = ZEX_BIT(1), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN = ZEX_BIT(2), + ZEX_WAIT_ON_MEMORY_FLAG_GREATER_THAN_EQUAL = ZEX_BIT(3), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN = ZEX_BIT(4), + ZEX_WAIT_ON_MEMORY_FLAG_LESSER_THAN_EQUAL = ZEX_BIT(5), + ZEX_WAIT_ON_MEMORY_FLAG_FORCE_UINT32 = 0x7fffffff +} zex_wait_on_mem_action_flag_t; + +typedef struct _zex_wait_on_mem_desc_t { + zex_wait_on_mem_action_flags_t actionFlag; + 
zex_mem_action_scope_flags_t waitScope; +} zex_wait_on_mem_desc_t; + +typedef struct _zex_write_to_mem_desc_t { + zex_mem_action_scope_flags_t writeScope; +} zex_write_to_mem_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZE_SYNCHRONIZED_DISPATCH_EXP_NAME +/// @brief Synchronized Dispatch extension name +#define ZE_SYNCHRONIZED_DISPATCH_EXP_NAME "ZE_experimental_synchronized_dispatch" +#endif // ZE_SYNCHRONIZED_DISPATCH_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Synchronized Dispatch extension version(s) +typedef enum _ze_synchronized_dispatch_exp_version_t { + ZE_SYNCHRONIZED_DISPATCH_EXP_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZE_SYNCHRONIZED_DISPATCH_EXP_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZE_SYNCHRONIZED_DISPATCH_EXP_VERSION_FORCE_UINT32 = 0x7fffffff + +} ze_synchronized_dispatch_exp_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported synchronized dispatch flags +typedef uint32_t ze_synchronized_dispatch_exp_flags_t; +typedef enum _ze_synchronized_dispatch_exp_flag_t { + ZE_SYNCHRONIZED_DISPATCH_DISABLED_EXP_FLAG = ZE_BIT(0), ///< Non-synchronized dispatch. Must synchronize only with other synchronized dispatches + ZE_SYNCHRONIZED_DISPATCH_ENABLED_EXP_FLAG = ZE_BIT(1), ///< Synchronized dispatch. 
Must synchronize with all synchronized and non-synchronized dispatches + ZE_SYNCHRONIZED_DISPATCH_EXP_FLAG_FORCE_UINT32 = 0x7fffffff + +} ze_synchronized_dispatch_exp_flag_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare ze_synchronized_dispatch_exp_desc_t +typedef struct _ze_synchronized_dispatch_exp_desc_t ze_synchronized_dispatch_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command queue or command list descriptor for synchronized dispatch. This structure may be +/// passed as pNext member of ::ze_command_queue_desc_t. or ::ze_command_list_desc_t. +typedef struct _ze_synchronized_dispatch_exp_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + ze_synchronized_dispatch_exp_flags_t flags; ///< [in] mode flags. 
+ ///< must be valid value of ::ze_synchronized_dispatch_exp_flag_t + +} ze_synchronized_dispatch_exp_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare ze_intel_media_communication_desc_t +typedef struct _ze_intel_media_communication_desc_t ze_intel_media_communication_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief ze_intel_media_communication_desc_t +typedef struct _ze_intel_media_communication_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific, this will be used to extend this in future + void *controlSharedMemoryBuffer; ///< [in] control shared memory buffer pointer, must be USM address + uint32_t controlSharedMemoryBufferSize; ///< [in] control shared memory buffer size + void *controlBatchBuffer; ///< [in] control batch buffer pointer, must be USM address + uint32_t controlBatchBufferSize; ///< [in] control batch buffer size +} ze_intel_media_communication_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare ze_intel_media_doorbell_handle_desc_t +typedef struct _ze_intel_media_doorbell_handle_desc_t ze_intel_media_doorbell_handle_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief ze_intel_media_doorbell_handle_desc_t +/// @details Handle of the doorbell. 
This structure is passed as argument of zeIntelMediaCommunicationCreate and zeIntelMediaCommunicationDestroy +typedef struct _ze_intel_media_doorbell_handle_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific, this will be used to extend this in future + void *doorbell; ///< [in,out] handle of the doorbell +} ze_intel_media_doorbell_handle_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported device media flags +typedef uint32_t ze_intel_device_media_exp_flags_t; +typedef enum _ze_intel_device_media_exp_flag_t { + ZE_INTEL_DEVICE_MEDIA_SUPPORTS_ENCODING_EXP_FLAG = ZE_BIT(0), ///< Supports encoding + ZE_INTEL_DEVICE_MEDIA_SUPPORTS_DECODING_EXP_FLAG = ZE_BIT(1), ///< Supports decoding + ZE_INTEL_DEVICE_MEDIA_EXP_FLAG_FORCE_UINT32 = 0x7fffffff +} ze_intel_device_media_exp_flag_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare ze_intel_device_media_exp_properties_t +typedef struct _ze_intel_device_media_exp_properties_t ze_intel_device_media_exp_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief May be passed to ze_device_properties_t through pNext. 
+typedef struct _ze_intel_device_media_exp_properties_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + ze_intel_device_media_exp_flags_t flags; ///< [out] device media flags + uint32_t numEncoderCores; ///< [out] number of encoder cores + uint32_t numDecoderCores; ///< [out] number of decoder cores +} ze_intel_device_media_exp_properties_t; + +#ifndef ZEX_COUNTER_BASED_EVENT_EXT_NAME +/// @brief Counter Based Event Extension Name +#define ZEX_COUNTER_BASED_EVENT_EXT_NAME "ZEX_counter_based_event" +#endif // ZEX_COUNTER_BASED_EVENT_EXT_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Counter Based Event Extension Version(s) +typedef enum _zex_counter_based_event_version_t { + ZEX_COUNTER_BASED_EVENT_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 + ZEX_COUNTER_BASED_EVENT_VERSION_CURRENT = ZE_MAKE_VERSION(1, 0), ///< latest known version + ZEX_COUNTER_BASED_EVENT_VERSION_FORCE_UINT32 = 0x7fffffff + +} zex_counter_based_event_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief IPC handle to counter based event +typedef struct _zex_ipc_counter_based_event_handle_t { + char data[ZE_MAX_IPC_HANDLE_SIZE]; ///< [out] Opaque data representing an IPC handle +} zex_ipc_counter_based_event_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Supported event flags for defining counter-based event +typedef uint32_t zex_counter_based_event_exp_flags_t; +typedef enum _zex_counter_based_event_exp_flag_t { + ZEX_COUNTER_BASED_EVENT_FLAG_IMMEDIATE = ZE_BIT(0), ///< Counter-based event is used for immediate command lists (default) + ZEX_COUNTER_BASED_EVENT_FLAG_NON_IMMEDIATE = ZE_BIT(1), ///< Counter-based event is used for non-immediate command lists + ZEX_COUNTER_BASED_EVENT_FLAG_HOST_VISIBLE = ZE_BIT(2), 
///< Signals and waits are also visible to host + ZEX_COUNTER_BASED_EVENT_FLAG_IPC = ZE_BIT(3), ///< Event can be shared across processes for waiting + ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP = ZE_BIT(4), ///< Event contains kernel timestamps + ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_MAPPED_TIMESTAMP = ZE_BIT(5), ///< Event contains kernel timestamps synchronized to host time domain. + ///< Cannot be combined with::ZEX_COUNTER_BASED_EVENT_FLAG_KERNEL_TIMESTAMP + ZEX_COUNTER_BASED_EVENT_FLAG_GRAPH_EXTERNAL_EVENT = ZE_BIT(6), ///< Event when is used in graph record & replay, can be used outside + ///< recorded graph for synchronization (using as wait event or for host synchronization) + ZEX_COUNTER_BASED_EVENT_FLAG_FORCE_UINT32 = 0x7fffffff + +} zex_counter_based_event_exp_flag_t; + +typedef struct _zex_counter_based_event_desc_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + zex_counter_based_event_exp_flags_t flags; ///< [in] counter based event flags. + ///< Must be 0 (default) or a valid combination of ::zex_counter_based_event_exp_flag_t + ze_event_scope_flags_t signalScope; ///< [in] defines the scope of relevant cache hierarchies to flush on a + ///< signal action before the event is triggered. + ///< must be 0 (default) or a valid combination of ::ze_event_scope_flag_t; + ///< default behavior is synchronization within the command list only, no + ///< additional cache hierarchies are flushed. + ze_event_scope_flags_t waitScope; ///< [in] defines the scope of relevant cache hierarchies to invalidate on + ///< a wait action after the event is complete. + ///< must be 0 (default) or a valid combination of ::ze_event_scope_flag_t; + ///< default behavior is synchronization within the command list only, no + ///< additional cache hierarchies are invalidated. 
+} zex_counter_based_event_desc_t; + +static const zex_counter_based_event_desc_t defaultIntelCounterBasedEventDesc = { + ZEX_STRUCTURE_COUNTER_BASED_EVENT_DESC, // stype + nullptr, // pNext + ZEX_COUNTER_BASED_EVENT_FLAG_IMMEDIATE | + ZEX_COUNTER_BASED_EVENT_FLAG_NON_IMMEDIATE | + ZEX_COUNTER_BASED_EVENT_FLAG_HOST_VISIBLE, // flags + ZE_EVENT_SCOPE_FLAG_HOST, // signalScope + ZE_EVENT_SCOPE_FLAG_DEVICE // waitScope +}; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initial Counter Based Event synchronization parameters. This structure may be +/// passed as pNext member of ::zex_counter_based_event_desc_t. +typedef struct _zex_counter_based_event_external_sync_alloc_properties_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + uint64_t *deviceAddress; ///< [in] device address for external synchronization allocation + uint64_t *hostAddress; ///< [in] host address for external synchronization allocation + uint64_t completionValue; ///< [in] completion value for external synchronization allocation +} zex_counter_based_event_external_sync_alloc_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initial Counter Based Event synchronization parameters. This structure may be +/// passed as pNext member of ::zex_counter_based_event_desc_t. 
+typedef struct _zex_counter_based_event_external_storage_properties_t { + ze_structure_type_ext_t stype; ///< [in] type of this structure + const void *pNext; ///< [in][optional] must be null or a pointer to an extension-specific + uint64_t *deviceAddress; ///< [in] device address that would be updated with atomic_add upon signaling of this event, must be device USM memory + uint64_t incrementValue; ///< [in] value which would by atomically added upon each completion + uint64_t completionValue; ///< [in] final completion value, when value under deviceAddress is equal or greater then this value then event is considered as completed +} zex_counter_based_event_external_storage_properties_t; + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // _ZEX_COMMON_EXTENDED_H diff --git a/thirdparty/level_zero/compute-runtime/zex_event.h b/thirdparty/level_zero/compute-runtime/zex_event.h new file mode 100644 index 00000000000000..ec38aaeba9bd80 --- /dev/null +++ b/thirdparty/level_zero/compute-runtime/zex_event.h @@ -0,0 +1,58 @@ +// intel/compute-runtime 4df478c5139703c82e548a65eafbcc69923953ac +/* + * Copyright (C) 2023-2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#ifndef _ZEX_EVENT_H +#define _ZEX_EVENT_H +#if defined(__cplusplus) +#pragma once +#endif + +#include + +#include "zex_common.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +ZE_APIEXPORT ze_result_t ZE_APICALL +zexEventGetDeviceAddress( + ze_event_handle_t event, + uint64_t *completionValue, + uint64_t *address); + +// deprecated +ZE_APIEXPORT ze_result_t ZE_APICALL +zexCounterBasedEventCreate( + ze_context_handle_t hContext, + ze_device_handle_t hDevice, + uint64_t *deviceAddress, + uint64_t *hostAddress, + uint64_t completionValue, + const ze_event_desc_t *desc, + ze_event_handle_t *phEvent); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexIntelAllocateNetworkInterrupt(ze_context_handle_t hContext, uint32_t &networkInterruptId); + +ZE_APIEXPORT ze_result_t ZE_APICALL 
zexIntelReleaseNetworkInterrupt(ze_context_handle_t hContext, uint32_t networkInterruptId); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexCounterBasedEventCreate2(ze_context_handle_t hContext, ze_device_handle_t hDevice, const zex_counter_based_event_desc_t *desc, ze_event_handle_t *phEvent); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexCounterBasedEventGetIpcHandle(ze_event_handle_t hEvent, zex_ipc_counter_based_event_handle_t *phIpc); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexCounterBasedEventOpenIpcHandle(ze_context_handle_t hContext, zex_ipc_counter_based_event_handle_t hIpc, ze_event_handle_t *phEvent); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexCounterBasedEventCloseIpcHandle(ze_event_handle_t hEvent); + +ZE_APIEXPORT ze_result_t ZE_APICALL zexDeviceGetAggregatedCopyOffloadIncrementValue(ze_device_handle_t hDevice, uint32_t *incrementValue); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // _ZEX_EVENT_H