-
Notifications
You must be signed in to change notification settings - Fork 3k
[GPU][WIP] Add L0 runtime support #30789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 63 commits
38f889e
ce58599
d8283b1
f38ae58
8aaec53
51e581b
f8eb99a
44d0a79
c3b1a67
7f79b07
2bbe030
0e70375
35e492d
6b586d3
afe606b
66a9c6c
758c022
02d4a15
40d7e4b
b7c56f5
ab762bf
8f48750
862df7e
96e70c7
fedec68
8725b28
4fa5c19
933e262
3993c80
078d3a3
08c1555
31311c7
e8dce15
ae16177
8a7b566
0e7c363
7b5829f
debf47f
33a1736
37cfa39
8e7c1ae
55cb68e
4893a28
3007f7b
ddbe71b
7c01937
707a238
7b2d8ae
cba0d29
2f855da
9ba3c9c
d0f157c
4ec431e
3e6b368
cfdbb02
04b086a
6c6664d
a3eac10
e6480ac
c76cdb2
6473aa0
93bb04b
c621131
fadca80
8c9e855
d377c45
ca7f6cf
674556c
863ea10
0cdb6f5
86c81b4
8621d68
8d0b5bf
d64e6b6
5ccd52c
786fbb6
4690578
206a580
d71ae12
0a48846
ae5afd8
d964b51
19ddbe9
85972fb
8e1bdb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # Copyright (C) 2024 Intel Corporation | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
|
|
||
| function(ov_gpu_set_runtime_interface_for TARGET_NAME) | ||
| if(GPU_RT_TYPE STREQUAL "L0") | ||
| target_compile_definitions(${TARGET_NAME} PRIVATE OV_GPU_WITH_ZE_RT=1) | ||
| target_link_libraries(${TARGET_NAME} PRIVATE LevelZero::LevelZero) | ||
| elseif(GPU_RT_TYPE STREQUAL "OCL") | ||
| target_compile_definitions(${TARGET_NAME} PRIVATE OV_GPU_WITH_OCL_RT=1) | ||
| target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL) | ||
| else() | ||
| message(FATAL_ERROR "Invalid GPU runtime type: `${GPU_RT_TYPE}` Only `L0` and `OCL` are supported") | ||
| endif() | ||
| endfunction() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,13 +56,13 @@ struct gfx_version { | |
| <= std::tie(r.major, r.minor, r.revision); // same order | ||
| } | ||
|
|
||
| bool operator==(const gfx_version& other) { | ||
| bool operator==(const gfx_version& other) const { | ||
| return major == other.major && | ||
| minor == other.minor && | ||
| revision == other.revision; | ||
| } | ||
|
|
||
| bool operator!=(const gfx_version& other) { | ||
| bool operator!=(const gfx_version& other) const { | ||
| return !(*this == other); | ||
| } | ||
| }; | ||
|
|
@@ -73,14 +73,14 @@ struct pci_bus_info { | |
| uint32_t pci_device = 0; | ||
| uint32_t pci_function = 0; | ||
|
|
||
| bool operator==(const pci_bus_info& other) { | ||
| bool operator==(const pci_bus_info& other) const { | ||
| return pci_domain == other.pci_domain && | ||
| pci_bus == other.pci_bus && | ||
| pci_device == other.pci_device && | ||
| pci_function == other.pci_function; | ||
| } | ||
|
|
||
| bool operator!=(const pci_bus_info& other) { | ||
| bool operator!=(const pci_bus_info& other) const { | ||
| return !(*this == other); | ||
| } | ||
| }; | ||
|
|
@@ -116,9 +116,14 @@ struct device_info { | |
| bool supports_imad; ///< Does engine support int8 mad. | ||
| bool supports_immad; ///< Does engine support int8 multi mad. | ||
|
|
||
| bool supports_mutable_command_list; ///< [L0] Does the target runtime/device support mutable command list feature | ||
|
|
||
| bool supports_usm; ///< Does engine support unified shared memory. | ||
| bool has_separate_cache; ///< Does the target hardware has separate cache for usm_device and usm_host | ||
|
|
||
| bool supports_cp_offload; ///< [L0] Does the command queue support copy offload | ||
| bool supports_cb_events; ///< [L0] Does the target runtime support counter based events | ||
|
||
|
|
||
| std::vector<size_t> supported_simd_sizes; ///< List of SIMD sizes supported by current device and compiler | ||
|
|
||
| uint32_t vendor_id; ///< Vendor ID | ||
|
|
@@ -140,8 +145,45 @@ struct device_info { | |
|
|
||
| pci_bus_info pci_info; ///< PCI bus information for the device | ||
|
|
||
| uint64_t timer_resolution; ///< [L0] Resolution of device timer used for profiling in cycles/sec | ||
| uint32_t kernel_timestamp_valid_bits; ///< [L0] Number of valid bits in the kernel timestamp values | ||
| uint32_t compute_queue_group_ordinal; ///< [L0] Ordinal of the command queue group to use for compute | ||
| uint32_t device_memory_ordinal; ///< [L0] Ordinal of the selected global device memory | ||
|
|
||
| ov::device::UUID uuid; ///< UUID of the gpu device | ||
| ov::device::LUID luid; ///< LUID of the gpu device | ||
|
|
||
| inline bool is_same_device(const device_info &other) const { | ||
| // Relying solely on the UUID is not reliable in all the cases (particularly on legacy platforms), | ||
| // where the UUID may be missing or incorrectly generated | ||
| // Therefore, we also validate other attributes | ||
| if (uuid.uuid != other.uuid.uuid) | ||
| return false; | ||
|
|
||
| if (pci_info != other.pci_info) | ||
| return false; | ||
|
|
||
| if (sub_device_idx != other.sub_device_idx) | ||
| return false; | ||
|
|
||
| if (vendor_id != other.vendor_id || | ||
| dev_name != other.dev_name || | ||
| driver_version != other.driver_version) | ||
| return false; | ||
|
|
||
| if (dev_type != other.dev_type || | ||
| gfx_ver != other.gfx_ver || | ||
| arch != other.arch) | ||
| return false; | ||
|
|
||
| if (ip_version != other.ip_version || device_id != other.device_id) | ||
| return false; | ||
|
|
||
| if (execution_units_count != other.execution_units_count || max_global_mem_size != other.max_global_mem_size) | ||
| return false; | ||
|
|
||
| return true; | ||
| } | ||
| }; | ||
|
|
||
| /// @} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| #include <utility> | ||
| #include <utility> | ||
| #include <functional> | ||
| #include <optional> | ||
|
||
|
|
||
| namespace cldnn { | ||
| struct user_event; | ||
|
|
@@ -32,6 +33,15 @@ struct event { | |
| _profiling_captured = false; | ||
| _profiling_info.clear(); | ||
| } | ||
| // Set event profiling data instead of retrieving it from event object | ||
| void set_profiling(uint64_t duration_nsec) { | ||
|
||
| auto stage = instrumentation::profiling_stage::executing; | ||
| auto duration = std::chrono::nanoseconds(duration_nsec); | ||
| auto period = std::make_shared<instrumentation::profiling_period_basic>(duration); | ||
|
|
||
| _profiling_info.push_back({ stage, period }); | ||
| _profiling_captured = true; | ||
| } | ||
|
|
||
| // returns true if handler has been successfully added | ||
| bool add_event_handler(event_handler handler, void* data); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the record: I assume we will end up with a single copy of onednn_gpu. Please update this before the PR is merged.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to wait for uxlfoundation/oneDNN#4499 to be merged first.